Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ if (UMPIRE_ENABLE_MPI3_SHARED_MEMORY)
endif()
endif()

# Build the ipc_shared_memory_release example only when IPC shared memory
# support is enabled, and add it to the umpire_examples list.
if (UMPIRE_ENABLE_IPC_SHARED_MEMORY)
blt_add_executable(
NAME ipc_shared_memory_release
SOURCES ipc_shared_memory_release.cpp
DEPENDS_ON ${example_depends})
list(APPEND umpire_examples ipc_shared_memory_release)
endif()

blt_add_executable(
NAME aligned_allocator
SOURCES aligned_allocator.cpp
Expand Down
154 changes: 154 additions & 0 deletions examples/ipc_shared_memory_release.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
//////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2016-26, Lawrence Livermore National Security, LLC and Umpire
// project contributors. See the COPYRIGHT file for details.
//
// SPDX-License-Identifier: (MIT)
//////////////////////////////////////////////////////////////////////////////

#include <cstddef>
#include <cstdint>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <unistd.h>

#include "umpire/Allocator.hpp"
#include "umpire/ResourceManager.hpp"
#include "umpire/Umpire.hpp"

namespace {
// Returns the system page size in bytes as reported by
// sysconf(_SC_PAGESIZE), or 4096 when no positive value is reported.
std::size_t page_size()
{
  const long reported = ::sysconf(_SC_PAGESIZE);
  if (reported <= 0) {
    return 4096;
  }
  return static_cast<std::size_t>(reported);
}

// Renders a byte count as a human-readable string using binary units.
// Values below 1 KiB are printed as an exact integer ("<n> B"); larger values
// are divided by the largest fitting unit (KiB, MiB or GiB) and printed with
// std::to_string's default floating-point formatting.
std::string format_bytes(std::size_t bytes)
{
  constexpr double kibi = 1024.0;
  constexpr double mebi = 1024.0 * kibi;
  constexpr double gibi = 1024.0 * mebi;

  const double amount = static_cast<double>(bytes);

  if (amount < kibi) {
    return std::to_string(bytes) + " B";
  }
  if (amount < mebi) {
    return std::to_string(amount / kibi) + " KiB";
  }
  if (amount < gibi) {
    return std::to_string(amount / mebi) + " MiB";
  }
  return std::to_string(amount / gibi) + " GiB";
}

// Increments one byte at every page_size() stride of buffer[0, bytes), and
// then the final byte of the range when bytes > 0, so that each page covered
// by the range is written at least once.
//
// buffer: start of the range; it is dereferenced without a null check.
// bytes:  length of the range in bytes; 0 results in no writes.
//
// NOTE(review): when (bytes - 1) is a multiple of the page size, the last
// byte is incremented twice (once by the loop, once by the trailing write).
void touch_one_byte_per_page(std::uint8_t* buffer, std::size_t bytes)
{
const std::size_t ps = page_size();
for (std::size_t i = 0; i < bytes; i += ps) {
buffer[i] = static_cast<std::uint8_t>(buffer[i] + 1);

@adayton1 adayton1 Jun 4, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not simplify to ++buffer[i];?

}
// The stride loop may stop short of the tail of the range; write the last
// byte explicitly.
if (bytes > 0) {
buffer[bytes - 1] = static_cast<std::uint8_t>(buffer[bytes - 1] + 1);
}
}

// Sums the "Rss:" values (parsed as kB) of every /proc/self/smaps entry whose
// header line contains mapping_name_substr, and returns the total in bytes.
//
// Returns 0 when /proc/self/smaps cannot be opened, when no entry matches (or
// the matching entries report no resident pages), and always when not built
// for Linux.
std::size_t get_smaps_rss_bytes_for_mapping(const std::string& mapping_name_substr)
Comment thread
kab163 marked this conversation as resolved.
Outdated
{
#if defined(__linux__)
std::ifstream smaps("/proc/self/smaps");
if (!smaps) {
return 0;
}

std::size_t rss_kb{0};
bool in_target{false};

std::string line;
while (std::getline(smaps, line)) {
// Heuristic for a mapping header line: starts with a hex digit and contains
// a '-' (presumably the "start-end" address range — confirm against proc(5)).
const bool is_header = (!line.empty() && std::isxdigit(static_cast<unsigned char>(line[0])) && line.find('-') != std::string::npos);

if (is_header) {
// Every header starts a new entry; track whether it is one we care about.
in_target = (line.find(mapping_name_substr) != std::string::npos);
continue;
}

if (!in_target) {
continue;
}

constexpr const char* rss_prefix = "Rss:";
// rfind(prefix, 0) == 0 is a "starts with" test.
if (line.rfind(rss_prefix, 0) == 0) {
// value_kb stays 0 if sscanf fails to parse; its return value is not checked.
std::size_t value_kb{0};
std::sscanf(line.c_str(), "Rss: %zu kB", &value_kb);
rss_kb += value_kb;
}
}

// Convert the accumulated kB total to bytes.
return rss_kb * 1024;
#else
(void)mapping_name_substr;
return 0;
#endif
}
} // namespace

// Example driver: creates a 512 MiB "SHARED::POSIX" resource, allocates
// 256 MiB from it, touches the allocation page by page, then deallocates and
// calls release(). After each step it prints the value of
// umpire::get_process_memory_usage() and, when non-zero, the resident size of
// the smaps entries matching the allocator name.
int main(int, char**)
{
constexpr std::size_t segment_size = 512ULL * 1024ULL * 1024ULL;
constexpr std::size_t alloc_size = 256ULL * 1024ULL * 1024ULL;

auto& rm = umpire::ResourceManager::getInstance();
auto traits = umpire::get_default_resource_traits("SHARED::POSIX");
traits.size = segment_size;

// NOTE(review): the smaps lookups below match this name as a substring of the
// mapping header line; assumes the segment's mapping path contains it — TODO confirm.
const std::string allocator_name = "SHARED::POSIX::release_example";
umpire::Allocator allocator = rm.makeResource(allocator_name, traits);

// Baseline measurement, taken before anything is allocated from the resource.
const std::size_t rss_before = umpire::get_process_memory_usage();
std::cout << "RSS before: " << format_bytes(rss_before) << "\n";

const std::size_t shm_rss_before = get_smaps_rss_bytes_for_mapping(allocator_name);
if (shm_rss_before > 0) {
std::cout << "Shared segment RSS before: " << format_bytes(shm_rss_before) << "\n";
}

// Allocate, then write to each page of the allocation before measuring again.
void* ptr = allocator.allocate("buffer", alloc_size);
touch_one_byte_per_page(static_cast<std::uint8_t*>(ptr), alloc_size);

const std::size_t rss_after_touch = umpire::get_process_memory_usage();
std::cout << "RSS after touching allocation: " << format_bytes(rss_after_touch) << "\n";

const std::size_t shm_rss_after_touch = get_smaps_rss_bytes_for_mapping(allocator_name);
if (shm_rss_after_touch > 0) {
std::cout << "Shared segment RSS after touch: " << format_bytes(shm_rss_after_touch) << "\n";
}

// Measure after deallocate but before release() so the two can be compared.
allocator.deallocate(ptr);

const std::size_t rss_after_free = umpire::get_process_memory_usage();
std::cout << "RSS after deallocate (before release): " << format_bytes(rss_after_free) << "\n";

const std::size_t shm_rss_after_free = get_smaps_rss_bytes_for_mapping(allocator_name);
if (shm_rss_after_free > 0) {
std::cout << "Shared segment RSS after deallocate: " << format_bytes(shm_rss_after_free) << "\n";
}

allocator.release();

const std::size_t rss_after_release = umpire::get_process_memory_usage();
std::cout << "RSS after allocator.release(): " << format_bytes(rss_after_release) << "\n";

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"allocator.release()" should probably be changed to "release" to match the other output.


const std::size_t shm_rss_after_release = get_smaps_rss_bytes_for_mapping(allocator_name);
// NOTE(review): the else branch also runs on Linux whenever the helper
// returns 0 (smaps unreadable, no matching entry, or no resident pages), in
// which case the "requires Linux" message is misleading.
if (shm_rss_after_release > 0) {
std::cout << "Shared segment RSS after release: " << format_bytes(shm_rss_after_release) << "\n";
} else {
std::cout << "NOTE: shared segment RSS reporting requires Linux (/proc/self/smaps)\n";
}

return 0;
}
5 changes: 5 additions & 0 deletions src/umpire/resource/HostSharedMemoryResource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ void HostSharedMemoryResource::deallocate(void* ptr, std::size_t)
return pimpl->deallocate(ptr);
}

// Forwards the release request to the pimpl implementation object.
void HostSharedMemoryResource::release()
{
  pimpl->release();
}

bool HostSharedMemoryResource::isPageable() noexcept
{
#if defined(UMPIRE_ENABLE_CUDA)
Expand Down
10 changes: 10 additions & 0 deletions src/umpire/resource/HostSharedMemoryResource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ class HostSharedMemoryResource : public MemoryResource {

void deallocate(void* ptr, std::size_t size) override;

/*!
* \brief Attempt to return unused shared memory pages to the OS.
*
* This resource uses a fixed-size POSIX shared memory segment and maintains
* free blocks internally. Calling release() will not change allocation
* semantics, but may allow the OS to reclaim pages that back free blocks so
* they can be reused by the system.
*
* Only memory belonging to blocks on the free list is considered. The page
* advice is issued on Linux builds only; on other platforms the call leaves
* resident pages untouched.
*/
void release() override;

std::size_t getActualSize() const noexcept override;

bool isAccessibleFrom(Platform p) noexcept override;
Expand Down
51 changes: 51 additions & 0 deletions src/umpire/resource/HostSharedMemoryResourceImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <sys/types.h> // ftruncate, fstat
#include <unistd.h> // ftruncate, fstat

#include <cerrno>
#include <cstdint>
#include <limits>
#include <string>
#include <thread>
Expand Down Expand Up @@ -301,6 +303,55 @@ class HostSharedMemoryResource::impl {
return ptr;
}

// Walks the segment's free-block list while holding the segment mutex and, on
// Linux builds, calls madvise(MADV_DONTNEED) on the page-aligned interior of
// each free block. The first sizeof(SharedMemoryBlock) bytes of every block
// are excluded from the advised range. madvise failures are logged at Debug
// level and otherwise ignored. On non-Linux builds the list is walked but no
// advice is issued.
//
// NOTE(review): assumes UMPIRE_ERROR does not return (throws); otherwise the
// code below would run without holding the mutex — confirm.
// NOTE(review): for a shared mapping, confirm MADV_DONTNEED gives the intended
// effect (dropping this process's resident pages vs. freeing the backing
// storage) against madvise(2).
void release()
{
int err{0};
if ((err = pthread_mutex_lock(&m_segment->mutex)) != 0) {
Comment thread
adayton1 marked this conversation as resolved.
UMPIRE_ERROR(runtime_error,
fmt::format("Failed to lock mutex for shared memory segment {}: {}", m_segment_name, strerror(err)));
}

// Fall back to 4096 when sysconf does not report a positive page size.
// page_size is only consumed inside the __linux__ section below.
long page_size = ::sysconf(_SC_PAGESIZE);
if (page_size <= 0) {
page_size = 4096;
}

// Start at the head of the free list; the loop below ends when
// offset_to_pointer yields nullptr.
SharedMemoryBlock* block_ptr{nullptr};
offset_to_pointer(m_segment->free_blocks_off, block_ptr);

while (block_ptr != nullptr) {
#if defined(__linux__)
char* const block_begin = reinterpret_cast<char*>(block_ptr);
char* const block_end = block_begin + block_ptr->block_size;

// Skip the block header so the advised range starts after it.
constexpr std::size_t keep_bytes = sizeof(SharedMemoryBlock);

std::uintptr_t begin = reinterpret_cast<std::uintptr_t>(block_begin + keep_bytes);
std::uintptr_t end = reinterpret_cast<std::uintptr_t>(block_end);

const std::uintptr_t ps = static_cast<std::uintptr_t>(page_size);

// Shrink [begin, end) inward to whole pages. The mask arithmetic assumes
// the page size is a power of two.
begin = (begin + ps - 1) & ~(ps - 1); // align up
end = end & ~(ps - 1); // align down

// Blocks too small to contain a whole page after the header are skipped.
if (end > begin) {
void* advise_ptr = reinterpret_cast<void*>(begin);
const std::size_t advise_len = static_cast<std::size_t>(end - begin);

if (::madvise(advise_ptr, advise_len, MADV_DONTNEED) != 0) {
// Capture errno immediately, before any other call can overwrite it.
int madvise_err = errno;
UMPIRE_LOG(Debug, "madvise(MADV_DONTNEED) failed for shared memory segment "
<< m_segment_name << ": " << strerror(madvise_err));
}
}
#endif

// Advance to the next free block.
offset_to_pointer(block_ptr->next_block_off, block_ptr);
}

// NOTE(review): the return value of pthread_mutex_unlock is not checked.
pthread_mutex_unlock(&m_segment->mutex);
}

std::size_t getActualSize() const noexcept
{
int err{0};
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/resource/ipc_shared_memory_resource_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,18 @@ TEST_F(SharedMemoryTest, UnitTests)
MPI_Barrier(MPI_COMM_WORLD);
ASSERT_EQ(shmem_resource->getActualSize(), shmem_state->initial_size);

MPI_Barrier(MPI_COMM_WORLD);
ASSERT_NO_THROW(allocator.release(););
MPI_Barrier(MPI_COMM_WORLD);
ASSERT_EQ(shmem_resource->getActualSize(), shmem_state->initial_size);

MPI_Barrier(MPI_COMM_WORLD);
if (m_rank == 0) {
void* ptr = allocator.allocate("PostReleaseAlloc", shmem_state->largest_allocation_size);
allocator.deallocate(ptr);
}
MPI_Barrier(MPI_COMM_WORLD);

MPI_Barrier(MPI_COMM_WORLD);
}
}
Expand Down
Loading