Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ if (UMPIRE_ENABLE_MPI3_SHARED_MEMORY)
endif()
endif()

# Build the ipc_shared_memory_release example only when IPC shared memory
# support is enabled, and add it to the umpire_examples list.
if (UMPIRE_ENABLE_IPC_SHARED_MEMORY)
blt_add_executable(
NAME ipc_shared_memory_release
SOURCES ipc_shared_memory_release.cpp
DEPENDS_ON ${example_depends})
list(APPEND umpire_examples ipc_shared_memory_release)
endif()

blt_add_executable(
NAME aligned_allocator
SOURCES aligned_allocator.cpp
Expand Down
118 changes: 118 additions & 0 deletions examples/ipc_shared_memory_release.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2016-26, Lawrence Livermore National Security, LLC and Umpire
// project contributors. See the COPYRIGHT file for details.
//
// SPDX-License-Identifier: (MIT)
//////////////////////////////////////////////////////////////////////////////

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>

#include <unistd.h>

#include "umpire/Allocator.hpp"
#include "umpire/ResourceManager.hpp"
#include "umpire/Umpire.hpp"

namespace {
// Reports the system page size in bytes as given by sysconf(_SC_PAGESIZE).
// Falls back to 4096 when sysconf does not return a positive value.
std::size_t page_size()
{
  const long reported = ::sysconf(_SC_PAGESIZE);
  if (reported <= 0) {
    return 4096;
  }
  return static_cast<std::size_t>(reported);
}

// Renders a byte count as text using the largest binary unit (GiB, MiB, KiB)
// that the value reaches; values below 1 KiB are printed as a whole number of
// bytes. Scaled values are formatted by std::to_string(double).
std::string format_bytes(std::size_t bytes)
{
  struct Unit {
    double scale;
    const char* suffix;
  };

  // Ordered from largest to smallest so the first match is the best unit.
  const Unit units[] = {
      {1024.0 * 1024.0 * 1024.0, " GiB"},
      {1024.0 * 1024.0, " MiB"},
      {1024.0, " KiB"},
  };

  const double value = static_cast<double>(bytes);
  for (const Unit& unit : units) {
    if (value >= unit.scale) {
      return std::to_string(value / unit.scale) + unit.suffix;
    }
  }

  return std::to_string(bytes) + " B";
}

// Increments one byte at every page_size() stride within
// [buffer, buffer + bytes), and then increments the final byte of the range.
// Presumably this forces each page to be faulted in so it shows up in RSS
// measurements -- confirm against the caller's intent.
//
// buffer: start of the range to touch; it is read and written.
// bytes:  length of the range in bytes; nothing is touched when it is 0.
//
// Note: when (bytes - 1) is a multiple of the page size, the last byte is
// incremented twice (once by the loop, once by the trailing statement).
void touch_one_byte_per_page(std::uint8_t* buffer, std::size_t bytes)
{
const std::size_t ps = page_size();
// Step by one page so exactly one byte per stride is modified.
for (std::size_t i = 0; i < bytes; i += ps) {
buffer[i] = static_cast<std::uint8_t>(buffer[i] + 1);

@adayton1 adayton1 Jun 4, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not simplify to ++buffer[i];?

}
// Also touch the very last byte, which the strided loop may not reach.
if (bytes > 0) {
buffer[bytes - 1] = static_cast<std::uint8_t>(buffer[bytes - 1] + 1);
}
}

} // namespace

// Example driver: creates a "SHARED::POSIX" resource, allocates and touches a
// buffer, then deallocates and calls release(), printing process RSS and the
// RSS attributed to the allocator's mapping after each step.
//
// Returns 1 if the allocation throws a std::exception, 0 otherwise.
int main(int, char**)
{
// 512 MiB segment, of which 256 MiB is allocated below.
constexpr std::size_t segment_size = 512ULL * 1024ULL * 1024ULL;
constexpr std::size_t alloc_size = 256ULL * 1024ULL * 1024ULL;

// Start from the default traits for "SHARED::POSIX" and override only the size.
auto& rm = umpire::ResourceManager::getInstance();
auto traits = umpire::get_default_resource_traits("SHARED::POSIX");
traits.size = segment_size;

const std::string allocator_name = "SHARED::POSIX::release_example";
umpire::Allocator allocator = rm.makeResource(allocator_name, traits);

// Baseline readings before anything is allocated.
const std::size_t rss_before = umpire::get_process_memory_usage();
std::cout << "RSS before: " << format_bytes(rss_before) << "\n";

// Mapping-level readings are printed only when non-zero.
const std::size_t shm_rss_before = umpire::get_mapping_memory_usage(allocator_name);
if (shm_rss_before > 0) {
std::cout << "Shared segment RSS before: " << format_bytes(shm_rss_before) << "\n";
}

void* ptr = nullptr;
try {
ptr = allocator.allocate("buffer", alloc_size);
} catch (const std::exception& e) {
std::cerr << "Failed to allocate " << format_bytes(alloc_size) << ": " << e.what() << "\n";
return 1;
}

// Write into the allocation before taking the next reading.
touch_one_byte_per_page(static_cast<std::uint8_t*>(ptr), alloc_size);

const std::size_t rss_after_touch = umpire::get_process_memory_usage();
std::cout << "RSS after touching allocation: " << format_bytes(rss_after_touch) << "\n";

const std::size_t shm_rss_after_touch = umpire::get_mapping_memory_usage(allocator_name);
if (shm_rss_after_touch > 0) {
std::cout << "Shared segment RSS after touch: " << format_bytes(shm_rss_after_touch) << "\n";
}

// Reading taken after deallocate() but before release().
allocator.deallocate(ptr);

const std::size_t rss_after_free = umpire::get_process_memory_usage();
std::cout << "RSS after deallocate (before release): " << format_bytes(rss_after_free) << "\n";

const std::size_t shm_rss_after_free = umpire::get_mapping_memory_usage(allocator_name);
if (shm_rss_after_free > 0) {
std::cout << "Shared segment RSS after deallocate: " << format_bytes(shm_rss_after_free) << "\n";
}

// Final reading, taken after release().
allocator.release();

const std::size_t rss_after_release = umpire::get_process_memory_usage();
std::cout << "RSS after allocator.release(): " << format_bytes(rss_after_release) << "\n";

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"allocator.release()" should probably be changed to "release" to match the other output.


// NOTE(review): the else branch below runs whenever this reading is 0. That
// can also happen on Linux (for example if no mapping name matched, or if no
// pages remain resident after release()), so the printed note may be
// misleading in those cases -- confirm the intended behavior.
const std::size_t shm_rss_after_release = umpire::get_mapping_memory_usage(allocator_name);
if (shm_rss_after_release > 0) {
std::cout << "Shared segment RSS after release: " << format_bytes(shm_rss_after_release) << "\n";
} else {
std::cout << "NOTE: shared segment RSS reporting requires Linux (/proc/self/smaps)\n";
}

return 0;
}
43 changes: 43 additions & 0 deletions src/umpire/Umpire.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "umpire/Umpire.hpp"

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <iostream>
#include <iterator>
#include <limits>
Expand All @@ -17,7 +19,9 @@

#include "umpire/ResourceManager.hpp"
#include "umpire/config.hpp"
#if defined(UMPIRE_ENABLE_IPC_SHARED_MEMORY)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it a different file included for MPI shared memory?

#include "umpire/resource/HostSharedMemoryResource.hpp"
#endif
#include "umpire/resource/MemoryResource.hpp"
#if defined(UMPIRE_ENABLE_MPI) && defined(UMPIRE_ENABLE_IPC_SHARED_MEMORY)
#if defined(UMPIRE_ENABLE_DEVICE)
Expand Down Expand Up @@ -172,6 +176,45 @@ std::size_t get_process_memory_usage()
#endif
}

// Sums the resident set size (RSS) of every mapping listed in
// /proc/self/smaps whose pathname column contains mapping_name.
//
// Matching is a case-sensitive substring search restricted to the pathname
// column of each mapping header line. The address range, permissions, offset,
// device and inode columns are deliberately excluded so that strings such as
// "rw-p" or "00:00" cannot match unrelated mappings.
//
// mapping_name: substring to look for in mapping pathnames.
// Returns the total RSS in bytes of all matching mappings. Returns 0 when
// nothing matches, when /proc/self/smaps cannot be opened, or on non-Linux
// platforms.
std::size_t get_mapping_memory_usage(const std::string& mapping_name)
{
#if defined(__linux__)
  std::ifstream smaps{"/proc/self/smaps"};
  if (!smaps) {
    return 0;
  }

  // Header layout: "address perms offset dev inode pathname". Skip the first
  // five whitespace-separated columns and return the remainder of the line.
  // The result is empty for anonymous mappings, which have no pathname.
  const auto mapping_path = [](const std::string& header) -> std::string {
    const char* const blanks = " \t";
    std::size_t pos{0};
    for (int column = 0; column < 5; ++column) {
      pos = header.find_first_of(blanks, pos);
      if (pos == std::string::npos) {
        return {};
      }
      pos = header.find_first_not_of(blanks, pos);
      if (pos == std::string::npos) {
        return {};
      }
    }
    return header.substr(pos);
  };

  std::size_t rss_kb{0};
  bool in_target_mapping{false};
  std::string line;

  while (std::getline(smaps, line)) {
    // Mapping header lines start with "address-address": a hex digit first
    // and a hyphen early in the line. Per-mapping field lines ("Rss:",
    // "Anonymous:", ...) have no hyphen there.
    const std::size_t dash = line.find('-');
    const bool is_header = !line.empty() && std::isxdigit(static_cast<unsigned char>(line[0])) &&
                           dash != std::string::npos && dash < 20;

    if (is_header) {
      in_target_mapping = (mapping_path(line).find(mapping_name) != std::string::npos);
      continue;
    }

    if (!in_target_mapping || line.rfind("Rss:", 0) != 0) {
      continue;
    }

    // Only count the value when the line actually parsed.
    std::size_t value_kb{0};
    if (std::sscanf(line.c_str(), "Rss: %zu kB", &value_kb) == 1) {
      rss_kb += value_kb;
    }
  }

  // smaps reports kB; callers expect bytes.
  return rss_kb * 1024;
#else
  UMPIRE_USE_VAR(mapping_name);
  return 0;
#endif
}

std::size_t get_internal_memory_usage()
{
return umpire::ResourceManager::getInstance().getInternalMemoryUsage();
Expand Down
16 changes: 16 additions & 0 deletions src/umpire/Umpire.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,22 @@ std::string get_backtrace(void* ptr);
*/
std::size_t get_process_memory_usage();

/*!
* \brief Get resident memory usage for memory mappings whose name contains
* the provided substring.
*
* On Linux, this uses /proc/self/smaps and sums the RSS values for all mappings
* whose name field contains the provided substring. The matching is case-sensitive
* and uses substring matching (not exact matching), so providing "POSIX" will match
* any mapping path containing "POSIX" anywhere in its name.
*
* Returns 0 on unsupported platforms or if no matching mappings are found.
*
* \param mapping_name Substring to search for in mapping names (case-sensitive)
* \return Total RSS in bytes for all matching mappings, or 0 if none found
*/
std::size_t get_mapping_memory_usage(const std::string& mapping_name);

/*!
* \brief Get high watermark memory usage of the current process (uses underlying
* system-dependent calls)
Expand Down
5 changes: 5 additions & 0 deletions src/umpire/resource/HostSharedMemoryResource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ void HostSharedMemoryResource::deallocate(void* ptr, std::size_t)
return pimpl->deallocate(ptr);
}

// Forwards the release request to the implementation object.
void HostSharedMemoryResource::release()
{
  pimpl->release();
}

bool HostSharedMemoryResource::isPageable() noexcept
{
#if defined(UMPIRE_ENABLE_CUDA)
Expand Down
10 changes: 10 additions & 0 deletions src/umpire/resource/HostSharedMemoryResource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ class HostSharedMemoryResource : public MemoryResource {

void deallocate(void* ptr, std::size_t size) override;

/*!
* \brief Attempt to return unused shared memory pages to the OS.
*
* This resource uses a fixed-size POSIX shared memory segment and maintains
* free blocks internally. Calling release() will not change allocation
* semantics, but may allow the OS to reclaim pages that back free blocks so
* they can be reused by the system.
*/
void release() override;

std::size_t getActualSize() const noexcept override;

bool isAccessibleFrom(Platform p) noexcept override;
Expand Down
52 changes: 52 additions & 0 deletions src/umpire/resource/HostSharedMemoryResourceImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <sys/types.h> // ftruncate, fstat
#include <unistd.h> // ftruncate, fstat

#include <cerrno>
#include <cstdint>
#include <limits>
#include <string>
#include <thread>
Expand Down Expand Up @@ -301,6 +303,56 @@ class HostSharedMemoryResource::impl {
return ptr;
}

// Walks the segment's free-block list and calls madvise(MADV_DONTNEED) on the
// whole pages that lie inside each free block, excluding the leading
// sizeof(SharedMemoryBlock) bytes of the block.
//
// The segment mutex is held for the whole walk. A failure to lock or unlock
// it is reported through UMPIRE_ERROR; a madvise failure is only logged at
// Debug level and the walk continues.
void release()
{
int err{0};
if ((err = pthread_mutex_lock(&m_segment->mutex)) != 0) {
Comment thread
adayton1 marked this conversation as resolved.
UMPIRE_ERROR(runtime_error,
fmt::format("Failed to lock mutex for shared memory segment {}: {}", m_segment_name, strerror(err)));
}

// Fall back to 4096 when sysconf does not report a positive page size.
long page_size = ::sysconf(_SC_PAGESIZE);
if (page_size <= 0) {
page_size = 4096;
}

// Start at the head of the free list; the list is stored as offsets.
SharedMemoryBlock* block_ptr{nullptr};
offset_to_pointer(m_segment->free_blocks_off, block_ptr);

while (block_ptr != nullptr) {
char* const block_begin = reinterpret_cast<char*>(block_ptr);
char* const block_end = block_begin + block_ptr->block_size;

// Never advise away the block header: block_size and next_block_off are
// still read from it during this walk.
constexpr std::size_t keep_bytes = sizeof(SharedMemoryBlock);

std::uintptr_t begin = reinterpret_cast<std::uintptr_t>(block_begin + keep_bytes);
std::uintptr_t end = reinterpret_cast<std::uintptr_t>(block_end);

const std::uintptr_t ps = static_cast<std::uintptr_t>(page_size);

// Shrink [begin, end) to whole pages. The mask arithmetic assumes the page
// size is a power of two.
begin = (begin + ps - 1) & ~(ps - 1); // align up
end = end & ~(ps - 1); // align down

// Blocks that do not contain a full page after alignment are skipped.
if (end > begin) {
void* advise_ptr = reinterpret_cast<void*>(begin);
const std::size_t advise_len = static_cast<std::size_t>(end - begin);

// NOTE(review): what MADV_DONTNEED actually frees depends on how the
// segment is mapped, which is not visible here -- confirm that it gives
// the intended effect for this mapping.
if (::madvise(advise_ptr, advise_len, MADV_DONTNEED) != 0) {
int madvise_err = errno;
UMPIRE_LOG(Debug, "madvise(MADV_DONTNEED) failed for shared memory segment "
<< m_segment_name << ": " << strerror(madvise_err));
}
}

// Advance to the next free block.
offset_to_pointer(block_ptr->next_block_off, block_ptr);
}

if ((err = pthread_mutex_unlock(&m_segment->mutex)) != 0) {
UMPIRE_ERROR(runtime_error, fmt::format("Failed to unlock mutex for shared memory segment {}: {}", m_segment_name,
strerror(err)));
}
}

std::size_t getActualSize() const noexcept
{
int err{0};
Expand Down
Loading
Loading