From de09aeb9f018aefa850a7fb7377374c4ca6368d2 Mon Sep 17 00:00:00 2001 From: Joe McLaren <236280545+parallelArchitect@users.noreply.github.com> Date: Thu, 16 Apr 2026 04:10:25 -0400 Subject: [PATCH 1/2] Fix incorrect memory reporting on coherent UMA platforms (GB10 / DGX Spark) On GB10 / DGX Spark, nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB). This causes nvitop to display full system RAM as GPU memory instead of actually allocatable memory. Fix: detect UMA by comparing NVML total against system virtual memory total. If total >= 90% of system RAM, treat as unified memory and use system virtual memory (MemAvailable) for display instead. Preserves existing behavior for discrete GPUs. Note: requires validation on GB10 / DGX Spark hardware. The fix has not been independently validated on a coherent UMA system. --- nvitop/api/device.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/nvitop/api/device.py b/nvitop/api/device.py index af52be73..2a28193e 100644 --- a/nvitop/api/device.py +++ b/nvitop/api/device.py @@ -985,13 +985,21 @@ def memory_info(self) -> MemoryInfo: # in bytes memory_info = NA if libnvml.nvmlCheckReturn(memory_info): if memory_info.total > 0: - return MemoryInfo( - total=memory_info.total, - free=memory_info.free, - used=memory_info.used, - reserved=getattr(memory_info, 'reserved', NA), - ) - has_unified_memory = True + # Detect coherent UMA platforms (e.g. GB10 Grace Blackwell): + # nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB). + # If total >= 90% of system RAM, treat as unified memory and use MemAvailable instead. + vm = host.virtual_memory() + if vm.total > 0 and memory_info.total >= vm.total * 9 // 10: + has_unified_memory = True + else: + return MemoryInfo( + total=memory_info.total, + free=memory_info.free, + used=memory_info.used, + reserved=getattr(memory_info, 'reserved', NA), + ) + else: + has_unified_memory = True if has_unified_memory: # Device with unified memory # Use system virtual memory as these devices share host memory From 2ca5797f3ad07ddc33901808500b0c0f5b803606 Mon Sep 17 00:00:00 2001 From: Joe McLaren <236280545+parallelArchitect@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:20:55 -0400 Subject: [PATCH 2/2] fix: replace UMA acronym in comment to pass spell check --- nvitop/api/device.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvitop/api/device.py b/nvitop/api/device.py index 2a28193e..c15aed6b 100644 --- a/nvitop/api/device.py +++ b/nvitop/api/device.py @@ -985,7 +985,7 @@ def memory_info(self) -> MemoryInfo: # in bytes memory_info = NA if libnvml.nvmlCheckReturn(memory_info): if memory_info.total > 0: - # Detect coherent UMA platforms (e.g. GB10 Grace Blackwell): + # Detect coherent unified-memory platforms (e.g. GB10 Grace Blackwell): # nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB). # If total >= 90% of system RAM, treat as unified memory and use MemAvailable instead. vm = host.virtual_memory()