From de09aeb9f018aefa850a7fb7377374c4ca6368d2 Mon Sep 17 00:00:00 2001
From: Joe McLaren <236280545+parallelArchitect@users.noreply.github.com>
Date: Thu, 16 Apr 2026 04:10:25 -0400
Subject: [PATCH 1/2] Fix incorrect memory reporting on coherent UMA platforms
 (GB10 / DGX Spark)

On GB10 / DGX Spark, nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with
total == system MemTotal (~121GB). This causes nvitop to display the
full system RAM as GPU memory instead of the memory that is actually
allocatable.

Fix: detect a unified memory architecture (UMA) by comparing the NVML
total against the system virtual memory total. If the NVML total is at
least 90% of system RAM, treat the device as unified memory and use
system virtual memory (MemAvailable) for display instead.

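Preserves existing behavior for discrete GPUs whose reported memory is
below 90% of system RAM. Note that the check has no upper bound, so a
discrete GPU whose memory is at or above that threshold (e.g. a card
with more memory than the host has RAM) is also treated as unified
memory.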

Note: this fix still requires validation on GB10 / DGX Spark hardware;
it has not been independently validated on a coherent UMA system.
---
 nvitop/api/device.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/nvitop/api/device.py b/nvitop/api/device.py
index af52be73..2a28193e 100644
--- a/nvitop/api/device.py
+++ b/nvitop/api/device.py
@@ -985,13 +985,21 @@ def memory_info(self) -> MemoryInfo:  # in bytes
                 memory_info = NA
             if libnvml.nvmlCheckReturn(memory_info):
                 if memory_info.total > 0:
-                    return MemoryInfo(
-                        total=memory_info.total,
-                        free=memory_info.free,
-                        used=memory_info.used,
-                        reserved=getattr(memory_info, 'reserved', NA),
-                    )
-                has_unified_memory = True
+                    # Detect coherent UMA platforms (e.g. GB10 Grace Blackwell):
+                    # nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB).
+                    # If total >= 90% of system RAM, treat as unified memory and use MemAvailable instead.
+                    vm = host.virtual_memory()
+                    if vm.total > 0 and memory_info.total >= vm.total * 9 // 10:
+                        has_unified_memory = True
+                    else:
+                        return MemoryInfo(
+                            total=memory_info.total,
+                            free=memory_info.free,
+                            used=memory_info.used,
+                            reserved=getattr(memory_info, 'reserved', NA),
+                        )
+                else:
+                    has_unified_memory = True
             if has_unified_memory:
                 # Device with unified memory
                 # Use system virtual memory as these devices share host memory

From 2ca5797f3ad07ddc33901808500b0c0f5b803606 Mon Sep 17 00:00:00 2001
From: Joe McLaren <236280545+parallelArchitect@users.noreply.github.com>
Date: Thu, 16 Apr 2026 13:20:55 -0400
Subject: [PATCH 2/2] fix: replace UMA acronym in comment to pass spell check

---
 nvitop/api/device.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nvitop/api/device.py b/nvitop/api/device.py
index 2a28193e..c15aed6b 100644
--- a/nvitop/api/device.py
+++ b/nvitop/api/device.py
@@ -985,7 +985,7 @@ def memory_info(self) -> MemoryInfo:  # in bytes
                 memory_info = NA
             if libnvml.nvmlCheckReturn(memory_info):
                 if memory_info.total > 0:
-                    # Detect coherent UMA platforms (e.g. GB10 Grace Blackwell):
+                    # Detect coherent unified-memory platforms (e.g. GB10 Grace Blackwell):
                     # nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB).
                     # If total >= 90% of system RAM, treat as unified memory and use MemAvailable instead.
                     vm = host.virtual_memory()