From e436c6b7c71fa46d5fefab2e8506f70f8566bf44 Mon Sep 17 00:00:00 2001 From: Joe McLaren <236280545+parallelArchitect@users.noreply.github.com> Date: Wed, 15 Apr 2026 06:50:48 -0400 Subject: [PATCH] Fix NVML memory reporting regression on coherent UMA platforms (Fixes #449) On GB10 / DGX Spark, nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB). This prevents has_unified_memory from being set, causing incorrect VRAM reporting and broken memory graph since 3.3.1. Fix: detect UMA by comparing NVML total against /proc/meminfo MemTotal. If total >= 90% of system RAM, classify as unified memory and use MemAvailable instead of MemTotal for display. Note: requires validation on GB10 / DGX Spark hardware. Author does not have access to a coherent UMA system. --- src/extract_gpuinfo_nvidia.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/extract_gpuinfo_nvidia.c b/src/extract_gpuinfo_nvidia.c index 5ba298cc..7c637058 100644 --- a/src/extract_gpuinfo_nvidia.c +++ b/src/extract_gpuinfo_nvidia.c @@ -655,9 +655,24 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) { memory_info.version = 2; last_nvml_return_status = nvmlDeviceGetMemoryInfo_v2(device, &memory_info); if (last_nvml_return_status == NVML_SUCCESS) { - // Check if this is a unified memory GPU (total == 0 indicates unified memory) got_meminfo = true; - if (memory_info.total == 0) { + // Detect coherent UMA platforms (e.g. GB10 Grace Blackwell): + // NVML returns NVML_SUCCESS but total == full system MemTotal. + // This is not usable VRAM — treat as unified memory and use MemAvailable. + unsigned long long sys_mem_total = 0; + FILE *mf = fopen("/proc/meminfo", "r"); + if (mf) { + char ml[256]; + while (fgets(ml, sizeof(ml), mf)) { + if (sscanf(ml, "MemTotal: %llu kB", &sys_mem_total) == 1) { + sys_mem_total *= 1024; + break; + } + } + fclose(mf); + } + if (memory_info.total == 0 || + (sys_mem_total > 0 && memory_info.total >= sys_mem_total * 9 / 10)) { has_unified_memory = true; } else { SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, memory_info.total);