Skip to content

nvproxy: support nvidia 595.71.05 #13371

Open
LandonTClipp wants to merge 1 commit into
google:master from
LandonTClipp:nvidia-595
Open

nvproxy: support nvidia 595.71.05 #13371
LandonTClipp wants to merge 1 commit into
google:master from
LandonTClipp:nvidia-595

Conversation

@LandonTClipp
Copy link
Copy Markdown
Contributor

@LandonTClipp LandonTClipp commented Jun 4, 2026

Summary

595.71.05 differs from 590.48.01 only in simple (byte-copied) ioctl structs: NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS,
NV2080_CTRL_NVLINK_GET_PLATFORM_INFO_PARAMS,
NV0000_CTRL_SYSTEM_GET_P2P_CAPS_MATRIX_PARAMS,
NV00FD_CTRL_ATTACH_REMOTE_GPU_PARAMS, and the fabric attrs/event structs nested under them. Some of these structs changed size or field offsets (see the diff below), but none gained a pointer (NvP64) or fd field, so they remain simple and require no nvproxy changes; the ABI is inherited from 590.48.01 unchanged.

I would appreciate someone double-checking my work, but I believe this was done properly according to the README.

Verification

I was able to run a CUDA vectoradd workload on 595.71.05:

lclipp@CW-HP216DG9DT-L wreakit % k logs vectoradd-cuda
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done

Driver Hash and Diff

The results of the driver hashing and diffing tools are below:

INFO: Running command line: bazel-bin/tools/gpu/main_/main <args omitted>
I0604 19:37:17.128923  782726 install_driver.go:275] Downloading driver from URL: https://us.download.nvidia.com/tesla/595.71.05/NVIDIA-Linux-x86_64-595.71.05.run
I0604 19:37:17.128917  782726 install_driver.go:275] Downloading driver from URL: https://us.download.nvidia.com/XFree86/aarch64/595.71.05/NVIDIA-Linux-aarch64-595.71.05.run
amd64 595.71.05 36203b8960b7e49c8a42f6ba1f5863cde34a05e93d2b24b798af06dd846f6b82
arm64 595.71.05 5f32a5a12d347452937780135a44c8866f5e1bf3b347f4a29ba31ba0d6563eef

I0604 20:06:06.626311  791221 run_differ.go:111] Parsing driver version 590.48.01
I0604 20:06:23.951550  791221 run_differ.go:116] Parsing driver version 595.71.05
I0604 20:06:43.391048  791221 run_differ.go:124] Comparing record definitions between 590.48.01 and 595.71.05
I0604 20:06:43.391635  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl00f8.h:152:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl00f8.h:152:16
struct NV_FABRIC_MEMORY_ATTRS
  []parser.RecordField{
        ... // 3 identical elements
        {Name: "cliqueId", Type: "NvU32", Offset: 24},
        {Name: "bwModeEpoch", Type: "NvU64", Offset: 32},
        {
                Name:   "bwMode",
-               Type:   "NvU8",
+               Type:   "NvU16",
                Offset: 40,
        },
  }

I0604 20:06:43.391769  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:334:5
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:323:5
- union NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS::gpuNameString_t
+ struct NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS::gpuNameString_t
  size: 128 -> 64 (bytes)
  []parser.RecordField{
        {Name: "ascii", Type: "NvU8[64]"},
-       s"NvU16[64] unicode",
  }

I0604 20:06:43.391857  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h:2661:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h:2669:16
struct NV2080_CTRL_NVLINK_GET_PLATFORM_INFO_PARAMS
  size: 38 -> 39 (bytes)
  []parser.RecordField{
        ... // 6 identical elements
        {Name: "moduleId", Type: "NvU8", Offset: 36},
        {Name: "nvlinkSignalingProtocol", Type: "NvU8", Offset: 37},
+       s"NvU8 linksPerSpigot",
  }

I0604 20:06:43.391999  791221 run_differ.go:148]
--- A: 590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h:1310:16
+++ B: 595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h:1309:16
struct NV0000_CTRL_SYSTEM_GET_P2P_CAPS_MATRIX_PARAMS
  []parser.RecordField{
        ... // 2 identical elements
        {Name: "gpuIdGrpA", Type: "NvU32[8]", Offset: 8},
        {Name: "gpuIdGrpB", Type: "NvU32[8]", Offset: 40},
        {
                Name:   "p2pCaps",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 72,
        },
        {
                Name:   "a2bOptimalReadCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 328,
        },
        {
                Name:   "a2bOptimalWriteCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 584,
        },
        {
                Name:   "b2aOptimalReadCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 840,
        },
        {
                Name:   "b2aOptimalWriteCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 1096,
        },
  }

I0604 20:06:43.392039  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:332:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:321:16
struct NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS
  size: 132 -> 68 (bytes)

I0604 20:06:43.392557  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl00fd.h:243:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl00fd.h:243:16
struct NV00FD_CTRL_ATTACH_REMOTE_GPU_PARAMS
  []parser.RecordField{
        ... // 4 identical elements
        {Name: "cliqueId", Type: "NvU32", Offset: 32},
        {Name: "nodeId", Type: "NvU16", Offset: 36},
        {
                Name:   "bwMode",
-               Type:   "NvU8",
+               Type:   "NvU16",
                Offset: 38,
        },
  }

I0604 20:06:43.392984  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl00f1.h:128:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl00f1.h:128:16
struct NV00F1_CTRL_ATTACH_REMOTE_GPU_EVENT_DATA
  []parser.RecordField{
        ... // 5 identical elements
        {Name: "index", Type: "NvU16", Offset: 36},
        {Name: "exportNodeId", Type: "NvU16", Offset: 38},
        {
                Name:   "bwMode",
-               Type:   "NvU8",
+               Type:   "NvU16",
                Offset: 40,
        },
        {
                Name:   "exportUuid",
                Type:   "NvU8[16]",
-               Offset: 41,
+               Offset: 42,
        },
  }

I0604 20:06:43.393034  791221 run_differ.go:152] Comparing type aliases between 590.48.01 and 595.71.05
I0604 20:06:43.393066  791221 run_differ.go:168] alias NV0000_CTRL_P2P_CAPS_MATRIX_ROW not found in second source file
I0604 20:06:43.393082  791221 run_differ.go:164] alias NvU32[8] not found in first source file
I0604 20:06:43.393096  791221 run_differ.go:179] Comparing constants between 590.48.01 and 595.71.05

595.71.05 differs from 590.48.01 only in simple (byte-copied) ioctl
structs: NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS,
NV2080_CTRL_NVLINK_GET_PLATFORM_INFO_PARAMS,
NV0000_CTRL_SYSTEM_GET_P2P_CAPS_MATRIX_PARAMS,
NV00FD_CTRL_ATTACH_REMOTE_GPU_PARAMS, and the fabric attrs/event
structs nested under them. None gained a pointer (NvP64) or fd field,
so they remain simple and require no nvproxy changes; the ABI is
inherited from 590.48.01 unchanged.

The results of the driver hashing and diffing tools are below:

```
INFO: Running command line: bazel-bin/tools/gpu/main_/main <args omitted>
I0604 19:37:17.128923  782726 install_driver.go:275] Downloading driver from URL: https://us.download.nvidia.com/tesla/595.71.05/NVIDIA-Linux-x86_64-595.71.05.run
I0604 19:37:17.128917  782726 install_driver.go:275] Downloading driver from URL: https://us.download.nvidia.com/XFree86/aarch64/595.71.05/NVIDIA-Linux-aarch64-595.71.05.run
amd64 595.71.05 36203b8960b7e49c8a42f6ba1f5863cde34a05e93d2b24b798af06dd846f6b82
arm64 595.71.05 5f32a5a12d347452937780135a44c8866f5e1bf3b347f4a29ba31ba0d6563eef

I0604 20:06:06.626311  791221 run_differ.go:111] Parsing driver version 590.48.01
I0604 20:06:23.951550  791221 run_differ.go:116] Parsing driver version 595.71.05
I0604 20:06:43.391048  791221 run_differ.go:124] Comparing record definitions between 590.48.01 and 595.71.05
I0604 20:06:43.391635  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl00f8.h:152:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl00f8.h:152:16
struct NV_FABRIC_MEMORY_ATTRS
  []parser.RecordField{
        ... // 3 identical elements
        {Name: "cliqueId", Type: "NvU32", Offset: 24},
        {Name: "bwModeEpoch", Type: "NvU64", Offset: 32},
        {
                Name:   "bwMode",
-               Type:   "NvU8",
+               Type:   "NvU16",
                Offset: 40,
        },
  }

I0604 20:06:43.391769  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:334:5
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:323:5
- union NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS::gpuNameString_t
+ struct NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS::gpuNameString_t
  size: 128 -> 64 (bytes)
  []parser.RecordField{
        {Name: "ascii", Type: "NvU8[64]"},
-       s"NvU16[64] unicode",
  }

I0604 20:06:43.391857  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h:2661:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h:2669:16
struct NV2080_CTRL_NVLINK_GET_PLATFORM_INFO_PARAMS
  size: 38 -> 39 (bytes)
  []parser.RecordField{
        ... // 6 identical elements
        {Name: "moduleId", Type: "NvU8", Offset: 36},
        {Name: "nvlinkSignalingProtocol", Type: "NvU8", Offset: 37},
+       s"NvU8 linksPerSpigot",
  }

I0604 20:06:43.391999  791221 run_differ.go:148]
--- A: 590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h:1310:16
+++ B: 595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h:1309:16
struct NV0000_CTRL_SYSTEM_GET_P2P_CAPS_MATRIX_PARAMS
  []parser.RecordField{
        ... // 2 identical elements
        {Name: "gpuIdGrpA", Type: "NvU32[8]", Offset: 8},
        {Name: "gpuIdGrpB", Type: "NvU32[8]", Offset: 40},
        {
                Name:   "p2pCaps",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 72,
        },
        {
                Name:   "a2bOptimalReadCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 328,
        },
        {
                Name:   "a2bOptimalWriteCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 584,
        },
        {
                Name:   "b2aOptimalReadCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 840,
        },
        {
                Name:   "b2aOptimalWriteCes",
-               Type:   "NV0000_CTRL_P2P_CAPS_MATRIX_ROW[8]",
+               Type:   "NvU32[8][8]",
                Offset: 1096,
        },
  }

I0604 20:06:43.392039  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:332:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h:321:16
struct NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS
  size: 132 -> 68 (bytes)

I0604 20:06:43.392557  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl00fd.h:243:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl00fd.h:243:16
struct NV00FD_CTRL_ATTACH_REMOTE_GPU_PARAMS
  []parser.RecordField{
        ... // 4 identical elements
        {Name: "cliqueId", Type: "NvU32", Offset: 32},
        {Name: "nodeId", Type: "NvU16", Offset: 36},
        {
                Name:   "bwMode",
-               Type:   "NvU8",
+               Type:   "NvU16",
                Offset: 38,
        },
  }

I0604 20:06:43.392984  791221 run_differ.go:148]
--- A: /tmp/run_differ_98664942/run_differ_1884048076/590.48.01/src/common/sdk/nvidia/inc/ctrl/ctrl00f1.h:128:16
+++ B: /tmp/run_differ_98664942/run_differ_1827132553/595.71.05/src/common/sdk/nvidia/inc/ctrl/ctrl00f1.h:128:16
struct NV00F1_CTRL_ATTACH_REMOTE_GPU_EVENT_DATA
  []parser.RecordField{
        ... // 5 identical elements
        {Name: "index", Type: "NvU16", Offset: 36},
        {Name: "exportNodeId", Type: "NvU16", Offset: 38},
        {
                Name:   "bwMode",
-               Type:   "NvU8",
+               Type:   "NvU16",
                Offset: 40,
        },
        {
                Name:   "exportUuid",
                Type:   "NvU8[16]",
-               Offset: 41,
+               Offset: 42,
        },
  }

I0604 20:06:43.393034  791221 run_differ.go:152] Comparing type aliases between 590.48.01 and 595.71.05
I0604 20:06:43.393066  791221 run_differ.go:168] alias NV0000_CTRL_P2P_CAPS_MATRIX_ROW not found in second source file
I0604 20:06:43.393082  791221 run_differ.go:164] alias NvU32[8] not found in first source file
I0604 20:06:43.393096  791221 run_differ.go:179] Comparing constants between 590.48.01 and 595.71.05
```

Signed-off-by: LandonTClipp <lclipp@coreweave.com>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant