From 61b711c66366c79a1bd397db2a5e1bde4aca56c9 Mon Sep 17 00:00:00 2001 From: jbernloehr Date: Sat, 21 Mar 2026 13:03:00 +0100 Subject: [PATCH 1/2] [None][infra] Reduce Docker image layer count in release stage Consolidate all COPY and RUN steps in the release stage into a single RUN using bind mounts and cp. This replaces 7 COPY layers and 2 RUN layers with a single RUN layer. Also use a bind mount for the OSS attribution script instead of COPY+RUN+rm. Reduces the release stage from ~13 layers to 3 (pip install, copy+setup, OSS attribution), saving ~10 layers in the published image. Signed-off-by: jbernloehr --- docker/Dockerfile.multi | 58 +++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi index 62a5cb028fe3..d4bf170470ac 100644 --- a/docker/Dockerfile.multi +++ b/docker/Dockerfile.multi @@ -129,35 +129,41 @@ WORKDIR /app/tensorrt_llm RUN --mount=type=cache,target=/root/.cache/pip --mount=type=bind,from=wheel,source=/src/tensorrt_llm/build,target=/tmp/wheel \ pip install /tmp/wheel/tensorrt_llm*.whl -COPY README.md ./ -COPY --from=wheel /src/tensorrt_llm/build/tensorrt_llm*.whl ./ -COPY docs docs -COPY cpp/include include - -RUN ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/bin")') bin && \ - test -f bin/executorWorker && \ - ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/libs")') lib && \ - test -f lib/libnvinfer_plugin_tensorrt_llm.so && \ - echo "/app/tensorrt_llm/lib" > /etc/ld.so.conf.d/tensorrt_llm.conf && \ - ldconfig && \ - ! ( ldd -v bin/executorWorker | grep tensorrt_llm | grep -q "not found" ) - -ARG SRC_DIR=/src/tensorrt_llm -COPY --from=wheel ${SRC_DIR}/benchmarks benchmarks -ARG CPP_BUILD_DIR=${SRC_DIR}/cpp/build -COPY --from=wheel \ - ${CPP_BUILD_DIR}/benchmarks/bertBenchmark \ - ${CPP_BUILD_DIR}/benchmarks/gptManagerBenchmark \ - ${CPP_BUILD_DIR}/benchmarks/disaggServerBenchmark \ - benchmarks/cpp/ - -COPY examples examples -RUN chmod -R a+w examples && \ +RUN --mount=type=bind,source=README.md,target=/mnt/ctx/README.md \ + --mount=type=bind,source=docs,target=/mnt/ctx/docs \ + --mount=type=bind,source=cpp/include,target=/mnt/ctx/include \ + --mount=type=bind,source=examples,target=/mnt/ctx/examples \ + --mount=type=bind,from=wheel,source=/src/tensorrt_llm/build,target=/mnt/wheel \ + --mount=type=bind,from=wheel,source=/src/tensorrt_llm/benchmarks,target=/mnt/benchmarks \ + --mount=type=bind,from=wheel,source=/src/tensorrt_llm/cpp/build/benchmarks,target=/mnt/cpp_benchmarks \ + # Copy build context files + cp /mnt/ctx/README.md ./ && \ + cp -r /mnt/ctx/docs ./docs && \ + cp -r /mnt/ctx/include ./include && \ + cp -r /mnt/ctx/examples ./examples && \ + chmod -R a+w examples && \ + # Copy wheel stage outputs + cp /mnt/wheel/tensorrt_llm*.whl ./ && \ + cp -r /mnt/benchmarks ./benchmarks && \ + mkdir -p benchmarks/cpp && \ + cp /mnt/cpp_benchmarks/bertBenchmark \ + /mnt/cpp_benchmarks/gptManagerBenchmark \ + /mnt/cpp_benchmarks/disaggServerBenchmark \ + benchmarks/cpp/ && \ rm -v \ benchmarks/cpp/bertBenchmark.cpp \ benchmarks/cpp/gptManagerBenchmark.cpp \ benchmarks/cpp/disaggServerBenchmark.cpp \ benchmarks/cpp/CMakeLists.txt && \ + # Create symlinks to installed package binaries and libraries + ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/bin")') bin && \ + test -f bin/executorWorker && \ + ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/libs")') lib && \ + test -f lib/libnvinfer_plugin_tensorrt_llm.so && \ + echo "/app/tensorrt_llm/lib" > /etc/ld.so.conf.d/tensorrt_llm.conf && \ + ldconfig && \ + ! ( ldd -v bin/executorWorker | grep tensorrt_llm | grep -q "not found" ) && \ + # Clean up caches and CVE workarounds rm -rf /root/.cache/uv/archive-v0 && \ # WAR against https://github.com/advisories/GHSA-58pv-8j8x-9vj2 rm -rf /usr/local/lib/python3.12/dist-packages/setuptools/_vendor/jaraco.context-5.3.0.dist-info && \ @@ -171,8 +177,8 @@ ENV TRT_LLM_GIT_COMMIT=${GIT_COMMIT} \ TRT_LLM_VERSION=${TRT_LLM_VER} # Generate OSS attribution file for release image -COPY scripts/generate_container_oss_attribution.sh /tmp/generate_container_oss_attribution.sh -RUN bash /tmp/generate_container_oss_attribution.sh "release" "${TRT_LLM_VER}" "${TARGETARCH}" && rm /tmp/generate_container_oss_attribution.sh +RUN --mount=type=bind,source=scripts/generate_container_oss_attribution.sh,target=/mnt/gen_attribution.sh \ + bash /mnt/gen_attribution.sh "release" "${TRT_LLM_VER}" "${TARGETARCH}" FROM wheel AS tritonbuild From 4aefe7779e59acecb93b55f3c5a5724e026ae655 Mon Sep 17 00:00:00 2001 From: Tyler Burt <195370667+tburt-nv@users.noreply.github.com> Date: Fri, 5 Jun 2026 09:01:25 -0700 Subject: [PATCH 2/2] update CI images Signed-off-by: Tyler Burt <195370667+tburt-nv@users.noreply.github.com> --- jenkins/current_image_tags.properties | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index e0e879d8f9d4..e220c19ae3bd 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -13,8 +13,8 @@ # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-x86_64-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606012126-14025 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-sbsa-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606012126-14025 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py310-trt10.15.1.29-skip-tritondevel-202606012126-14025 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py312-trt10.15.1.29-skip-tritondevel-202606012126-14025 -LLM_SBSA_WHEEL_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-ubuntu24.04-sbsa-ubuntu24.04-py312-trt10.15.1.29-skip-tritondevel-202606012126-14025 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-x86_64-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606051544-14972 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-sbsa-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606051544-14972 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py310-trt10.15.1.29-skip-tritondevel-202606051544-14972 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py312-trt10.15.1.29-skip-tritondevel-202606051544-14972 +LLM_SBSA_WHEEL_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-ubuntu24.04-sbsa-ubuntu24.04-py312-trt10.15.1.29-skip-tritondevel-202606051544-14972