Update Dockerfile to TensorRT-LLM 0.17

kevalmorabia97 · yeyu-nvidia · commit 0dfb7c102252 · 2025-12-08T10:51:05.000-08:00
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,7 @@ Model Optimizer Changelog (Linux)
 
 **Backward Breaking Changes**
 
+- Upgrade TensorRT-LLM support to 0.17. Examples (e.g. the benchmark task in llm_ptq) may not be fully compatible with TensorRT-LLM 0.15.
 - Nvidia TensorRT Model Optimizer has changed its LICENSE from NVIDIA Proprietary (library wheel) and MIT (examples) to Apache 2.0 in this first full OSS release.
 - Deprecate Python 3.8, Torch 2.0, and Cuda 11.x support.
 - ONNX Runtime dependency upgraded to 1.20 which no longer supports Python 3.9.
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.6.2-devel-ubuntu22.04
+FROM nvidia/cuda:12.8.0-devel-ubuntu22.04
 
 WORKDIR /workspace
 
@@ -13,7 +13,7 @@ RUN rm -rf /usr/lib/python3/dist-packages/setuptools*
 RUN pip install setuptools -U
 
 # Install TensorRT-LLM
-ARG TRT_LLM_VERSION=0.15.0
+ARG TRT_LLM_VERSION=0.17.0
 RUN pip install "tensorrt-llm~=$TRT_LLM_VERSION" -U
 RUN git clone --depth 1 --branch "v$TRT_LLM_VERSION" https://github.com/NVIDIA/TensorRT-LLM.git && \
     mkdir tensorrt-llm && \
@@ -26,7 +26,7 @@ ENV LD_LIBRARY_PATH=/usr/local/lib/python3.10/dist-packages/tensorrt_llm/libs:$L
 ENV LD_LIBRARY_PATH=/usr/local/lib/python3.10/dist-packages/nvidia/cudnn/lib:$LD_LIBRARY_PATH
 
 # Install TensorRT dev environment
-ARG TENSORRT_URL=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz
+ARG TENSORRT_URL=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/tars/TensorRT-10.8.0.43.Linux.x86_64-gnu.cuda-12.8.tar.gz
 RUN wget -q -O tensorrt.tar.gz $TENSORRT_URL && \
     tar -xf tensorrt.tar.gz && \
     cp TensorRT-*/bin/trtexec /usr/local/bin && \
@@ -39,7 +39,7 @@ ENV LD_LIBRARY_PATH=$TRT_LIB_PATH:$LD_LIBRARY_PATH
 
 # Install modelopt with all optional dependencies and pre-compile CUDA extensions otherwise they take several minutes on every docker run
 RUN pip install "nvidia-modelopt[all]" -U
-ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
+ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0+PTX"
 RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"
 
 # Find and install requirements.txt files for all examples excluding windows