# syntax=docker/dockerfile:1
FROM nvcr.io/nvidia/pytorch:25.03-py3

ARG PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
ARG TRT_LLM_COMMIT=v0.19.0
ARG REMOVE_TRT_LLM_SRC=1
ARG CUDA_ARCH="89-real;90-real;100-real"

# PIP_NO_CACHE_DIR=1 disables pip's download cache to keep layers small.
# (The previous value "off" is parsed by pip as boolean *false*, which
# re-enables the cache — the opposite of the intent.)
# PIP_CONSTRAINT is cleared — presumably to override a constraint file set by
# the NGC base image; verify against the base image's pip configuration.
ENV PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \
    PIP_NO_CACHE_DIR=1 \
    PIP_CONSTRAINT= \
    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0+PTX"

WORKDIR /workspace
# Fetch the TensorRT-LLM sources at the pinned tag/commit, with all
# submodules. The ssh mount (id=nvidia) is available to builds that forward an
# agent; the anonymous https clone itself does not require it.
RUN --mount=type=ssh,id=nvidia \
    git clone https://github.com/NVIDIA/TensorRT-LLM.git tensorrt-llm && \
    cd tensorrt-llm && \
    git checkout ${TRT_LLM_COMMIT} && \
    git submodule update --init --recursive
# Install build prerequisites via the helper scripts shipped in the
# TensorRT-LLM source tree (base packages, CMake, mpi4py, TensorRT, CUDA
# toolkit). Each script gets its own layer so changing one does not
# invalidate the cache for the others.
RUN bash tensorrt-llm/docker/common/install_base.sh $(python --version 2>&1 | awk '{print $2}')
RUN bash tensorrt-llm/docker/common/install_cmake.sh
RUN bash tensorrt-llm/docker/common/install_mpi4py.sh
RUN bash tensorrt-llm/docker/common/install_tensorrt.sh
RUN bash tensorrt-llm/docker/common/install_cuda_toolkit.sh
# Fetch Git LFS artifacts required by the build.
RUN cd tensorrt-llm && git lfs install && git lfs pull

# Build and install the TensorRT-LLM wheel for the requested CUDA
# architectures, and record the exact commit that was built.
# NOTE(review): --cuda_architecture resolves to build_wheel.py's
# --cuda_architectures flag only via argparse prefix matching — consider
# spelling the flag out in full. TODO confirm against build_wheel.py.
# NOTE(review): chmod -R 777 makes the entire source tree world-writable;
# overly broad — consider targeted permissions instead.
RUN cd tensorrt-llm \
    && ./scripts/build_wheel.py --job_count $(nproc) --clean --python_bindings --benchmarks --install --cuda_architecture=${CUDA_ARCH} \
    && git rev-parse --short HEAD > /workspace/tensorrt-llm.commit \
    && chmod -R 777 .
RUN pip install tensorrt-llm/build/tensorrt_llm*.whl
# Remove the TensorRT-LLM source tree to reduce image size, keeping only the
# benchmarks and examples folders. Steps are &&-chained so a failed `mv`
# aborts the build instead of falling through to `rm -rf` and silently
# deleting the tree (the original ';'-separated form continued past errors).
RUN if [ "$REMOVE_TRT_LLM_SRC" = "1" ]; then \
        mkdir -p tensorrt-llm_keep && \
        mv tensorrt-llm/benchmarks tensorrt-llm_keep/benchmarks && \
        mv tensorrt-llm/examples tensorrt-llm_keep/examples && \
        rm -rf tensorrt-llm && \
        mv tensorrt-llm_keep tensorrt-llm; \
    fi
# Expose the TensorRT binaries (e.g. trtexec) and shared libraries installed
# under /usr/local/tensorrt on PATH and LD_LIBRARY_PATH.
ENV LD_LIBRARY_PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" \
    PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
# Export the path to 'libcudnn.so.X' needed by 'libonnxruntime_providers_tensorrt.so'
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
# Install modelopt with all optional dependencies and pre-compile CUDA
# extensions — otherwise they take several minutes on every docker run.
# NOTE(review): the package is unpinned so each build pulls the latest
# release; pin a version here if reproducible builds are required.
RUN pip install -U "nvidia-modelopt[all,dev-test]"
RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"

# Find and install requirements.txt files for all examples excluding windows
|
|
0 commit comments