# syntax=docker/dockerfile:1

# TensorRT-LLM prebuilt release image: ships TensorRT-LLM, TensorRT, and the
# CUDA toolchain preinstalled, replacing the former from-source build against
# nvcr.io/nvidia/pytorch. Tag is pinned explicitly for reproducibility.
FROM nvcr.io/nvidia/tensorrt-llm/release:1.0.0rc6

# Extra package index so pip can resolve NVIDIA-hosted wheels
# (e.g. dependencies of nvidia-modelopt installed below).
ARG PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
|
4 |
| -ARG TRT_LLM_COMMIT=v0.20.0 |
5 |
| -ARG REMOVE_TRT_LLM_SRC=1 |
6 |
| -ARG CUDA_ARCH="89-real;90-real;100-real" |
7 |
| - |
8 | 4 | ENV PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \
|
9 | 5 | PIP_NO_CACHE_DIR=off \
|
10 | 6 | PIP_CONSTRAINT= \
|
11 | 7 | TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0+PTX"
# libgl1 provides libGL.so.1, typically required at import time by
# opencv-python and similar GUI/render libraries.
# update + install + list cleanup stay in ONE layer so a stale apt index is
# never cached and the lists never bloat the image; --no-install-recommends
# keeps the package footprint minimal.
RUN apt-get update && \
    apt-get install -y --no-install-recommends libgl1 && \
    rm -rf /var/lib/apt/lists/*
|
28 |
| -RUN cd tensorrt-llm && git lfs install && git lfs pull |
29 |
| - |
30 |
| -RUN cd tensorrt-llm \ |
31 |
| - && ./scripts/build_wheel.py --job_count $(nproc) --clean --python_bindings --benchmarks --install --cuda_architecture=${CUDA_ARCH} \ |
32 |
| - && git rev-parse --short HEAD > /workspace/tensorrt-llm.commit \ |
33 |
| - && chmod -R 777 . |
34 |
| -RUN pip install tensorrt-llm/build/tensorrt_llm*.whl |
| 13 | +WORKDIR /workspace |
35 | 14 |
|
36 |
| -# Remove TensorRT-LLM source code to reduce image size except for benchmarks and examples folders |
37 |
| -RUN if [ "$REMOVE_TRT_LLM_SRC" = "1" ]; then \ |
38 |
| - mkdir -p tensorrt-llm_keep; \ |
39 |
| - mv tensorrt-llm/benchmarks tensorrt-llm_keep/benchmarks; \ |
40 |
| - mv tensorrt-llm/examples tensorrt-llm_keep/examples; \ |
41 |
| - rm -rf tensorrt-llm; \ |
42 |
| - mv tensorrt-llm_keep tensorrt-llm; \ |
43 |
| - fi |
| 15 | +RUN ln -s /app/tensorrt_llm /workspace/tensorrt_llm |
44 | 16 |
|
# Update PATH and LD_LIBRARY_PATH variables for the TensorRT binaries.
# /usr/lib/x86_64-linux-gnu is prepended so that libcudnn.so.X is found by
# libonnxruntime_providers_tensorrt.so.
ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" \
    PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
# Install modelopt with all optional dependencies and pre-compile its CUDA
# extensions -- otherwise they are rebuilt on every docker run, which takes
# several minutes.
# NOTE(review): "nvidia-modelopt" is unpinned and -U upgrades to latest;
# pin an exact version for reproducible builds.
RUN pip install -U "nvidia-modelopt[all,dev-test]"
RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"