|
69 | 69 | # incorrectly load the other version of the openvino libraries. |
70 | 70 | # |
71 | 71 | TRITON_VERSION_MAP = { |
72 | | - "2.48.0dev": ( |
73 | | - "24.06dev", # triton container |
74 | | - "24.06", # upstream container |
| 72 | + "2.49.0dev": ( |
| 73 | + "24.08dev", # triton container |
| 74 | + "24.07", # upstream container |
75 | 75 | "1.18.1", # ORT |
76 | 76 | "2024.0.0", # ORT OpenVINO |
77 | 77 | "2024.0.0", # Standalone OpenVINO |
78 | 78 | "3.2.6", # DCGM version |
79 | | - "0.5.0.post1", # vLLM version |
| 79 | + "0.5.3.post1", # vLLM version |
80 | 80 | ) |
81 | 81 | } |
82 | 82 |
|
@@ -1086,18 +1086,23 @@ def create_dockerfile_linux( |
1086 | 1086 | # Remove contents that are not needed in runtime |
1087 | 1087 | # Setuptools has breaking changes in version 70.0.0; it is no longer pinned to 69.5.1 and is instead uninstalled at the end of this RUN step |
1088 | 1088 | # The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0 |
1089 | | -RUN ldconfig && \ |
1090 | | - ARCH="$(uname -i)" && \ |
1091 | | - rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \ |
1092 | | - rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \ |
1093 | | - rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples && \ |
1094 | | - python3 -m pip install --upgrade pip && \ |
1095 | | - pip3 install --no-cache-dir transformers && \ |
1096 | | - find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf && \ |
1097 | | - find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf && \ |
1098 | | - pip3 install --no-cache-dir setuptools==69.5.1 grpcio-tools==1.64.0 |
1099 | | -
|
| 1089 | +RUN ldconfig && \\ |
| 1090 | + ARCH="$(uname -i)" && \\ |
| 1091 | + rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \\ |
| 1092 | + rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \\ |
| 1093 | + rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples && \\ |
| 1094 | + python3 -m pip install --upgrade pip && \\ |
| 1095 | + pip3 install --no-cache-dir transformers && \\ |
| 1096 | + find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf && \\ |
| 1097 | + find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf && \\ |
| 1098 | + pip3 install --no-cache-dir grpcio-tools==1.64.0 && \\ |
| 1099 | + pip3 uninstall -y setuptools |
1100 | 1100 | ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH |
| 1101 | +
|
| 1102 | +# There are some ucc issues when spawning mpi processes with ompi v4.1.7a1. |
| 1103 | +# Downgrade to ompi v4.1.5rc2 to avoid the issue. |
| 1104 | +RUN rm -fr /opt/hpcx/ompi |
| 1105 | +COPY --from=nvcr.io/nvidia/tritonserver:24.02-py3-min /opt/hpcx/ompi /opt/hpcx/ompi |
1101 | 1106 | """ |
1102 | 1107 | with open(os.path.join(ddir, dockerfile_name), "w") as dfile: |
1103 | 1108 | dfile.write(df) |
@@ -1229,6 +1234,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach |
1229 | 1234 | virtualenv \\ |
1230 | 1235 | && rm -rf /var/lib/apt/lists/* |
1231 | 1236 | """ |
| 1237 | + if "tensorrtllm" in backends: |
| 1238 | + df += """ |
| 1239 | +# Update openssh-client to fix CVE-2024-6387. This can be removed when trtllm uses a later CUDA container (12.5 or later). |
| 1240 | +RUN apt-get update \\ |
| 1241 | + && apt-get install -y --no-install-recommends \\ |
| 1242 | + openssh-client \\ |
| 1243 | + && rm -rf /var/lib/apt/lists/* |
| 1244 | + """ |
1232 | 1245 |
|
1233 | 1246 | if "vllm" in backends: |
1234 | 1247 | df += """ |
|
0 commit comments