Skip to content

Commit 68dcabd

Browse files
pvijayakrishmc-nv
authored and committed
Build: Update to use custom vllm and TRT version at build and model generation respectively (#7927)
Co-authored-by: Misha Chornyi <[email protected]>
1 parent 4ef346c commit 68dcabd

File tree

2 files changed

+68
-13
lines changed

2 files changed

+68
-13
lines changed

build.py

Lines changed: 60 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@
7373
DEFAULT_TRITON_VERSION_MAP = {
7474
"release_version": "2.54.0dev",
7575
"triton_container_version": "25.01dev",
76-
"upstream_container_version": "24.12",
76+
"upstream_container_version": "25.01",
7777
"ort_version": "1.20.1",
78-
"ort_openvino_version": "2024.4.0",
79-
"standalone_openvino_version": "2024.4.0",
78+
"ort_openvino_version": "2024.5.0",
79+
"standalone_openvino_version": "2024.5.0",
8080
"dcgm_version": "3.3.6",
8181
"vllm_version": "0.6.3.post1",
8282
"rhel_py_version": "3.12.3",
@@ -1467,12 +1467,31 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
14671467
"""
14681468

14691469
if "vllm" in backends:
1470-
df += """
1471-
# vLLM needed for vLLM backend
1472-
RUN pip3 install vllm=={}
1473-
""".format(
1474-
FLAGS.vllm_version
1475-
)
1470+
df += f"""
1471+
ARG BUILD_PUBLIC_VLLM="true"
1472+
ARG VLLM_INDEX_URL
1473+
ARG PYTORCH_TRITON_URL
1474+
1475+
RUN --mount=type=secret,id=req,target=/run/secrets/requirements \\
1476+
if [ "$BUILD_PUBLIC_VLLM" = "false" ]; then \\
1477+
pip3 install --no-cache-dir \\
1478+
mkl==2021.1.1 \\
1479+
mkl-include==2021.1.1 \\
1480+
mkl-devel==2021.1.1 \\
1481+
&& pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
1482+
# Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1
1483+
&& cd /tmp \\
1484+
&& wget $PYTORCH_TRITON_URL \\
1485+
&& pip install --no-cache-dir /tmp/pytorch_triton-*.whl \\
1486+
&& rm /tmp/pytorch_triton-*.whl; \\
1487+
else \\
1488+
# public vLLM needed for vLLM backend
1489+
pip3 install vllm=={DEFAULT_TRITON_VERSION_MAP["vllm_version"]}; \\
1490+
fi
1491+
1492+
ARG PYVER=3.12
1493+
ENV LD_LIBRARY_PATH /usr/local/lib:/usr/local/lib/python${{PYVER}}/dist-packages/torch/lib:${{LD_LIBRARY_PATH}}
1494+
"""
14761495

14771496
if "dali" in backends:
14781497
df += """
@@ -1840,13 +1859,21 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
18401859
finalargs = [
18411860
"docker",
18421861
"build",
1862+
]
1863+
if secrets != "":
1864+
finalargs += [
1865+
f"--secret id=req,src={requirements}",
1866+
f"--build-arg VLLM_INDEX_URL={vllm_index_url}",
1867+
f"--build-arg PYTORCH_TRITON_URL={pytorch_triton_url}",
1868+
f"--build-arg BUILD_PUBLIC_VLLM={build_public_vllm}",
1869+
]
1870+
finalargs += [
18431871
"-t",
18441872
"tritonserver",
18451873
"-f",
18461874
os.path.join(FLAGS.build_dir, "Dockerfile"),
18471875
".",
18481876
]
1849-
18501877
docker_script.cwd(THIS_SCRIPT_DIR)
18511878
docker_script.cmd(finalargs, check_exitcode=True)
18521879

@@ -2691,6 +2718,19 @@ def enable_all():
26912718
default=DEFAULT_TRITON_VERSION_MAP["rhel_py_version"],
26922719
help="This flag sets the Python version for RHEL platform of Triton Inference Server to be built. Default: the latest supported version.",
26932720
)
2721+
parser.add_argument(
2722+
"--build-secret",
2723+
action="append",
2724+
required=False,
2725+
nargs=2,
2726+
metavar=("key", "value"),
2727+
help="Add build secrets in the form of <key> <value>. These secrets are used during the build process for vllm. The secrets are passed to the Docker build step as `--secret id=<key>`. The following keys are expected and their purposes are described below:\n\n"
2728+
" - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n"
2729+
" - 'vllm_index_url': The index URL for the pip install.\n"
2730+
" - 'pytorch_triton_url': The location of the PyTorch wheel to download.\n"
2731+
" - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public VLLM version.\n\n"
2732+
"Ensure that the required environment variables for these secrets are set before running the build.",
2733+
)
26942734
FLAGS = parser.parse_args()
26952735

26962736
if FLAGS.image is None:
@@ -2717,6 +2757,8 @@ def enable_all():
27172757
FLAGS.override_backend_cmake_arg = []
27182758
if FLAGS.extra_backend_cmake_arg is None:
27192759
FLAGS.extra_backend_cmake_arg = []
2760+
if FLAGS.build_secret is None:
2761+
FLAGS.build_secret = []
27202762

27212763
# if --enable-all is specified, then update FLAGS to enable all
27222764
# settings, backends, repo-agents, caches, file systems, endpoints, etc.
@@ -2810,6 +2852,14 @@ def enable_all():
28102852
)
28112853
backends["python"] = backends["vllm"]
28122854

2855+
secrets = dict(getattr(FLAGS, "build_secret", []))
2856+
if secrets is not None:
2857+
requirements = secrets.get("req", "")
2858+
vllm_index_url = secrets.get("vllm_index_url", "")
2859+
pytorch_triton_url = secrets.get("pytorch_triton_url", "")
2860+
build_public_vllm = secrets.get("build_public_vllm", "true")
2861+
log('Build Arg for BUILD_PUBLIC_VLLM: "{}"'.format(build_public_vllm))
2862+
28132863
# Initialize map of repo agents to build and repo-tag for each.
28142864
repoagents = {}
28152865
for be in FLAGS.repoagent:

qa/common/gen_qa_model_repository

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -500,9 +500,14 @@ chmod -R 777 $VOLUME_FORMATDESTDIR
500500
python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
501501
chmod -R 777 $VOLUME_DATADEPENDENTDIR
502502
# Make shared library for custom Hardmax plugin.
503-
(git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git && \
504-
cd /workspace/TensorRT/samples/python/onnx_custom_plugin && rm -rf build && mkdir build && \
505-
cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.)
503+
if [ -d "/usr/src/tensorrt" ]; then
504+
cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
505+
else
506+
git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git
507+
cd /workspace/TensorRT/samples/python/onnx_custom_plugin
508+
fi
509+
rm -rf build && mkdir build && \
510+
cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.
506511
LD_PRELOAD=$VOLUME_PLGDESTDIR/libcustomHardmaxPlugin.so python3 $VOLUME_SRCDIR/gen_qa_trt_plugin_models.py --models_dir=$VOLUME_PLGDESTDIR
507512
chmod -R 777 $VOLUME_PLGDESTDIR
508513
EOF

0 commit comments

Comments (0)