Skip to content

Commit 9a61b81

Browse files
authored
Merge branch 'NVIDIA:main' into user/venky/test-1-auto-assign-pr
2 parents 5ae7685 + aa72d39 commit 9a61b81

File tree

387 files changed

+12520
-5407
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

387 files changed

+12520
-5407
lines changed

.devcontainer/devcontainer.env

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Environment variables used to configure the Dev Container setup.
#
# The syntax needs to be compatible with
# https://docs.docker.com/compose/how-tos/environment-variables/variable-interpolation/#env-file-syntax
#
# Edit this file as necessary. For local changes not to be committed back
# to the repository, create/edit devcontainer.env.user instead.
#
# LOCAL_HF_HOME is the host directory that docker-compose.yml mounts at
# /huggingface inside the container. Resolution order:
#   1. $HF_HOME, if set on the host;
#   2. $XDG_CACHE_HOME/huggingface, if XDG_CACHE_HOME is set on the host;
#   3. $HOME/.cache/huggingface otherwise.
# HF_HOME_DEFAULT is kept so that existing devcontainer.env.user files which
# reference it continue to work.
HF_HOME_DEFAULT="${HOME}/.cache/huggingface"
HF_HOME_XDG_DEFAULT="${XDG_CACHE_HOME:-${HOME}/.cache}/huggingface"
LOCAL_HF_HOME="${HF_HOME:-${HF_HOME_XDG_DEFAULT}}"

.devcontainer/devcontainer.json

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,18 @@
33
{
44
"name": "TRT-LLM Devcontainer",
55
"dockerComposeFile": [
6-
"docker-compose.yml"
6+
"docker-compose.yml",
7+
"docker-compose.override.yml"
78
],
89
"service": "tensorrt_llm-dev",
910
"remoteUser": "ubuntu",
1011
"containerEnv": {
11-
// "CCACHE_DIR" : "/home/coder/${localWorkspaceFolderBasename}/cpp/.ccache",
12-
// "CCACHE_BASEDIR" : "/home/coder/${localWorkspaceFolderBasename}",
1312
"HF_TOKEN": "${localEnv:HF_TOKEN}",
1413
"HF_HOME": "/huggingface",
1514
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history"
1615
},
1716
"workspaceFolder": "/workspaces/tensorrt_llm",
18-
// "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
19-
// "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
20-
"mounts": [
21-
"source=${localEnv:HOME}/.cache/huggingface,target=/huggingface,type=bind", // HF cache
22-
"source=/home/scratch.trt_llm_data/,target=/home/scratch.trt_llm_data/,type=bind,consistency=consistent"
23-
],
17+
"initializeCommand": "cd ${localWorkspaceFolder} && ./.devcontainer/make_env.py",
2418
// Note: sourcing .profile is required since we use a local user and the python interpreter is
2519
// global (/usr/bin/python). In this case, pip will default to a local user path which is not
2620
// by default in the PATH. In interactive devcontainer shells, .profile is sourced by default.
@@ -43,7 +37,9 @@
4337
// "ms-vscode.cmake-tools",
4438
// Git & Github
4539
// "GitHub.vscode-pull-request-github"
46-
"eamodio.gitlens"
40+
"eamodio.gitlens",
41+
// Docs
42+
"ms-vscode.live-server"
4743
],
4844
"settings": {
4945
"C_Cpp.intelliSenseEngine": "disabled",
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Example .devcontainer/docker-compose.override.yml
2+
version: "3.9"
3+
services:
4+
tensorrt_llm-dev:
5+
volumes:
6+
# Uncomment the entries below to enable the corresponding mounts:
7+
# # Mount TRTLLM data volume:
8+
# - /home/scratch.trt_llm_data/:/home/scratch.trt_llm_data/:ro

.devcontainer/docker-compose.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
version: "3.9"
22
services:
33
tensorrt_llm-dev:
4-
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202506051650-4885
4+
image: ${DEV_CONTAINER_IMAGE}
55
network_mode: host
66
ipc: host
77

@@ -22,7 +22,8 @@ services:
2222
capabilities: [gpu]
2323

2424
volumes:
25-
- ..:/workspaces/tensorrt_llm:cached
25+
- ${SOURCE_DIR}:/workspaces/tensorrt_llm
26+
- ${LOCAL_HF_HOME}:/huggingface # HF cache
2627

2728
environment:
2829
- CCACHE_DIR=/workspaces/tensorrt_llm/cpp/.ccache

.devcontainer/make_env.py

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
#!/usr/bin/env python3
2+
3+
import json
4+
import logging
5+
import os
6+
import re
7+
import shlex
8+
import subprocess
9+
import sys
10+
from pathlib import Path
11+
from tempfile import TemporaryDirectory
12+
from typing import Dict, List, Optional
13+
14+
# Input files evaluated (in this order) by main() to build the environment.
# All paths are relative to the current working directory.
JENKINS_PROPS_PATH = Path("jenkins/current_image_tags.properties")
DEV_CONTAINER_ENV_PATH = Path(".devcontainer/devcontainer.env")
DEV_CONTAINER_USER_ENV_PATH = Path(".devcontainer/devcontainer.env.user")
# Output file written by _update_dot_env().
DOT_ENV_PATH = Path(".devcontainer/.env")
# Compose override file and the example it is initially copied from
# (see _ensure_compose_override()).
COMPOSE_OVERRIDE_PATH = Path(".devcontainer/docker-compose.override.yml")
COMPOSE_OVERRIDE_EXAMPLE_PATH = Path(
    ".devcontainer/docker-compose.override-example.yml")

# Names of the environment variables read and/or written by this script.
HOME_DIR_VAR = "HOME_DIR"
SOURCE_DIR_VAR = "SOURCE_DIR"
DEV_CONTAINER_IMAGE_VAR = "DEV_CONTAINER_IMAGE"
# Parsed with bool(int(...)) in main(); when absent, main() tries pre-built
# images first and falls back to a local build.
BUILD_LOCAL_VAR = "BUILD_LOCAL"
# Used by _select_prebuilt_image() as the first candidate image.
JENKINS_IMAGE_VAR = "LLM_DOCKER_IMAGE"
LOCAL_HF_HOME_VAR = "LOCAL_HF_HOME"

# Module-wide logger; configured in the __main__ guard.
LOGGER = logging.getLogger("make_env")
30+
31+
32+
def _load_env(env_files: List[Path]) -> Dict[str, str]:
    """Evaluate files using 'sh' and return resulting environment.

    The files are fed to a single ``sh`` process, in the given order, with
    ``set -a`` active so that every assignment is exported. The shell then
    ``exec``s a Python one-liner which dumps its environment as JSON.

    Args:
        env_files: Files containing ``sh``-compatible variable assignments.
            Later files can reference and override values from earlier ones.

    Returns:
        The complete environment seen after evaluating all files, i.e. the
        environment inherited from this process plus the assignments made
        by the files.

    Raises:
        FileNotFoundError: If one of ``env_files`` does not exist.
        subprocess.CalledProcessError: If ``sh`` exits with a non-zero status.
    """
    script_parts = [b"set -a"]
    # Reading the files here (rather than via `cat` in a shell pipeline)
    # surfaces missing files as a clear error. Joining with explicit newlines
    # keeps a file lacking a trailing newline from merging its last line with
    # the first line of the next file.
    script_parts.extend(Path(env_file).read_bytes() for env_file in env_files)
    script_parts.append(
        b"exec /usr/bin/env python3 -c "
        b"'import json; import os; print(json.dumps(dict(os.environ)))'")
    script = b"\n".join(script_parts) + b"\n"

    # The script is passed on stdin and the JSON is captured directly, so no
    # path needs to be interpolated into a shell command line.
    proc = subprocess.run(
        ["sh"],
        input=script,
        stdout=subprocess.PIPE,
        check=True,
    )
    return json.loads(proc.stdout)
47+
48+
49+
def _detect_rootless() -> bool:
    """Report whether ``./docker/detect_rootless.sh`` signals rootless mode.

    The script is run through the shell relative to the current working
    directory. Its standard output is expected to be an integer; any non-zero
    value is interpreted as "rootless".

    Raises:
        subprocess.CalledProcessError: If the script exits non-zero.
        ValueError: If the script output is not an integer.
    """
    result = subprocess.run(
        "./docker/detect_rootless.sh",
        shell=True,
        check=True,
        capture_output=True,
    )
    flag_text = result.stdout.decode("utf-8").strip()
    return int(flag_text) != 0
55+
56+
57+
def _handle_rootless(env_inout: Dict[str, str]):
    """Adjust ``env_inout`` in place depending on Docker Rootless Mode.

    Without rootless mode, ``HOME_DIR`` is set to ``HOME`` and ``SOURCE_DIR``
    to the current working directory.

    With rootless mode, ``HOME_DIR`` and ``SOURCE_DIR`` must already be
    present, and a ``LOCAL_HF_HOME`` located below ``HOME`` is re-rooted
    below ``HOME_DIR``.

    Args:
        env_inout: Environment mapping, modified in place.

    Raises:
        ValueError: In rootless mode, if ``HOME_DIR`` or ``SOURCE_DIR`` is
            missing, or if ``HF_HOME`` is present both in this process'
            environment and in ``env_inout``.
    """
    if not _detect_rootless():
        env_inout[HOME_DIR_VAR] = env_inout["HOME"]
        env_inout[SOURCE_DIR_VAR] = os.getcwd()
        return

    LOGGER.info("Docker Rootless Mode detected.")

    if HOME_DIR_VAR not in env_inout:
        raise ValueError(
            "Docker Rootless Mode requires setting HOME_DIR in devcontainer.env.user"
        )
    if SOURCE_DIR_VAR not in env_inout:
        raise ValueError(
            "Docker Rootless Mode requires setting SOURCE_DIR in devcontainer.env.user"
        )

    # Handle HF_HOME
    # NOTE(review): when env_inout was derived from this process' environment,
    # HF_HOME being set here presumably implies it is in env_inout as well, so
    # an override may never be recognized -- confirm the intended condition.
    hf_home_set_twice = "HF_HOME" in os.environ and "HF_HOME" in env_inout
    if hf_home_set_twice:
        raise ValueError(
            "Docker Rootless Mode requires either not setting HF_HOME at all or overriding it in devcontainer.env.user"
        )

    local_hf_home = env_inout[LOCAL_HF_HOME_VAR]
    home_prefix = env_inout["HOME"]
    if local_hf_home.startswith(home_prefix):
        # Swap the leading HOME prefix for HOME_DIR.
        env_inout[LOCAL_HF_HOME_VAR] = (env_inout[HOME_DIR_VAR] +
                                        local_hf_home[len(home_prefix):])
81+
82+
83+
def _select_prebuilt_image(env: Dict[str, str]) -> Optional[str]:
    """Return the first pre-built container image that can be run.

    Candidates are tried in this order:

    1. The image named by ``env[JENKINS_IMAGE_VAR]``.
    2. ``nvcr.io/nvidia/tensorrt-llm/devel:<version>`` for every ``v<X.Y.Z>``
       git tag merged into ``HEAD``, most recently created first.

    A candidate is accepted if ``docker run`` (pulling if necessary) succeeds
    in executing ``/bin/true`` in it.

    Args:
        env: Environment mapping; must contain ``JENKINS_IMAGE_VAR``.

    Returns:
        The URI of the first usable image, or ``None`` if none is usable.

    Raises:
        KeyError: If ``JENKINS_IMAGE_VAR`` is missing from ``env``.
    """
    # Jenkins image
    candidate_images: List[str] = [env[JENKINS_IMAGE_VAR]]

    # NGC images
    proc = subprocess.run(
        r"git tag --sort=creatordate --merged=HEAD | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+' | sed -E 's/^v(.*)$/\1/' | tac",
        shell=True,
        capture_output=True,
        # Decode the output: with bytes, the f-string below would embed the
        # bytes repr (b'...') in the image reference.
        text=True,
        check=True,
    )
    for git_tag in proc.stdout.splitlines():
        git_tag = git_tag.strip()
        if not git_tag:
            continue
        candidate_images.append(f"nvcr.io/nvidia/tensorrt-llm/devel:{git_tag}")

    # Check image availability
    for candidate_image in candidate_images:
        LOGGER.info(f"Trying image {candidate_image}")

        try:
            subprocess.run(
                f"docker run --rm -it --pull=missing --entrypoint=/bin/true {shlex.quote(candidate_image)}",
                check=True,
                shell=True)
        except subprocess.CalledProcessError:
            # Image missing or not runnable; try the next candidate.
            continue

        LOGGER.info(f"Using image {candidate_image}")
        return candidate_image

    LOGGER.info("No pre-built image found!")
    return None
115+
116+
117+
def _build_local_image() -> str:
    """Build the development image via ``make`` and return its image URI.

    Runs ``make -C docker devel_build``, echoing its standard output while
    recording it, then extracts the ``--tag`` argument of the
    ``docker buildx build`` command found in that output.

    Returns:
        The image URI passed as ``--tag`` to ``docker buildx build``.

    Raises:
        subprocess.CalledProcessError: If ``make`` exits with non-zero status.
        RuntimeError: If no ``--tag`` argument is found in the build output.
    """
    LOGGER.info("Building container image locally")

    build_cmd = ["make", "-C", "docker", "devel_build"]
    log_lines: List[str] = []
    # Read the output directly instead of piping through `tee`: with a shell
    # pipeline the exit status is that of `tee`, so a failed build would go
    # unnoticed.
    with subprocess.Popen(build_cmd,
                          stdout=subprocess.PIPE,
                          text=True,
                          errors="replace") as proc:
        for line in proc.stdout:
            sys.stdout.write(line)
            sys.stdout.flush()
            log_lines.append(line)
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, build_cmd)

    build_log = "".join(log_lines)
    image_uri = _extract_image_tag(build_log)
    if image_uri is None:
        raise RuntimeError(
            f"Could not parse --tag argument from build log: {build_log}")
    return image_uri


def _extract_image_tag(build_log: str) -> Optional[str]:
    """Return the ``--tag`` value of the first ``docker buildx build`` line."""
    # Handle escaped and actual line breaks
    for build_log_line in re.sub(r"\\\n", " ", build_log).splitlines():
        try:
            tokens = shlex.split(build_log_line)
        except ValueError:
            # Arbitrary log output (e.g. unbalanced quotes) is not a command.
            continue
        if tokens[:3] != ["docker", "buildx", "build"]:
            continue
        for index, token in enumerate(tokens):
            if token.startswith("--tag="):
                value = token.removeprefix("--tag=")
            elif token == "--tag" and index + 1 < len(tokens):
                value = tokens[index + 1]
            else:
                continue
            if value:
                return value  # this is the image URI
    return None
150+
151+
152+
def _ensure_compose_override():
    """Create the compose override file from the example if it is missing.

    An existing ``COMPOSE_OVERRIDE_PATH`` is left untouched.
    """
    if COMPOSE_OVERRIDE_PATH.exists():
        return

    LOGGER.info(
        f"Creating initial {COMPOSE_OVERRIDE_PATH} from {COMPOSE_OVERRIDE_EXAMPLE_PATH}"
    )
    # Read the example first so that nothing is created if it is unreadable.
    example_content = COMPOSE_OVERRIDE_EXAMPLE_PATH.read_bytes()
    COMPOSE_OVERRIDE_PATH.write_bytes(example_content)
159+
160+
161+
def _update_dot_env(env: Dict[str, str]):
    """Write the entries of ``env`` that differ from ``os.environ`` to ``DOT_ENV_PATH``.

    Values are written single-quoted so that they are taken literally by the
    env-file parser; embedded single quotes are backslash-escaped.

    Args:
        env: Environment mapping to persist. Entries whose value equals the
            one in this process' environment are omitted.
    """
    LOGGER.info(f"Updating {DOT_ENV_PATH}")

    output_lines = [
        "# NOTE: This file is generated by make_env.py, modify devcontainer.env.user instead of this file.\n",
        "\n",
    ]

    for env_key, env_value in env.items():
        if os.environ.get(env_key) == env_value:
            # Only storing differences w.r.t. base env
            continue
        # Do not combine shlex.quote() with surrounding double quotes: for any
        # value needing quoting that stores literal single quotes as part of
        # the value.
        escaped_value = env_value.replace("'", "\\'")
        output_lines.append(f"{env_key}='{escaped_value}'\n")

    with open(DOT_ENV_PATH, "w") as f:
        f.writelines(output_lines)
177+
178+
179+
def main():
    """Generate the ``.env`` file consumed by the Dev Container compose setup.

    Steps:

    1. Evaluate the Jenkins image properties, ``devcontainer.env`` and, if
       present, ``devcontainer.env.user``.
    2. Apply Docker Rootless Mode adjustments.
    3. Determine the container image: a user-provided
       ``DEV_CONTAINER_IMAGE`` wins; otherwise ``BUILD_LOCAL`` selects between
       pre-built images only (``0``), a local build only (non-zero) or
       pre-built images with a local build as fallback (unset).
    4. Make sure the compose override file exists.
    5. Write the resulting variables to ``DOT_ENV_PATH``.

    Raises:
        RuntimeError: If no pre-built image is usable and local builds are
            disabled.
    """
    env_files = [
        JENKINS_PROPS_PATH,
        DEV_CONTAINER_ENV_PATH,
    ]
    # devcontainer.env.user holds optional local customizations and only
    # exists if the developer created it; passing a missing file on would
    # make loading the environment fail.
    if DEV_CONTAINER_USER_ENV_PATH.exists():
        env_files.append(DEV_CONTAINER_USER_ENV_PATH)

    env = _load_env(env_files)
    _handle_rootless(env_inout=env)

    # Determine container image to use
    image_uri = env.get(DEV_CONTAINER_IMAGE_VAR)
    if image_uri:
        LOGGER.info(f"Using user-provided container image: {image_uri}")
    else:
        # Tri-state: True / False if BUILD_LOCAL is set, None if it is unset.
        build_local = bool(int(
            env[BUILD_LOCAL_VAR].strip())) if BUILD_LOCAL_VAR in env else None
        image_uri = None
        if not build_local:
            image_uri = _select_prebuilt_image(env)
        if image_uri is None:
            if build_local is False:
                raise RuntimeError(
                    "No suitable container image found and local build disabled."
                )
            image_uri = _build_local_image()
            LOGGER.info(f"Using locally built container image: {image_uri}")
        env[DEV_CONTAINER_IMAGE_VAR] = image_uri

    _ensure_compose_override()

    _update_dot_env(env)
211+
212+
213+
if __name__ == "__main__":
    # Script entry point: report any failure as a single log line and exit
    # with a non-zero status instead of printing a traceback.
    logging.basicConfig(level=logging.INFO)
    try:
        main()
    except Exception as exc:
        LOGGER.error(f"{type(exc).__name__}: {exc}")
        sys.exit(-1)

0 commit comments

Comments
 (0)