Commit 22c120e

[None][doc] Update docker cmd in quick start guide and trtllm-serve … (#7787)

nv-guomingz authored
Signed-off-by: nv-guomingz <[email protected]>
1 parent: a838c2c

File tree (4 files changed: +30 / -3 lines)

- docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md
- docs/source/conf.py
- docs/source/helper.py
- docs/source/quick-start-guide.md

docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md
Lines changed: 2 additions & 1 deletion

````diff
@@ -24,9 +24,10 @@ TensorRT LLM distributes the pre-built container on [NGC Catalog](https://catalo
 You can launch the container using the following command:
 
 ```bash
-docker run --rm --ipc host -p 8000:8000 --gpus all -it nvcr.io/nvidia/tensorrt-llm/release
+docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:x.y.z
 ```
 
+
 ## Start the trtllm-serve service
 > [!WARNING]
 > The commands and configurations presented in this document are for illustrative purposes only.
````
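For context (my gloss, not part of the commit): `--ulimit memlock=-1` removes the container's locked-memory cap, which pinned host-memory allocations typically need; `--ulimit stack=67108864` raises the stack limit to 64 MiB; and the `x.y.z` image tag is a placeholder that the new `update_version()` helper in `docs/source/helper.py` (below) rewrites to the real package version when the docs are built.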

docs/source/conf.py
Lines changed: 2 additions & 1 deletion

````diff
@@ -164,7 +164,7 @@ def tag_role(name, rawtext, text, lineno, inliner, options=None, content=None):
 
 
 def setup(app):
-    from helper import generate_examples, generate_llmapi
+    from helper import generate_examples, generate_llmapi, update_version
 
     from tensorrt_llm.llmapi.utils import tag_llm_params
     tag_llm_params()
@@ -173,6 +173,7 @@ def setup(app):
 
     generate_examples()
     generate_llmapi()
+    update_version()
 
 
 def gen_cpp_doc(ofile_name: str, header_dir: str, summary: str):
````
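For readers unfamiliar with the hook: Sphinx imports `conf.py` itself as an extension and calls its `setup(app)` once at startup, before any sources are parsed, which is why calling `update_version()` here is enough to rewrite the `x.y.z` placeholders ahead of the markdown read. A minimal sketch of the hook shape (illustrative, not the project's code):

```python
# Illustrative conf.py-style hook, not the project's code. Sphinx calls
# setup(app) once at startup, before reading any .md/.rst sources, so
# work done here may rewrite those sources in place.
def setup(app):
    print("build starting; doc sources can be patched here")
    # Optional but conventional: report extension metadata to Sphinx.
    return {"parallel_read_safe": True}
```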

docs/source/helper.py
Lines changed: 25 additions & 0 deletions

````diff
@@ -1,4 +1,6 @@
+import importlib.util
 import logging
+import os
 import re
 from dataclasses import dataclass
 from itertools import chain, groupby
@@ -340,6 +342,29 @@ def generate_llmapi():
         f.write(content)
 
 
+def update_version():
+    version_path = os.path.abspath(
+        os.path.join(os.path.dirname(__file__),
+                     "../../tensorrt_llm/version.py"))
+    spec = importlib.util.spec_from_file_location("version_module",
+                                                  version_path)
+    version_module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(version_module)
+    version = version_module.__version__
+    file_list = [
+        "docs/source/quick-start-guide.md",
+        "docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md"
+    ]
+    for file in file_list:
+        file_path = os.path.abspath(
+            os.path.join(os.path.dirname(__file__), "../../" + file))
+        with open(file_path, "r") as f:
+            content = f.read()
+        content = content.replace("x.y.z", version)
+        with open(file_path, "w") as f:
+            f.write(content)
+
+
 if __name__ == "__main__":
     import os
     path = os.environ["TEKIT_ROOT"] + "/examples/llm-api/llm_inference.py"
````
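The `importlib` dance here is worth a note: loading `version.py` straight from its path yields `__version__` without importing the full `tensorrt_llm` package (and its heavyweight dependencies) during a docs build. A self-contained sketch of the same pattern, with a hypothetical `pkg/version.py`:

```python
# Standalone sketch of the pattern used by update_version(): execute a
# single file as a module without importing its parent package.
import importlib.util


def load_version(version_path: str) -> str:
    # Build a spec from the file path, materialize an empty module from
    # it, then run the file's code inside that module.
    spec = importlib.util.spec_from_file_location("version_module", version_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.__version__


if __name__ == "__main__":
    # Hypothetical path; any file that defines __version__ will do.
    print(load_version("pkg/version.py"))
```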

docs/source/quick-start-guide.md
Lines changed: 1 addition & 1 deletion

````diff
@@ -8,7 +8,7 @@ This is the starting point to try out TensorRT LLM. Specifically, this Quick Sta
 ## Launch Docker on a node with NVIDIA GPUs deployed
 
 ```bash
-docker run --ipc host --gpus all -p 8000:8000 -it nvcr.io/nvidia/tensorrt-llm/release
+docker run --rm -it --ipc host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 -p 8000:8000 nvcr.io/nvidia/tensorrt-llm/release:x.y.z
 ```
 
 
````
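As a follow-up to the quick-start command: `trtllm-serve` exposes an OpenAI-compatible HTTP API, so once a model is being served inside the container it can be exercised from the host through the mapped port. A hypothetical smoke test (endpoint path and payload shape assumed from the OpenAI chat-completions convention; the model name is a placeholder):

```python
# Hypothetical smoke test: POST a chat completion to a trtllm-serve
# instance assumed to be listening on the mapped port 8000.
import json
import urllib.request

payload = {
    "model": "placeholder-model-name",  # whatever model trtllm-serve was started with
    "messages": [{"role": "user", "content": "Hello!"}],
    "max_tokens": 32,
}
req = urllib.request.Request(
    "http://localhost:8000/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```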