Skip to content

Commit 684b37d

Browse files
authored
[https://nvbugs/5747938][fix] Use local tokenizer (NVIDIA#10230)
Signed-off-by: Pengyun Lin <[email protected]>
1 parent c5b0f9e commit 684b37d

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

examples/serve/aiperf_client.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
aiperf profile \
44
-m TinyLlama-1.1B-Chat-v1.0 \
5-
--tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
5+
--tokenizer ${AIPERF_TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} \
66
--endpoint-type chat \
77
--random-seed 123 \
88
--synthetic-input-tokens-mean 128 \

tests/unittest/llmapi/apps/_test_trtllm_serve_example.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
import json
22
import os
33
import subprocess
4-
import sys
54
import tempfile
65

76
import pytest
87
import yaml
98

9+
from ..test_llm import get_model_path
1010
from .openai_server import RemoteOpenAIServer
1111

12-
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
13-
from test_llm import get_model_path
14-
1512

1613
@pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"])
1714
def model_name():
@@ -57,15 +54,19 @@ def example_root():
5754
("bash", "curl_completion_client.sh"),
5855
("bash", "aiperf_client.sh"),
5956
("bash", "curl_responses_client.sh")])
60-
def test_trtllm_serve_examples(exe: str, script: str,
57+
def test_trtllm_serve_examples(exe: str, script: str, model_name: str,
6158
server: RemoteOpenAIServer, example_root: str):
6259
client_script = os.path.join(example_root, script)
6360
# CalledProcessError will be raised if any errors occur
61+
custom_env = os.environ.copy()
62+
if script.startswith("aiperf"):
63+
custom_env["AIPERF_TOKENIZER_PATH"] = get_model_path(model_name)
6464
result = subprocess.run([exe, client_script],
6565
stdout=subprocess.PIPE,
6666
stderr=subprocess.PIPE,
6767
text=True,
68-
check=True)
68+
check=True,
69+
env=custom_env)
6970
if script.startswith("curl"):
7071
# For curl scripts, we expect a JSON response
7172
result_stdout = result.stdout.strip()

0 commit comments

Comments
 (0)