[Misc][Benchmark] Add support for different tokenizer_mode (#15040)

aarnphm · russellb · web-flow · commit 6c5a3195db12 · 2025-03-19T14:56:50.000Z
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: Russell Bryant <rbryant@redhat.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com>
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
@@ -732,8 +732,11 @@ def main(args: argparse.Namespace):
         api_url = f"http://{args.host}:{args.port}{args.endpoint}"
         base_url = f"http://{args.host}:{args.port}"
 
-    tokenizer = get_tokenizer(tokenizer_id,
-                              trust_remote_code=args.trust_remote_code)
+    tokenizer = get_tokenizer(
+        tokenizer_id,
+        trust_remote_code=args.trust_remote_code,
+        tokenizer_mode=args.tokenizer_mode,
+    )
 
     if args.dataset == 'grammar':
         args.structure_type = 'guided_grammar'
@@ -876,6 +879,13 @@ def main(args: argparse.Namespace):
         help=
         "Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
     )
+    # Forwarded to get_tokenizer() as its tokenizer_mode keyword argument.
+    parser.add_argument(
+        "--tokenizer-mode",
+        type=str,
+        default="auto",
+        help="Tokenizer mode passed to get_tokenizer (default: auto).",
+    )
     parser.add_argument(
         "--num-prompts",
         type=int,
diff --git a/benchmarks/run_structured_output_benchmark.sh b/benchmarks/run_structured_output_benchmark.sh
@@ -54,6 +54,7 @@ for qps in "${QPS_VALUES[@]}"; do
   python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
     --request-rate $qps \
     --result-filename "$FILENAME" \
+    --tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
     --port ${PORT:-8000}
 
   echo "Completed benchmark with QPS: $qps"