[https://nvbugs/5685428][fix] fix test_openai_chat_multimodal.py (#9406)

QiJune · web-flow · commit 786d308b888e · 2025-11-24T16:56:33.000-08:00
Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -421,4 +421,3 @@ disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_qwen3[
 test_e2e.py::test_openai_responses SKIP (https://nvbugs/5635153)
 accuracy/test_llm_api_pytorch.py::TestSeedOss_36B::test_auto_dtype SKIP (https://nvbugs/5612438)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline] SKIP (https://nvbugs/5680905)
-test_e2e.py::test_openai_chat_multimodal_example SKIP (https://nvbugs/5685428)
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py b/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py
@@ -33,9 +33,6 @@ def temp_extra_llm_api_options_file(request):
                 "enable_block_reuse": False,
                 "free_gpu_memory_fraction": 0.6,
             },
-            "build_config": {
-                "max_num_tokens": 16384,
-            },
         }
 
         with open(temp_file_path, 'w') as f:
@@ -51,8 +48,12 @@ def temp_extra_llm_api_options_file(request):
 def server(model_name: str, temp_extra_llm_api_options_file: str):
     model_path = get_model_path(model_name)
     args = [
-        "--extra_llm_api_options", temp_extra_llm_api_options_file,
-        "--max_batch_size", "64"
+        "--extra_llm_api_options",
+        temp_extra_llm_api_options_file,
+        "--max_batch_size",
+        "64",
+        "--max_num_tokens",
+        "16384",
     ]
     with RemoteOpenAIServer(model_path, args) as remote_server:
         yield remote_server