File tree Expand file tree Collapse file tree 2 files changed +6
-6
lines changed
Expand file tree Collapse file tree 2 files changed +6
-6
lines changed Original file line number Diff line number Diff line change @@ -421,4 +421,3 @@ disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_qwen3[
421421test_e2e.py::test_openai_responses SKIP (https://nvbugs/5635153)
422422accuracy/test_llm_api_pytorch.py::TestSeedOss_36B::test_auto_dtype SKIP (https://nvbugs/5612438)
423423accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline] SKIP (https://nvbugs/5680905)
424- test_e2e.py::test_openai_chat_multimodal_example SKIP (https://nvbugs/5685428)
Original file line number Diff line number Diff line change @@ -33,9 +33,6 @@ def temp_extra_llm_api_options_file(request):
3333 "enable_block_reuse" : False ,
3434 "free_gpu_memory_fraction" : 0.6 ,
3535 },
36- "build_config" : {
37- "max_num_tokens" : 16384 ,
38- },
3936 }
4037
4138 with open (temp_file_path , 'w' ) as f :
@@ -51,8 +48,12 @@ def temp_extra_llm_api_options_file(request):
5148def server (model_name : str , temp_extra_llm_api_options_file : str ):
5249 model_path = get_model_path (model_name )
5350 args = [
54- "--extra_llm_api_options" , temp_extra_llm_api_options_file ,
55- "--max_batch_size" , "64"
51+ "--extra_llm_api_options" ,
52+ temp_extra_llm_api_options_file ,
53+ "--max_batch_size" ,
54+ "64" ,
55+ "--max_num_tokens" ,
56+ "16384" ,
5657 ]
5758 with RemoteOpenAIServer (model_path , args ) as remote_server :
5859 yield remote_server
You can’t perform that action at this time.
0 commit comments