
Commit bfc5919

[https://nvbugs/5745152][fix] Fix some GPTOSS test setups (#10085)
Signed-off-by: Dongfeng Yu <[email protected]>
1 parent 4a5ef84 commit bfc5919

File tree (3 files changed, +7 -2 lines changed):
  tests/integration/defs/accuracy/test_disaggregated_serving.py
  tests/integration/defs/accuracy/test_llm_api_pytorch.py
  tests/integration/test_lists/waives.txt


tests/integration/defs/accuracy/test_disaggregated_serving.py (2 additions & 0 deletions)

@@ -1091,11 +1091,13 @@ def test_auto_dtype(self, block_reuse, mocker):
             "max_attention_window": [128, 32768],
             "enable_block_reuse": block_reuse,
             "enable_partial_reuse": False,
+            "free_gpu_memory_fraction": 0.5,
         }
         gen_server_config["kv_cache_config"] = {
             "max_attention_window": [128, 32768],
             "enable_block_reuse": block_reuse,
             "enable_partial_reuse": False,
+            "free_gpu_memory_fraction": 0.5,
         }
         disaggregated_server_config = {
             "hostname": "localhost",

tests/integration/defs/accuracy/test_llm_api_pytorch.py (5 additions & 0 deletions)

@@ -4369,6 +4369,11 @@ def test_eagle3_4gpus(self, moe_backend, one_model, overlap_scheduler,
                 "https://nvbugs/5636916: Remaining Hopper Eagle Accuracy Issue for only TP=4"
             )
 
+        if not one_model and overlap_scheduler:
+            pytest.skip(
+                "https://nvbugs/5745152: two_model + overlap_scheduler can sometimes time out."
+            )
+
         MAX_OUTPUT_LEN = 128179
         MAX_INPUT_LEN = 32768
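This change skips the two_model + overlap_scheduler parameter combination at runtime, citing the timeout tracked in https://nvbugs/5745152, instead of waiving the whole test. A minimal sketch of the pattern, assuming a simplified, hypothetical test body (the real test takes more parameters and runs an accuracy check):

import pytest

@pytest.mark.parametrize("overlap_scheduler", [True, False])
@pytest.mark.parametrize("one_model", [True, False])
def test_eagle3_sketch(one_model, overlap_scheduler):
    # Skip only the known-bad combination; all others still run.
    if not one_model and overlap_scheduler:
        pytest.skip(
            "https://nvbugs/5745152: two_model + overlap_scheduler can sometimes time out."
        )
    # ... the remaining parameter combinations would run the Eagle3 check here ...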

tests/integration/test_lists/waives.txt (0 additions & 2 deletions)

@@ -320,8 +320,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1NemotronNano8Bv1::test_fp8_prequan
 accuracy/test_llm_api_pytorch.py::TestNemotronH_47B_Base::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (https://nvbugs/5640697)
 accuracy/test_llm_api_pytorch.py::TestNemotronH_47B_Base::test_reasoning_fp8_prequantized[tp8ep8-cuda_graph=True] SKIP (https://nvbugs/5640697)
 accuracy/test_llm_api_pytorch.py::TestQwQ_32B::test_auto_dtype_tp4 SKIP (https://nvbugs/5640697)
-accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[True] SKIP (https://nvbugs/5644632)
-accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[False] SKIP (https://nvbugs/5644632)
 test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True] SKIP (https://nvbugs/5648560)
 test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-False] SKIP (https://nvbugs/5648560)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp] SKIP (https://nvbugs/5629136)
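With the fixes above in place, the two TestGPTOSS::test_auto_dtype waives are removed so those tests run again. Each waive line follows the pattern "<test-id> SKIP (<bug-url>)"; a minimal sketch of parsing such a list, assuming a hypothetical standalone parser rather than the project's actual test-list tooling:

import re

WAIVE_RE = re.compile(r"^(?P<test>\S+)\s+SKIP\s+\((?P<reason>[^)]+)\)\s*$")

def parse_waives(text: str) -> dict:
    """Map waived test IDs to their skip reasons (bug URLs)."""
    waived = {}
    for line in text.splitlines():
        line = line.strip()
        # Skip blank lines and comments in this hypothetical format.
        if not line or line.startswith("#"):
            continue
        match = WAIVE_RE.match(line)
        if match:
            waived[match.group("test")] = match.group("reason")
    return waived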
