Add TestNemotronSuperV3::test_bf16 to l0_dgx_h200; limit to world_size=4

galagam · galagam · commit 645b45984ce2 · 2025-12-25T22:54:16.000-08:00
Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com>
diff --git a/tests/integration/defs/accuracy/test_llm_api_autodeploy.py b/tests/integration/defs/accuracy/test_llm_api_autodeploy.py
@@ -236,7 +236,7 @@ def test_fp8(self):
 
 class TestNemotronSuperV3(LlmapiAccuracyTestHarness):
     MODEL_NAME = "nvidia/Nemotron-Super-V3"
-    MODEL_PATH_BF16 = "/scratch/models/super-v3-iter_0440000/hf"  # add to llm_models_root? I don't have permissions
+    MODEL_PATH_BF16 = f"{llm_models_root()}/Nemotron-Super-3-120B-A12B-dev"
 
     def get_default_kwargs(self):
         return {
@@ -265,15 +265,15 @@ def get_default_sampling_params(self):
                               n=beam_width,
                               use_beam_search=beam_width > 1)
 
-    @pytest.mark.skip_less_device_memory(
-        32000)  # might need to require more memory
-    @pytest.mark.skip_less_device(8)
+    # 180GB works, might be able to go lower
+    @pytest.mark.skip_less_device_memory(180000)
+    @pytest.mark.skip_less_device(4)
     def test_bf16(self):
         kwargs = self.get_default_kwargs()
         sampling_params = self.get_default_sampling_params()
         with AutoDeployLLM(model=self.MODEL_PATH_BF16,
                            tokenizer=self.MODEL_PATH_BF16,
-                           world_size=8,
+                           world_size=4,
                            **kwargs) as llm:
             task = MMLU(self.MODEL_NAME)
             task.evaluate(llm, sampling_params=sampling_params)
diff --git a/tests/integration/test_lists/test-db/l0_dgx_h200.yml b/tests/integration/test_lists/test-db/l0_dgx_h200.yml
@@ -134,6 +134,7 @@ l0_dgx_h200:
   # ------------- AutoDeploy tests ---------------
   - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4]
   - accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16
+  - accuracy/test_llm_api_autodeploy.py::TestNemotronSuperV3::test_bf16
 - condition:
     ranges:
       system_gpu_count: