2 files changed: +6 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,7 @@ def test_fp8(self):
236236
237237class TestNemotronSuperV3 (LlmapiAccuracyTestHarness ):
238238 MODEL_NAME = "nvidia/Nemotron-Super-V3"
239- MODEL_PATH_BF16 = "/scratch/models/super-v3-iter_0440000/hf" # add to llm_models_root? I don't have permissions
239+ MODEL_PATH_BF16 = f"{llm_models_root()}/Nemotron-Super-3-120B-A12B-dev"
240240
241241 def get_default_kwargs (self ):
242242 return {
@@ -265,15 +265,15 @@ def get_default_sampling_params(self):
265265 n = beam_width ,
266266 use_beam_search = beam_width > 1 )
267267
268- @ pytest . mark . skip_less_device_memory (
269- 32000 ) # might need to require more memory
270- @pytest .mark .skip_less_device (8 )
268+ # 180GB works, might be able to go lower
269+ @ pytest . mark . skip_less_device_memory ( 180000 )
270+ @pytest .mark .skip_less_device (4 )
271271 def test_bf16 (self ):
272272 kwargs = self .get_default_kwargs ()
273273 sampling_params = self .get_default_sampling_params ()
274274 with AutoDeployLLM (model = self .MODEL_PATH_BF16 ,
275275 tokenizer = self .MODEL_PATH_BF16 ,
276- world_size = 8 ,
276+ world_size = 4 ,
277277 ** kwargs ) as llm :
278278 task = MMLU (self .MODEL_NAME )
279279 task .evaluate (llm , sampling_params = sampling_params )
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@ l0_dgx_h200:
134134 # ------------- AutoDeploy tests ---------------
135135 - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4]
136136 - accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16
137+ - accuracy/test_llm_api_autodeploy.py::TestNemotronSuperV3::test_bf16
137138- condition :
138139 ranges :
139140 system_gpu_count :
You can’t perform that action at this time.
0 commit comments