Skip to content

Commit 20b69a9

Browse files
galagam and nvchenghaoz authored
[NVIDIA#10056][test] AutoDeploy: Add accuracy test for Nemotron SuperV3 (NVIDIA#10131)
Signed-off-by: Gal Hubara Agam <[email protected]> Signed-off-by: Chenghao Zhang <[email protected]> Co-authored-by: Chenghao Zhang <[email protected]>
1 parent 5489d18 commit 20b69a9

File tree

3 files changed

+51
-0
lines changed

3 files changed

+51
-0
lines changed

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,8 @@ bigcode/starcoder2-15b:
289289
- accuracy: 54.5
290290
mistral/Mistral-Large-3-675B:
291291
- accuracy: 90.83
292+
nvidia/Nemotron-Super-V3:
293+
- accuracy: 84.38
292294
nvidia/Nemotron-3-Nano:
293295
- accuracy: 69.37
294296
- quant_algo: FP8

tests/integration/defs/accuracy/references/mmlu.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,8 @@ mistralai/Mistral-Nemo-12b-Base:
346346
accuracy: 69.66
347347
mistral/Mistral-Large-3-675B:
348348
- accuracy: 87.54
349+
nvidia/Nemotron-Super-V3:
350+
- accuracy: 79.41
349351
nvidia/Nemotron-3-Nano:
350352
- accuracy: 73.85
351353
- quant_algo: FP8

tests/integration/defs/accuracy/test_llm_api_autodeploy.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,50 @@ def test_fp8(self):
232232
task.evaluate(llm, sampling_params=sampling_params)
233233
task = GSM8K(self.MODEL_NAME)
234234
task.evaluate(llm)
235+
236+
237+
class TestNemotronSuperV3(LlmapiAccuracyTestHarness):
    """Accuracy harness for Nemotron Super V3 via the AutoDeploy LLM API.

    Runs MMLU and GSM8K against the BF16 checkpoint and compares the
    scores with the reference values recorded in the accuracy YAMLs.
    """

    MODEL_NAME = "nvidia/Nemotron-Super-V3"
    # TODO(review): hard-coded scratch path — original author noted it should
    # move under llm_models_root once permissions allow.
    MODEL_PATH_BF16 = "/scratch/models/super-v3-iter_0440000/hf"

    def get_default_kwargs(self):
        """Return the keyword arguments used to construct the AutoDeploy LLM."""
        # Sharding picked up from both the model factory and heuristics;
        # restricted to expert-parallel and bmm dimensions.
        sharding_cfg = {
            "detect_sharding": {
                "sharding_source": ['factory', 'heuristic'],
                "sharding_dims": ['ep', 'bmm'],
            },
        }
        return {
            "skip_tokenizer_init": False,
            "trust_remote_code": True,
            "skip_loading_weights": False,
            "compile_backend": "torch-cudagraph",
            # NOTE(review): conservative KV-cache budget — original comment
            # suggests this could be raised.
            "free_mem_ratio": 0.5,
            "max_batch_size": 128,
            "max_seq_len": 8192,
            "max_num_tokens": 8192,
            "cuda_graph_batch_sizes": [1, 2, 4, 8, 16, 32, 64, 128],
            "transforms": sharding_cfg,
        }

    def get_default_sampling_params(self):
        """Build the SamplingParams shared by the accuracy tasks.

        Uses -1 for both end and pad ids and greedy single-beam decoding.
        """
        end_token = -1
        num_beams = 1
        return SamplingParams(
            end_id=end_token,
            pad_id=end_token,
            n=num_beams,
            use_beam_search=num_beams > 1,
        )

    # NOTE(review): 32 GB per device may be insufficient per the original
    # author's comment — confirm before relying on this gate.
    @pytest.mark.skip_less_device_memory(
        32000)
    @pytest.mark.skip_less_device(8)
    def test_bf16(self):
        """Evaluate the BF16 checkpoint on MMLU then GSM8K with 8-way TP."""
        llm_kwargs = self.get_default_kwargs()
        sampling = self.get_default_sampling_params()
        with AutoDeployLLM(model=self.MODEL_PATH_BF16,
                           tokenizer=self.MODEL_PATH_BF16,
                           world_size=8,
                           **llm_kwargs) as llm:
            for task in (MMLU(self.MODEL_NAME), ):
                task.evaluate(llm, sampling_params=sampling)
            gsm8k = GSM8K(self.MODEL_NAME)
            gsm8k.evaluate(llm)

0 commit comments

Comments
 (0)