Skip to content

Commit b5e4ee9

Browse files
[ci] fix correctness testing for neuronx (#2704)
1 parent 1c4ab1a commit b5e4ee9

File tree

3 files changed

+22
-23
lines changed

3 files changed

+22
-23
lines changed

tests/integration/llm/client.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -835,10 +835,7 @@ def get_model_name():
835835
"num_run": 4,
836836
"tokenizer": "bullerwins/Codestral-22B-v0.1-hf",
837837
"dataset": "humaneval",
838-
"score": 0.01,
839-
"parameters": {
840-
"return_full_text": True
841-
}
838+
"score": 0.01
842839
},
843840
"trtllm-llama3-8b": {
844841
"batch_size": [213],
@@ -856,13 +853,13 @@ def get_model_name():
856853
"dataset": "mmlu",
857854
"score": 0.6
858855
},
859-
"neuronx-llama3-1-8b": {
860-
"batch_size": [213],
856+
"neuronx-llama3-2-1b": {
857+
"batch_size": [32],
861858
"seq_length": [1],
862859
"num_run": 66,
863-
"tokenizer": "TheBloke/Llama-2-7B-fp16",
860+
"tokenizer": "NousResearch/Llama-3.2-1B",
864861
"dataset": "mmlu",
865-
"score": 0.6
862+
"score": 0.45
866863
},
867864
"trtllm-meta-llama3-8b-fp8": {
868865
"batch_size": [213],

tests/integration/llm/prepare.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1253,8 +1253,9 @@
12531253
"neuronx-codestral-22b": {
12541254
"engine": "Python",
12551255
"option.entryPoint": "djl_python.transformers_neuronx",
1256-
"option.model_id": "bullerwins/Codestral-22B-v0.1-hf",
1257-
"option.tensor_parallel_degree": 12,
1256+
"option.model_id": "s3://djl-llm/Codestral-22B-v0.1-hf/",
1257+
"option.tensor_parallel_degree": 8,
1258+
"option.block_size": 32,
12581259
"option.n_positions": 1024,
12591260
"option.rolling_batch": "auto",
12601261
"option.max_rolling_batch_size": 41,
@@ -1276,15 +1277,16 @@
12761277
"option.tensor_parallel_degree": 4,
12771278
"option.max_rolling_batch_size": 213
12781279
},
1279-
"neuronx-llama3-1-8b": {
1280+
"neuronx-llama3-2-1b": {
12801281
"engine": "Python",
12811282
"option.entryPoint": "djl_python.transformers_neuronx",
1282-
"option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
1283-
"option.tensor_parallel_degree": 12,
1284-
"option.n_positions": 768,
1285-
"option.rolling_batch": "auto",
1286-
"option.max_rolling_batch_size": 213,
1287-
"option.model_loading_timeout": 1800
1283+
"option.model_id": "s3://djl-llm/llama-3-2-1b-instruct/",
1284+
"option.tensor_parallel_degree": 2,
1285+
"option.n_positions": 1024,
1286+
"option.block_size": 32,
1287+
"option.rolling_batch": "vllm",
1288+
"option.max_rolling_batch_size": 32,
1289+
"option.model_loading_timeout": 2400
12881290
},
12891291
"trtllm-meta-llama3-8b-fp8": {
12901292
"engine": "Python",

tests/integration/tests.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -998,14 +998,14 @@ class TestCorrectnessNeuronx:
998998
def test_codestral_22b(self):
999999
with Runner('pytorch-inf2', 'codestral-22b') as r:
10001000
prepare.build_correctness_model("neuronx-codestral-22b")
1001-
r.launch(container='pytorch-inf2-6')
1001+
r.launch(container='pytorch-inf2-4')
10021002
client.run("correctness neuronx-codestral-22b".split())
10031003

1004-
def test_llama3_1_8b(self):
1005-
with Runner('pytorch-inf2', 'llama3-1-8b') as r:
1006-
prepare.build_correctness_model("neuronx-llama3-1-8b")
1007-
r.launch(container='pytorch-inf2-6')
1008-
client.run("correctness neuronx-llama3-1-8b".split())
1004+
def test_llama3_2_1b(self):
1005+
with Runner('pytorch-inf2', 'llama3-2-1b') as r:
1006+
prepare.build_correctness_model("neuronx-llama3-2-1b")
1007+
r.launch(container='pytorch-inf2-1')
1008+
client.run("correctness neuronx-llama3-2-1b".split())
10091009

10101010

10111011
class TestMultiModalLmiDist:

0 commit comments

Comments
 (0)