[ci] fix correctness testing for neuronx (#2704)

sindhuvahinis · web-flow · commit b5e4ee991ec4 · 2025-02-02T20:08:35.000-08:00
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
@@ -835,10 +835,7 @@ def get_model_name():
         "num_run": 4,
         "tokenizer": "bullerwins/Codestral-22B-v0.1-hf",
         "dataset": "humaneval",
-        "score": 0.01,
-        "parameters": {
-            "return_full_text": True
-        }
+        "score": 0.01
     },
     "trtllm-llama3-8b": {
         "batch_size": [213],
@@ -856,13 +853,13 @@ def get_model_name():
         "dataset": "mmlu",
         "score": 0.6
     },
-    "neuronx-llama3-1-8b": {
-        "batch_size": [213],
+    "neuronx-llama3-2-1b": {
+        "batch_size": [32],
         "seq_length": [1],
         "num_run": 66,
-        "tokenizer": "TheBloke/Llama-2-7B-fp16",
+        "tokenizer": "NousResearch/Llama-3.2-1B",
         "dataset": "mmlu",
-        "score": 0.6
+        "score": 0.45
     },
     "trtllm-meta-llama3-8b-fp8": {
         "batch_size": [213],
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
@@ -1253,8 +1253,9 @@
     "neuronx-codestral-22b": {
         "engine": "Python",
         "option.entryPoint": "djl_python.transformers_neuronx",
-        "option.model_id": "bullerwins/Codestral-22B-v0.1-hf",
-        "option.tensor_parallel_degree": 12,
+        "option.model_id": "s3://djl-llm/Codestral-22B-v0.1-hf/",
+        "option.tensor_parallel_degree": 8,
+        "option.block_size": 32,
         "option.n_positions": 1024,
         "option.rolling_batch": "auto",
         "option.max_rolling_batch_size": 41,
@@ -1276,15 +1277,16 @@
         "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 213
     },
-    "neuronx-llama3-1-8b": {
+    "neuronx-llama3-2-1b": {
         "engine": "Python",
         "option.entryPoint": "djl_python.transformers_neuronx",
-        "option.model_id": "s3://djl-llm/llama-3.1-8b-hf/",
-        "option.tensor_parallel_degree": 12,
-        "option.n_positions": 768,
-        "option.rolling_batch": "auto",
-        "option.max_rolling_batch_size": 213,
-        "option.model_loading_timeout": 1800
+        "option.model_id": "s3://djl-llm/llama-3-2-1b-instruct/",
+        "option.tensor_parallel_degree": 2,
+        "option.n_positions": 1024,
+        "option.block_size": 32,
+        "option.rolling_batch": "vllm",
+        "option.max_rolling_batch_size": 32,
+        "option.model_loading_timeout": 2400
     },
     "trtllm-meta-llama3-8b-fp8": {
         "engine": "Python",
diff --git a/tests/integration/tests.py b/tests/integration/tests.py
@@ -998,14 +998,14 @@ class TestCorrectnessNeuronx:
     def test_codestral_22b(self):
         with Runner('pytorch-inf2', 'codestral-22b') as r:
             prepare.build_correctness_model("neuronx-codestral-22b")
-            r.launch(container='pytorch-inf2-6')
+            r.launch(container='pytorch-inf2-4')
             client.run("correctness neuronx-codestral-22b".split())
 
-    def test_llama3_1_8b(self):
-        with Runner('pytorch-inf2', 'llama3-1-8b') as r:
-            prepare.build_correctness_model("neuronx-llama3-1-8b")
-            r.launch(container='pytorch-inf2-6')
-            client.run("correctness neuronx-llama3-1-8b".split())
+    def test_llama3_2_1b(self):
+        with Runner('pytorch-inf2', 'llama3-2-1b') as r:
+            prepare.build_correctness_model("neuronx-llama3-2-1b")
+            r.launch(container='pytorch-inf2-1')
+            client.run("correctness neuronx-llama3-2-1b".split())
 
 
 class TestMultiModalLmiDist: