Skip to content

Commit 1aca75c

Browse files
authored
tests: Refactor to TRTLLM (#218)
Signed-off-by: oliver könig <[email protected]>
1 parent 4aaed2b commit 1aca75c

File tree

5 files changed

+146
-101
lines changed

5 files changed

+146
-101
lines changed

.github/workflows/cicd-main.yml

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -230,12 +230,6 @@ jobs:
230230
include:
231231
- script: L2_Launch_TRTLLM
232232
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
233-
- script: L2_ONNX_TRT_LLM_Embedding_Export
234-
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
235-
- script: L2_ONNX_TRT_LLM_Embedding_Export_INT8
236-
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
237-
- script: L2_NeMo_2_Export_Qnemo_TRT_LLM
238-
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
239233
- script: L2_TRTLLM_API_Deploy_Query
240234
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
241235
needs: [cicd-unit-tests-trtllm]

tests/functional_tests/L2_NeMo_2_Export_Qnemo_TRT_LLM.sh

Lines changed: 0 additions & 44 deletions
This file was deleted.

tests/functional_tests/L2_ONNX_TRT_LLM_Embedding_Export.sh

Lines changed: 0 additions & 20 deletions
This file was deleted.

tests/functional_tests/L2_ONNX_TRT_LLM_Embedding_Export_INT8.sh

Lines changed: 0 additions & 19 deletions
This file was deleted.

tests/functional_tests/tests_trtllm/test_export.py

Lines changed: 146 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -16,23 +16,33 @@
1616
import shutil
1717
import subprocess
1818
import tempfile
19+
from pathlib import Path
1920

2021
import pytest
2122

23+
from nemo_export.tensorrt_llm import TensorRTLLM
24+
2225
logging.basicConfig(level=logging.INFO)
2326
logger = logging.getLogger(__name__)
2427

2528

29+
@pytest.fixture
def tmp_dir():
    """
    Provide a fresh temporary directory to a test and remove it afterwards.

    Yields:
        str: path of a directory created with ``tempfile.mkdtemp``.
    """
    scratch_dir = tempfile.mkdtemp()
    yield scratch_dir

    # Teardown: a directory that has already disappeared is only logged,
    # it does not raise.
    try:
        shutil.rmtree(scratch_dir)
    except FileNotFoundError as err:
        logger.warning(f"Error removing temporary directory {scratch_dir}: {err}")
37+
38+
2639
class TestTRTLLMExport:
2740
@pytest.mark.pleasefixme
2841
@pytest.mark.parametrize("tensor_parallelism_size", [2, 1])
2942
def test_nemo2_convert_to_export(self, tensor_parallelism_size):
3043
"""
3144
Test safe tensor exporter. This tests the whole nemo export until engine building.
3245
"""
33-
from pathlib import Path
34-
35-
from nemo_export.tensorrt_llm import TensorRTLLM
3646

3747
trt_llm_exporter = TensorRTLLM(model_dir="/tmp/safe_tensor_test_2/")
3848
trt_llm_exporter.export(
@@ -90,8 +100,7 @@ def test_nemo2_convert_to_export(self, tensor_parallelism_size):
90100

91101
shutil.rmtree("/tmp/safe_tensor_test_2/")
92102

93-
def test_export_hf(self):
94-
tmp_dir = tempfile.mkdtemp()
103+
def test_export_hf(self, tmp_dir):
95104
subprocess.run(
96105
[
97106
"coverage",
@@ -117,13 +126,7 @@ def test_export_hf(self):
117126
check=True,
118127
)
119128

120-
try:
121-
shutil.rmtree(tmp_dir)
122-
except FileNotFoundError as e:
123-
logger.warning(f"Error removing temporary directory {tmp_dir}: {e}")
124-
125-
def test_export_nemo2(self):
126-
tmp_dir = tempfile.mkdtemp()
129+
def test_export_nemo2(self, tmp_dir):
127130
subprocess.run(
128131
[
129132
"coverage",
@@ -147,3 +150,134 @@ def test_export_nemo2(self):
147150
"--debug",
148151
]
149152
)
153+
154+
def test_export_qnemo(self, tmp_dir):
    """
    Run four helper scripts in sequence, each wrapped in ``coverage run``.

    Order: create_hf_model.py -> test_hf_import.py -> create_ptq_ckpt.py ->
    run_nemo_export.py. Every intermediate output path lives under ``tmp_dir``.
    Each subprocess is started with ``check=True``, so the first non-zero
    exit raises ``subprocess.CalledProcessError`` and later stages do not run.
    """
    # Prefix shared by every stage.
    coverage_prefix = [
        "coverage",
        "run",
        "--data-file=/workspace/.coverage",
        "--source=/workspace/",
        "--parallel-mode",
    ]

    # Paths handed from one stage to the next.
    hf_model_dir = f"{tmp_dir}/llama_tiny_hf"
    nemo2_ckpt_dir = f"{tmp_dir}/nemo2_ckpt"
    ptq_export_dir = f"{tmp_dir}/nemo2_ptq"

    stages = [
        [
            "tests/functional_tests/utils/create_hf_model.py",
            "--model_name_or_path",
            "/home/TestData/hf/Llama-2-7b-hf",
            "--output_dir",
            hf_model_dir,
            "--config_updates",
            '{"num_hidden_layers": 2, "hidden_size": 512, "intermediate_size": 384, "num_attention_heads": 8, "num_key_value_heads": 8}',
        ],
        [
            "tests/functional_tests/utils/test_hf_import.py",
            "--hf_model",
            hf_model_dir,
            "--output_path",
            nemo2_ckpt_dir,
        ],
        [
            "tests/functional_tests/utils/create_ptq_ckpt.py",
            "--nemo_checkpoint",
            nemo2_ckpt_dir,
            "--algorithm",
            "int8_sq",
            "--calibration_dataset",
            "tests/functional_tests/data/calibration_dataset.json",
            "--calibration_batch_size",
            "2",
            "--calibration_dataset_size",
            "6",
            "--export_format",
            "trtllm",
            "--export_path",
            ptq_export_dir,
            "--generate_sample",
        ],
        [
            "tests/functional_tests/utils/run_nemo_export.py",
            "--model_name",
            "test",
            "--model_dir",
            f"{tmp_dir}/trt_llm_model_dir/",
            "--checkpoint_dir",
            ptq_export_dir,
            "--min_tps",
            "1",
            "--test_deployment",
            "True",
            "--debug",
        ],
    ]

    # Stages are order-dependent: each consumes the previous stage's output.
    for stage_args in stages:
        subprocess.run([*coverage_prefix, *stage_args], check=True)
238+
239+
def test_export_onnx(self):
    """
    Run ``tests/functional_tests/utils/test_export_onnx.py`` under coverage.

    The subprocess is started with ``check=True``, so a non-zero exit raises
    ``subprocess.CalledProcessError`` and fails the test.
    """
    # NOTE(review): these arguments (including ``--quant_cfg int8_sq``) are
    # identical to the ones used by test_export_onnx_int8 -- confirm whether
    # this test was meant to exercise a non-quantized export instead.
    script_args = [
        "tests/functional_tests/utils/test_export_onnx.py",
        "--hf_model_path",
        "/home/TestData/llm/models/llama-3.2-nv-embedqa-1b-v2",
        "--quant_cfg",
        "int8_sq",
        "--calibration_dataset",
        "tests/functional_tests/data/calibration_dataset.json",
        "--calibration_batch_size",
        "2",
        "--calibration_dataset_size",
        "6",
    ]
    command = [
        "coverage",
        "run",
        "--data-file=/workspace/.coverage",
        "--source=/workspace/",
        "--parallel-mode",
        *script_args,
    ]
    subprocess.run(command, check=True)
261+
262+
def test_export_onnx_int8(self):
    """
    Run ``tests/functional_tests/utils/test_export_onnx.py`` with
    ``--quant_cfg int8_sq`` under coverage.

    The subprocess is started with ``check=True``, so a non-zero exit raises
    ``subprocess.CalledProcessError`` and fails the test.
    """
    # NOTE(review): the command line is identical to test_export_onnx --
    # confirm that two separate tests are intended.
    script_args = [
        "tests/functional_tests/utils/test_export_onnx.py",
        "--hf_model_path",
        "/home/TestData/llm/models/llama-3.2-nv-embedqa-1b-v2",
        "--quant_cfg",
        "int8_sq",
        "--calibration_dataset",
        "tests/functional_tests/data/calibration_dataset.json",
        "--calibration_batch_size",
        "2",
        "--calibration_dataset_size",
        "6",
    ]
    command = [
        "coverage",
        "run",
        "--data-file=/workspace/.coverage",
        "--source=/workspace/",
        "--parallel-mode",
        *script_args,
    ]
    subprocess.run(command, check=True)

0 commit comments

Comments
 (0)