1616import shutil
1717import subprocess
1818import tempfile
19+ from pathlib import Path
1920
2021import pytest
2122
23+ from nemo_export .tensorrt_llm import TensorRTLLM
24+
# Configure root logging at INFO so messages logged by this test module
# (e.g. temporary-directory cleanup warnings) are emitted, then create the
# module-scoped logger used below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
2427
2528
@pytest.fixture
def tmp_dir():
    """
    Provide a fresh temporary directory to a test and remove it afterwards.

    Yields:
        str: path of a directory created with ``tempfile.mkdtemp()``.

    After the test finishes, the directory tree is deleted. If the directory
    is already gone (``FileNotFoundError``), a warning is logged instead of
    failing the teardown; any other error from ``shutil.rmtree`` propagates.
    """
    # Use a distinct local name so the fixture function is not shadowed.
    path = tempfile.mkdtemp()
    yield path
    try:
        shutil.rmtree(path)
    except FileNotFoundError as e:
        logger.warning("Error removing temporary directory %s: %s", path, e)
37+
38+
2639class TestTRTLLMExport :
2740 @pytest .mark .pleasefixme
2841 @pytest .mark .parametrize ("tensor_parallelism_size" , [2 , 1 ])
2942 def test_nemo2_convert_to_export (self , tensor_parallelism_size ):
3043 """
3144 Test safe tensor exporter. This tests the whole nemo export until engine building.
3245 """
33- from pathlib import Path
34-
35- from nemo_export .tensorrt_llm import TensorRTLLM
3646
3747 trt_llm_exporter = TensorRTLLM (model_dir = "/tmp/safe_tensor_test_2/" )
3848 trt_llm_exporter .export (
@@ -90,8 +100,7 @@ def test_nemo2_convert_to_export(self, tensor_parallelism_size):
90100
91101 shutil .rmtree ("/tmp/safe_tensor_test_2/" )
92102
93- def test_export_hf (self ):
94- tmp_dir = tempfile .mkdtemp ()
103+ def test_export_hf (self , tmp_dir ):
95104 subprocess .run (
96105 [
97106 "coverage" ,
@@ -117,13 +126,7 @@ def test_export_hf(self):
117126 check = True ,
118127 )
119128
120- try :
121- shutil .rmtree (tmp_dir )
122- except FileNotFoundError as e :
123- logger .warning (f"Error removing temporary directory { tmp_dir } : { e } " )
124-
125- def test_export_nemo2 (self ):
126- tmp_dir = tempfile .mkdtemp ()
129+ def test_export_nemo2 (self , tmp_dir ):
127130 subprocess .run (
128131 [
129132 "coverage" ,
@@ -147,3 +150,134 @@ def test_export_nemo2(self):
147150 "--debug" ,
148151 ]
149152 )
153+
def test_export_qnemo(self, tmp_dir):
    """
    Test the quantized checkpoint export flow by chaining four helper scripts.

    Each script is run under ``coverage run`` and reads the previous step's
    output from inside ``tmp_dir``:

    1. ``create_hf_model.py`` writes a model with reduced config to ``llama_tiny_hf``.
    2. ``test_hf_import.py`` takes ``llama_tiny_hf`` and writes ``nemo2_ckpt``.
    3. ``create_ptq_ckpt.py`` takes ``nemo2_ckpt`` with the ``int8_sq`` algorithm
       and ``trtllm`` export format, writing ``nemo2_ptq``.
    4. ``run_nemo_export.py`` takes ``nemo2_ptq`` as the checkpoint dir and
       ``trt_llm_model_dir/`` as the model dir, with ``--test_deployment True``.

    Every step uses ``check=True``, so a non-zero exit status raises
    ``subprocess.CalledProcessError`` and fails the test at that step.

    Args:
        tmp_dir: temporary directory path supplied by the ``tmp_dir`` fixture.
    """
    # Common launcher prefix shared by all four steps.
    coverage_run = [
        "coverage",
        "run",
        "--data-file=/workspace/.coverage",
        "--source=/workspace/",
        "--parallel-mode",
    ]

    # Intermediate artifacts; all live directly under tmp_dir so the fixture's
    # cleanup removes them.
    hf_model_dir = f"{tmp_dir}/llama_tiny_hf"
    nemo2_ckpt_dir = f"{tmp_dir}/nemo2_ckpt"
    ptq_ckpt_dir = f"{tmp_dir}/nemo2_ptq"
    trt_llm_model_dir = f"{tmp_dir}/trt_llm_model_dir/"

    subprocess.run(
        [
            *coverage_run,
            "tests/functional_tests/utils/create_hf_model.py",
            "--model_name_or_path",
            "/home/TestData/hf/Llama-2-7b-hf",
            "--output_dir",
            hf_model_dir,
            "--config_updates",
            '{"num_hidden_layers": 2, "hidden_size": 512, "intermediate_size": 384, "num_attention_heads": 8, "num_key_value_heads": 8}',
        ],
        check=True,
    )

    subprocess.run(
        [
            *coverage_run,
            "tests/functional_tests/utils/test_hf_import.py",
            "--hf_model",
            hf_model_dir,
            "--output_path",
            nemo2_ckpt_dir,
        ],
        check=True,
    )

    subprocess.run(
        [
            *coverage_run,
            "tests/functional_tests/utils/create_ptq_ckpt.py",
            "--nemo_checkpoint",
            nemo2_ckpt_dir,
            "--algorithm",
            "int8_sq",
            "--calibration_dataset",
            "tests/functional_tests/data/calibration_dataset.json",
            "--calibration_batch_size",
            "2",
            "--calibration_dataset_size",
            "6",
            "--export_format",
            "trtllm",
            "--export_path",
            ptq_ckpt_dir,
            "--generate_sample",
        ],
        check=True,
    )

    subprocess.run(
        [
            *coverage_run,
            "tests/functional_tests/utils/run_nemo_export.py",
            "--model_name",
            "test",
            "--model_dir",
            trt_llm_model_dir,
            "--checkpoint_dir",
            ptq_ckpt_dir,
            "--min_tps",
            "1",
            "--test_deployment",
            "True",
            "--debug",
        ],
        check=True,
    )
238+
def _run_onnx_export(self, quant_cfg="int8_sq"):
    """
    Run ``tests/functional_tests/utils/test_export_onnx.py`` under coverage.

    Args:
        quant_cfg: value passed to the script's ``--quant_cfg`` option.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status (``check=True``).
    """
    subprocess.run(
        [
            "coverage",
            "run",
            "--data-file=/workspace/.coverage",
            "--source=/workspace/",
            "--parallel-mode",
            "tests/functional_tests/utils/test_export_onnx.py",
            "--hf_model_path",
            "/home/TestData/llm/models/llama-3.2-nv-embedqa-1b-v2",
            "--quant_cfg",
            quant_cfg,
            "--calibration_dataset",
            "tests/functional_tests/data/calibration_dataset.json",
            "--calibration_batch_size",
            "2",
            "--calibration_dataset_size",
            "6",
        ],
        check=True,
    )

def test_export_onnx(self):
    """
    Test the ONNX export script with the ``int8_sq`` quantization config.
    """
    # NOTE(review): this runs exactly the same command as
    # test_export_onnx_int8 -- confirm whether a different quant_cfg was
    # intended here.
    self._run_onnx_export("int8_sq")

def test_export_onnx_int8(self):
    """
    Test the ONNX export script with the ``int8_sq`` quantization config.
    """
    self._run_onnx_export("int8_sq")
0 commit comments