@@ -310,18 +310,6 @@ def test_chatglm3_6b(self):
310310 r .launch ("CUDA_VISIBLE_DEVICES=0,1,2,3" )
311311 client .run ("trtllm chatglm3-6b" .split ())
312312
313- def test_gpt2 (self ):
314- with Runner ('tensorrt-llm' , 'gpt2' ) as r :
315- prepare .build_trtllm_handler_model ("gpt2" )
316- r .launch ("CUDA_VISIBLE_DEVICES=0,1,2,3" )
317- client .run ("trtllm gpt2" .split ())
318-
319- def test_santacoder (self ):
320- with Runner ('tensorrt-llm' , 'santacoder' ) as r :
321- prepare .build_trtllm_handler_model ("santacoder" )
322- r .launch ("CUDA_VISIBLE_DEVICES=0,1,2,3" )
323- client .run ("trtllm santacoder" .split ())
324-
325313 def test_llama_31_8b (self ):
326314 with Runner ('tensorrt-llm' , 'llama-3-1-8b' ) as r :
327315 prepare .build_trtllm_handler_model ('llama-3-1-8b' )
@@ -345,12 +333,6 @@ def test_mistral(self):
345333 r .launch ("CUDA_VISIBLE_DEVICES=0,1,2,3" )
346334 client .run ("trtllm mistral-7b" .split ())
347335
348- def test_gpt_j_6b (self ):
349- with Runner ('tensorrt-llm' , 'gpt-j-6b' ) as r :
350- prepare .build_trtllm_handler_model ("gpt-j-6b" )
351- r .launch ("CUDA_VISIBLE_DEVICES=0" )
352- client .run ("trtllm gpt-j-6b" .split ())
353-
354336 def test_qwen_7b (self ):
355337 with Runner ('tensorrt-llm' , 'qwen-7b' ) as r :
356338 prepare .build_trtllm_handler_model ("qwen-7b" )
@@ -563,31 +545,6 @@ def test_llama3_8b(self):
563545@pytest .mark .gpu_4
564546class TestVllm1 :
565547
566- def test_gpt_neox_20b (self ):
567- with Runner ('lmi' , 'gpt-neox-20b' ) as r :
568- prepare .build_vllm_model ("gpt-neox-20b" )
569- r .launch ()
570- client .run ("vllm gpt-neox-20b" .split ())
571-
572- def test_mistral_7b (self ):
573- with Runner ('lmi' , 'mistral-7b' ) as r :
574- prepare .build_vllm_model ("mistral-7b" )
575- r .launch ()
576- client .run ("vllm mistral-7b" .split ())
577- client .run ("vllm_chat mistral-7b" .split ())
578-
579- def test_phi2 (self ):
580- with Runner ('lmi' , 'phi-2' ) as r :
581- prepare .build_vllm_model ("phi-2" )
582- r .launch ("VLLM_USE_V1=0" )
583- client .run ("vllm phi-2" .split ())
584-
585- def test_starcoder2_7b (self ):
586- with Runner ('lmi' , 'starcoder2-7b' ) as r :
587- prepare .build_vllm_model ("starcoder2-7b" )
588- r .launch ()
589- client .run ("vllm starcoder2-7b" .split ())
590-
591548 def test_gemma_2b (self ):
592549 with Runner ('lmi' , 'gemma-2b' ) as r :
593550 prepare .build_vllm_model ("gemma-2b" )
@@ -968,12 +925,6 @@ def test_llama_vllm_nxdi_aot(self):
968925@pytest .mark .gpu_4
969926class TestCorrectnessTrtLlm :
970927
971- def test_codestral_22b (self ):
972- with Runner ('tensorrt-llm' , 'codestral-22b' ) as r :
973- prepare .build_correctness_model ("trtllm-codestral-22b" )
974- r .launch ("CUDA_VISIBLE_DEVICES=0,1,2,3" )
975- client .run ("correctness trtllm-codestral-22b" .split ())
976-
977928 def test_llama3_8b (self ):
978929 with Runner ('tensorrt-llm' , 'llama3-8b' ) as r :
979930 prepare .build_correctness_model ("trtllm-llama3-8b" )
0 commit comments