@@ -4049,7 +4049,7 @@ def test_llama3_2_1b(self):
             "16a4w",
             "--temperature",
             "0",
-            "--llama_model",
+            "--decoder_model",
             "llama3_2",
             "--model_mode",
             "hybrid",
@@ -4129,7 +4129,7 @@ def test_llama_stories_110m(self):
             "16a4w",
             "--temperature",
             "0",
-            "--llama_model",
+            "--decoder_model",
             "stories110m",
             "--model_mode",
             "hybrid",
@@ -4171,6 +4171,65 @@ def test_llama_stories_110m(self):
         if not self.compile_only and not self.enable_x86_64:
             self.assertGreaterEqual(msg["inference_speed"], 220)  # Lanai

+    def test_qwen2_5(self):
+        if not self.required_envs():
+            self.skipTest("missing required envs")
+
+        prompt = "My favourite condiment is "
+        cmds = [
+            "python",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
+            "--artifact",
+            self.artifact_dir,
+            "--build_folder",
+            self.build_folder,
+            "--model",
+            self.model,
+            "--ip",
+            self.ip,
+            "--port",
+            str(self.port),
+            "--prompt",
+            f"{prompt}",
+            "--ptq",
+            "16a8w",
+            "--decoder_model",
+            "qwen2_5",
+            "--model_mode",
+            "hybrid",
+            "--prefill_ar_len",
+            "32",
+            "--max_seq_len",
+            "128",
+        ]
+        if self.compile_only:
+            cmds.extend(["--compile_only"])
+        elif self.device:
+            cmds.extend(["--device", self.device])
+            if self.host:
+                cmds.extend(["--host", self.host])
+        elif self.enable_x86_64:
+            cmds.extend(["--enable_x86_64"])
+        if self.pre_gen_pte:
+            cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
+
+        # Accuracy is bad for now. Just check that the user's prompt is returned.
+        golden_start_with = "My favourite condiment is "
+        p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
+        with Listener((self.ip, self.port)) as listener:
+            conn = listener.accept()
+            p.communicate()
+            msg = json.loads(conn.recv())
+            if "Error" in msg:
+                self.fail(msg["Error"])
+            else:
+                model_out = msg["result"][0]
+                self.assertTrue(
+                    model_out.startswith(golden_start_with),
+                    f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
+                )
+                self.assertGreaterEqual(msg["inference_speed"], 95)  # Lanai
+

 class TestExampleOssScript(TestQNN):
     def test_albert(self):