@@ -832,7 +832,11 @@ def test_qnn_backend_up_sampling_nearest_2d_with_size(self):
832832 self .lower_module_and_test_output (module , sample_input )
833833
834834 def test_qnn_backend_layer_norm (self ):
835- modules = [LayerNorm (), LayerNorm (bias = False )] # noqa: F405
835+ modules = [
836+ LayerNorm (), # noqa: F405
837+ LayerNorm (bias = False ), # noqa: F405
838+ LayerNormWithoutParams (768 ), # noqa: F405
839+ ]
836840 sample_input = (torch .randn (196 , 768 ),)
837841 for i , module in enumerate (modules ):
838842 with self .subTest (i = i ):
@@ -2360,7 +2364,11 @@ def test_qnn_backend_up_sampling_nearest_2d_with_size(self):
23602364 self .lower_module_and_test_output (module , sample_input )
23612365
23622366 def test_qnn_backend_layer_norm (self ):
2363- modules = [LayerNorm (), LayerNorm (bias = False )] # noqa: F405
2367+ modules = [
2368+ LayerNorm (), # noqa: F405
2369+ LayerNorm (bias = False ), # noqa: F405
2370+ LayerNormWithoutParams (768 ), # noqa: F405
2371+ ]
23642372 sample_input = (torch .randn (196 , 768 ),)
23652373 for i , module in enumerate (modules ):
23662374 with self .subTest (i = i ):
@@ -4863,6 +4871,65 @@ def test_llama_stories_110m(self):
48634871 if not self .compile_only and not self .enable_x86_64 :
48644872 self .assertGreaterEqual (msg ["inference_speed" ], 220 ) # Lanai
48654873
def test_static_olmo(self):
    """Run the static-llama example script for ``olmo-1b`` and check its report.

    Skips when ``self.required_envs()`` is falsy. Otherwise spawns
    ``llama.py`` with ``--model_mode kv`` and a wikitext perplexity
    evaluation, reads one JSON message from a ``Listener`` bound to
    ``(self.ip, self.port)``, and fails on a reported ``"Error"`` or when the
    perplexity, pte size, or (for models that have a reference value)
    inference speed is out of bounds.
    """
    if not self.required_envs():
        self.skipTest("missing required envs")

    prompt = "Simply put, the theory of relativity states that"
    script = f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py"

    # Fixed part of the command line; argument order matches the original list.
    cmds = ["python", script]
    cmds += ["--artifact", self.artifact_dir]
    cmds += ["--build_folder", self.build_folder]
    cmds += ["--model", self.model]
    cmds += ["--ip", self.ip]
    cmds += ["--port", str(self.port)]
    cmds += ["--prompt", prompt]
    cmds += ["--decoder_model", "olmo-1b"]
    cmds += ["--model_mode", "kv"]
    cmds += ["--temperature", "0"]
    cmds += ["--max_seq_len", "1024"]
    cmds += ["--eval_perplexity"]
    cmds += ["--task", "wikitext"]

    # Optional flags driven by the test configuration.
    # NOTE(review): --enable_x86_64 is only considered when no host is set;
    # confirm this if/elif pairing against the upstream file.
    if self.compile_only:
        cmds.append("--compile_only")
    elif self.device:
        cmds += ["--device", self.device]
    if self.host:
        cmds += ["--host", self.host]
    elif self.enable_x86_64:
        cmds.append("--enable_x86_64")
    if self.pre_gen_pte:
        cmds += ["--pre_gen_pte", self.pre_gen_pte]

    process = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
    with Listener((self.ip, self.port)) as listener:
        connection = listener.accept()
        # Wait for the script to exit before reading the message it sent.
        process.communicate()
        report = json.loads(connection.recv())

        # TestCase.fail raises, so nothing below runs on a reported error.
        if "Error" in report:
            self.fail(report["Error"])

        speed_floor_by_model = {"SM8650": 35, "SM8750": 60}
        self.assertLessEqual(report["wiki_ppl"], 10)
        self.assertLessEqual(report["pte_size"], 1_000_000_000)  # 1GB
        # Inference speed is only checked for models with a reference value.
        if self.model in speed_floor_by_model:
            self.assertGreaterEqual(
                report["inference_speed"], speed_floor_by_model[self.model]
            )
4932+
48664933 def test_static_phi4 (self ):
48674934 if not self .required_envs ():
48684935 self .skipTest ("missing required envs" )
0 commit comments