@@ -2014,6 +2014,7 @@ def test_qnn_backend_multi_graphs(self):
20142014 soc_model = self .chipset_table [TestQNN .model ],
20152015 backend_options = backend_options ,
20162016 multiple_graphs = True ,
2017+ weight_sharing = True ,
20172018 graph_name = graph_name ,
20182019 )
20192020 for graph_name in graph_names
@@ -2577,6 +2578,7 @@ def test_qnn_backend_multi_graphs(self):
25772578 soc_model = self .chipset_table [TestQNN .model ],
25782579 backend_options = backend_options ,
25792580 multiple_graphs = True ,
2581+ weight_sharing = True ,
25802582 graph_name = graph_name ,
25812583 )
25822584 for graph_name in graph_names
@@ -3822,8 +3824,6 @@ def test_stories_single_llama(self):
38223824 self .artifact_dir ,
38233825 "--build_folder" ,
38243826 self .build_folder ,
3825- "--device" ,
3826- self .device ,
38273827 "--model" ,
38283828 self .model ,
38293829 "--checkpoint" ,
@@ -3846,9 +3846,21 @@ def test_stories_single_llama(self):
38463846 "0" ,
38473847 "--llama_model" ,
38483848 "stories110m" ,
3849+ "--model_mode" ,
3850+ "hybrid" ,
3851+ "--prefill_seq_len" ,
3852+ "32" ,
3853+ "--kv_seq_len" ,
3854+ "128" ,
38493855 ]
3856+ if self .compile_only :
3857+ cmds .extend (["--compile_only" ])
3858+ elif self .device :
3859+ cmds .extend (["--device" , self .device ])
38503860 if self .host :
38513861 cmds .extend (["--host" , self .host ])
3862+ elif self .enable_x86_64 :
3863+ cmds .extend (["--enable_x86_64" ])
38523864
38533865 golden_start_with = "Once upon a time,"
38543866 p = subprocess .Popen (cmds , stdout = subprocess .DEVNULL )
@@ -3859,8 +3871,13 @@ def test_stories_single_llama(self):
38593871 if "Error" in msg :
38603872 self .fail (msg ["Error" ])
38613873 else :
3862- model_out = msg ["result" ][0 ]
3863- self .assertTrue (model_out .startswith (golden_start_with ))
3874+ if not self .compile_only :
3875+ model_out = msg ["result" ][0 ]
3876+ self .assertTrue (model_out .startswith (golden_start_with ))
3877+ # x86 does not allow weight sharing, so we don't check pte size
3878+ if not self .enable_x86_64 :
3879+ pte_size = msg ["pte_size" ]
3880+ self .assertLessEqual (pte_size , 130000000 )
38643881
38653882 @unittest .skip ("dynamic shape inputs appear in recent torch.export.export" )
38663883 def test_mobilebert (self ):
@@ -4065,12 +4082,6 @@ def setup_environment():
40654082 help = "Path to open source software model repository" ,
40664083 type = str ,
40674084 )
4068- parser .add_argument (
4069- "-x" ,
4070- "--enable_x86_64" ,
4071- help = "Enable unittest to be executed on x86_64 platform" ,
4072- action = "store_true" ,
4073- )
40744085
40754086 args , ns_args = parser .parse_known_args (namespace = unittest )
40764087 TestQNN .host = args .host
@@ -4089,6 +4100,8 @@ def setup_environment():
40894100 TestQNN .shared_buffer = args .shared_buffer
40904101 TestQNN .enable_x86_64 = args .enable_x86_64
40914102 TestQNN .dump_intermediate_outputs = args .dump_intermediate_outputs
4103+ TestQNN .compile_only = args .compile_only
4104+
40924105 return sys .argv [:1 ] + ns_args
40934106
40944107
0 commit comments