|
17 | 17 | generate_htp_compiler_spec, |
18 | 18 | generate_qnn_executorch_compiler_spec, |
19 | 19 | skip_annotation, |
| 20 | + to_edge_transform_and_lower_to_qnn, |
20 | 21 | ) |
21 | 22 | from executorch.examples.qualcomm.utils import ( |
22 | 23 | build_executorch_binary, |
23 | 24 | make_output_dir, |
24 | 25 | make_quantizer, |
25 | 26 | parse_skip_delegation_node, |
26 | | - QnnPartitioner, |
27 | 27 | setup_common_args_and_variables, |
28 | 28 | SimpleADB, |
29 | 29 | ) |
30 | | -from executorch.exir import to_edge |
| 30 | +from executorch.exir import ExecutorchBackendConfig |
31 | 31 | from transformers import BertTokenizer, MobileBertForSequenceClassification |
32 | 32 |
|
33 | 33 |
|
@@ -273,30 +273,42 @@ def calibrator(gm): |
273 | 273 |
|
274 | 274 | quantizer = make_quantizer(quant_dtype=quant_dtype) |
275 | 275 | backend_options = generate_htp_compiler_spec(quant_dtype is not None) |
276 | | - partitioner = QnnPartitioner( |
277 | | - generate_qnn_executorch_compiler_spec( |
278 | | - soc_model=getattr(QcomChipset, args.model), |
279 | | - backend_options=backend_options, |
280 | | - ), |
281 | | - skip_node_id_set=skip_node_id_set, |
282 | | - skip_node_op_set=skip_node_op_set, |
| 276 | + # partitioner = QnnPartitioner( |
| 277 | + # generate_qnn_executorch_compiler_spec( |
| 278 | + # soc_model=getattr(QcomChipset, args.model), |
| 279 | + # backend_options=backend_options, |
| 280 | + # ), |
| 281 | + # skip_node_id_set=skip_node_id_set, |
| 282 | + # skip_node_op_set=skip_node_op_set, |
| 283 | + # ) |
| 284 | + backend_options = generate_htp_compiler_spec( |
| 285 | + use_fp16=False, |
| 286 | + ) |
| 287 | + compile_spec = generate_qnn_executorch_compiler_spec( |
| 288 | + soc_model=QcomChipset.SM8550, |
| 289 | + backend_options=backend_options, |
283 | 290 | ) |
284 | 291 | # skip embedding layer because it's quantization-sensitive |
285 | 292 | graph_module, _ = skip_annotation( |
286 | 293 | nn_module=model, |
287 | 294 | quantizer=quantizer, |
288 | | - partitioner=partitioner, |
| 295 | + compiler_specs=compile_spec, |
289 | 296 | sample_input=inputs[0], |
290 | 297 | calibration_cb=calibrator, |
291 | 298 | fp_node_op_set={torch.ops.aten.embedding.default}, |
292 | 299 | ) |
293 | 300 | # lower the whole graph again; the skipped operators will be left on the CPU |
294 | | - exec_prog = to_edge( |
295 | | - torch.export.export(graph_module, inputs[0], strict=True), |
296 | | - ).to_executorch() |
297 | | - |
| 301 | + # exec_prog = to_edge( |
| 302 | + # torch.export.export(graph_module, inputs[0], strict=True), |
| 303 | + # ).to_executorch() |
| 304 | + delegated_program = to_edge_transform_and_lower_to_qnn( |
| 305 | + graph_module, inputs[0], compile_spec |
| 306 | + ) |
| 307 | + executorch_program = delegated_program.to_executorch( |
| 308 | + config=ExecutorchBackendConfig(extract_delegate_segments=True) |
| 309 | + ) |
298 | 310 | with open(f"{args.artifact}/{pte_filename}.pte", "wb") as file: |
299 | | - file.write(exec_prog.buffer) |
| 311 | + file.write(executorch_program.buffer) |
300 | 312 |
|
301 | 313 | if args.compile_only: |
302 | 314 | return |
|
0 commit comments