@@ -354,6 +354,95 @@ The model, inputs, and output location are passed to `qnn_executorch_runner` by

Please refer to `$EXECUTORCH_ROOT/examples/qualcomm/scripts/` and `$EXECUTORCH_ROOT/examples/qualcomm/oss_scripts/` for the list of supported models.

## How to Support a Custom Model in HTP Backend

### Step-by-Step Implementation Guide

Please refer to [the simple example](https://github.com/pytorch/executorch/blob/main/examples/qualcomm/scripts/export_example.py) and the [more complicated examples](https://github.com/pytorch/executorch/tree/main/examples/qualcomm/scripts) for reference.

#### Step 1: Prepare Your Model
```python
import torch

# Initialize your custom model
model = YourModelClass().eval()  # Your custom PyTorch model

# Create example inputs (adjust shape as needed)
example_inputs = (torch.randn(1, 3, 224, 224),)  # Example input tensor
```
#### Step 2 (Optional): Quantize Your Model
Choose between two quantization approaches, post-training quantization (PTQ) or quantization-aware training (QAT):
```python
from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, prepare_qat_pt2e, convert_pt2e

quantizer = QnnQuantizer()
m = torch.export.export(model, example_inputs, strict=True).module()

# PTQ (Post-Training Quantization)
if quantization_type == "ptq":
    prepared_model = prepare_pt2e(m, quantizer)
    # Calibration loop would go here
    prepared_model(*example_inputs)

# QAT (Quantization-Aware Training)
elif quantization_type == "qat":
    prepared_model = prepare_qat_pt2e(m, quantizer)
    # Training loop would go here
    for _ in range(training_steps):
        prepared_model(*example_inputs)

# Convert to quantized model
quantized_model = convert_pt2e(prepared_model)
```
#### Step 3: Configure Compile Specs
During this step, you will need to specify the target SoC, data type, and other QNN compile spec options.
```python
from executorch.backends.qualcomm.compiler import (
    generate_qnn_executorch_compiler_spec,
    generate_htp_compiler_spec,
)
from executorch.backends.qualcomm.utils.utils import QcomChipset

# HTP Compiler Configuration
backend_options = generate_htp_compiler_spec(
    use_fp16=not quantized,  # False for quantized models
)

# QNN Compiler Spec
compile_spec = generate_qnn_executorch_compiler_spec(
    soc_model=QcomChipset.SM8650,  # Your target SoC
    backend_options=backend_options,
    saver=False,  # Set True to save QNN artifacts
)
```
#### Step 4: Lower and Export the Model
```python
from executorch.backends.qualcomm.partition.qnn_partitioner import (
    to_edge_transform_and_lower_to_qnn,
)
from executorch.exir import ExecutorchBackendConfig

# Lower to QNN backend
delegated_program = to_edge_transform_and_lower_to_qnn(
    quantized_model if quantized else model,
    example_inputs,
    compile_spec
)

# Export to ExecuTorch format
executorch_program = delegated_program.to_executorch(
    config=ExecutorchBackendConfig(extract_delegate_segments=False)
)

# Save the compiled model
model_name = "custom_model_qnn.pte"
with open(model_name, "wb") as f:
    f.write(executorch_program.buffer)
print(f"Model successfully exported to {model_name}")
```
## What is coming?

- Improve the performance for llama3-8B-Instruct and support batch prefill.