@@ -25,17 +25,18 @@ import torchvision.models as models
from torch.export import export, ExportedProgram
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
-from executorch.exir import EdgeProgramManager, ExecutorchProgramManager, to_edge
+from executorch.exir import EdgeProgramManager, ExecutorchProgramManager, to_edge_transform_and_lower
from executorch.exir.backend.backend_api import to_backend


mobilenet_v2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
sample_inputs = (torch.randn(1, 3, 224, 224),)

exported_program: ExportedProgram = export(mobilenet_v2, sample_inputs)
-edge: EdgeProgramManager = to_edge(exported_program)
-
-edge = edge.to_backend(XnnpackPartitioner())
+edge: EdgeProgramManager = to_edge_transform_and_lower(
+    exported_program,
+    partitioner=[XnnpackPartitioner()],
+)
```

We will go through this example with the [MobileNetV2](https://pytorch.org/hub/pytorch_vision_mobilenet_v2/) pretrained model downloaded from the TorchVision library. The flow of lowering a model begins after export. Here we call `to_edge_transform_and_lower`, passing the `XnnpackPartitioner`. The partitioner identifies the subgraphs suitable for the XNNPACK backend delegate to consume. Afterwards, the identified subgraphs are serialized with the XNNPACK delegate flatbuffer schema, and each subgraph is replaced with a call to the XNNPACK delegate.
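The lowered graph shown in the next hunk can be inspected directly; a minimal sketch, not from the tutorial itself, assuming `edge` is the `EdgeProgramManager` produced by `to_edge_transform_and_lower` above:

```python
# Sketch: print the lowered graph to see the inserted
# executorch_call_delegate call sites.
print(edge.exported_program().graph_module)
```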
@@ -47,16 +48,9 @@ GraphModule(
  (lowered_module_1): LoweredBackendModule()
)

-def forward(self, arg314_1):
-    lowered_module_0 = self.lowered_module_0
-    executorch_call_delegate = torch.ops.higher_order.executorch_call_delegate(lowered_module_0, arg314_1);  lowered_module_0 = arg314_1 = None
-    getitem = executorch_call_delegate[0];  executorch_call_delegate = None
-    aten_view_copy_default = executorch_exir_dialects_edge__ops_aten_view_copy_default(getitem, [1, 1280]);  getitem = None
-    aten_clone_default = executorch_exir_dialects_edge__ops_aten_clone_default(aten_view_copy_default);  aten_view_copy_default = None
-    lowered_module_1 = self.lowered_module_1
-    executorch_call_delegate_1 = torch.ops.higher_order.executorch_call_delegate(lowered_module_1, aten_clone_default);  lowered_module_1 = aten_clone_default = None
-    getitem_1 = executorch_call_delegate_1[0];  executorch_call_delegate_1 = None
-    return (getitem_1,)
+
+
+def forward(self, b_features_0_1_num_batches_tracked, b_getattr_l__self___features___1___conv_0_1_num_batches_tracked, b_getattr_l__self___features___1___conv_2_num_batches_tracked, b_getattr_l__self___features___2___conv_0_1_num_batches_tracked, b_getattr_l__self___features___2___conv_1_1_num_batches_tracked, b_getattr_l__self___features___2___conv_3_num_batches_tracked, b_getattr_l__self___features___3___conv_0_1_num_batches_tracked, b_getattr_l__self___features___3___conv_1_1_num_batches_tracked, b_getattr_l__self___features___3___conv_3_num_batches_tracked, b_getattr_l__self___features___4___conv_0_1_num_batches_tracked, b_getattr_l__self___features___4___conv_1_1_num_batches_tracked, b_getattr_l__self___features___4___conv_3_num_batches_tracked, b_getattr_l__self___features___5___conv_0_1_num_batches_tracked, b_getattr_l__self___features___5___conv_1_1_num_batches_tracked, b_getattr_l__self___features___5___conv_3_num_batches_tracked, b_getattr_l__self___features___6___conv_0_1_num_batches_tracked, b_getattr_l__self___features___6___conv_1_1_num_batches_tracked, b_getattr_l__self___features___6___conv_3_num_batches_tracked, b_getattr_l__self___features___7___conv_0_1_num_batches_tracked, b_getattr_l__self___features___7___conv_1_1_num_batches_tracked, b_getattr_l__self___features___7___conv_3_num_batches_tracked, b_getattr_l__self___features___8___conv_0_1_num_batches_tracked, b_getattr_l__self___features___8___conv_1_1_num_batches_tracked, b_getattr_l__self___features___8___conv_3_num_batches_tracked, b_getattr_l__self___features___9___conv_0_1_num_batches_tracked, b_getattr_l__self___features___9___conv_1_1_num_batches_tracked, b_getattr_l__self___features___9___conv_3_num_batches_tracked, b_getattr_l__self___features___10___conv_0_1_num_batches_tracked, b_getattr_l__self___features___10___conv_1_1_num_batches_tracked, b_getattr_l__self___features___10___conv_3_num_batches_tracked, b_getattr_l__self___features___11___conv_0_1_num_batches_tracked, b_getattr_l__self___features___11___conv_1_1_num_batches_tracked, b_getattr_l__self___features___11___conv_3_num_batches_tracked, b_getattr_l__self___features___12___conv_0_1_num_batches_tracked, b_getattr_l__self___features___12___conv_1_1_num_batches_tracked, b_getattr_l__self___features___12___conv_3_num_batches_tracked, b_getattr_l__self___features___13___conv_0_1_num_batches_tracked, b_getattr_l__self___features___13___conv_1_1_num_batches_tracked, b_getattr_l__self___features___13___conv_3_num_batches_tracked, b_getattr_l__self___features___14___conv_0_1_num_batches_tracked, b_getattr_l__self___features___14___conv_1_1_num_batches_tracked, b_getattr_l__self___features___14___conv_3_num_batches_tracked, b_getattr_l__self___features___15___conv_0_1_num_batches_tracked, b_getattr_l__self___features___15___conv_1_1_num_batches_tracked, b_getattr_l__self___features___15___conv_3_num_batches_tracked, b_getattr_l__self___features___16___conv_0_1_num_batches_tracked, b_getattr_l__self___features___16___conv_1_1_num_batches_tracked, b_getattr_l__self___features___16___conv_3_num_batches_tracked, b_getattr_l__self___features___17___conv_0_1_num_batches_tracked, b_getattr_l__self___features___17___conv_1_1_num_batches_tracked, b_getattr_l__self___features___17___conv_3_num_batches_tracked, b_features_18_1_num_batches_tracked, x):
```

We print the graph after lowering above to show the new nodes that were inserted to call the XNNPACK delegate. The subgraphs being delegated to XNNPACK are the first argument at each call site. We can observe that the majority of `convolution-relu-add` blocks and `linear` blocks were delegated to XNNPACK. We can also see the operators that could not be lowered to the XNNPACK delegate, such as `clone` and `view_copy`.
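To enumerate which operators stayed outside the delegate, one can walk the graph directly; a minimal sketch, not from the tutorial, assuming `edge` from the snippet above and using a simple name-based filter as an illustrative heuristic:

```python
# Sketch: list edge (aten) ops left outside any XNNPACK delegate,
# e.g. the clone and view_copy nodes discussed above.
for node in edge.exported_program().graph_module.graph.nodes:
    if node.op == "call_function" and "aten" in str(node.target):
        print(node.target)
```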
@@ -75,7 +69,7 @@ The XNNPACK delegate can also execute symmetrically quantized models. To underst

```python
from torch.export import export_for_training
-from executorch.exir import EdgeCompileConfig
+from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower

mobilenet_v2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
sample_inputs = (torch.randn(1, 3, 224, 224),)
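Between this hunk and the next, the tutorial quantizes the model with the standard PT2E flow. A minimal sketch of that elided flow, assuming the `XNNPACKQuantizer` import path of this PyTorch era (it later moved into `executorch`) and reusing `sample_inputs` for calibration:

```python
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

# Stage one: capture a training-friendly graph, then quantize it (PT2E).
training_gm = export_for_training(mobilenet_v2, sample_inputs).module()
quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())
prepared = prepare_pt2e(training_gm, quantizer)
prepared(*sample_inputs)  # calibration; real use needs representative data
quantized_mobilenetv2 = convert_pt2e(prepared)
```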
@@ -111,9 +105,11 @@ Quantization requires a two stage export. First we use the `export_for_training`

```python
# Continued from earlier...
-edge = to_edge(export(quantized_mobilenetv2, sample_inputs), compile_config=EdgeCompileConfig(_check_ir_validity=False))
-
-edge = edge.to_backend(XnnpackPartitioner())
+edge = to_edge_transform_and_lower(
+    export(quantized_mobilenetv2, sample_inputs),
+    compile_config=EdgeCompileConfig(_check_ir_validity=False),
+    partitioner=[XnnpackPartitioner()]
+)

exec_prog = edge.to_executorch()

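Finally, the program is typically serialized to a `.pte` file for the ExecuTorch runtime; a minimal sketch, with an illustrative filename:

```python
# Write the serialized program out for the on-device runtime.
with open("mv2_xnnpack.pte", "wb") as file:
    file.write(exec_prog.buffer)
```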