@@ -25,38 +25,38 @@ import torchvision.models as models
from torch.export import export, ExportedProgram
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
-from executorch.exir import EdgeProgramManager, ExecutorchProgramManager, to_edge
-from executorch.exir.backend.backend_api import to_backend
+from executorch.exir import EdgeProgramManager, ExecutorchProgramManager, to_edge_transform_and_lower


mobilenet_v2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
sample_inputs = (torch.randn(1, 3, 224, 224),)

exported_program: ExportedProgram = export(mobilenet_v2, sample_inputs)
-edge: EdgeProgramManager = to_edge(exported_program)
-
-edge = edge.to_backend(XnnpackPartitioner())
+edge: EdgeProgramManager = to_edge_transform_and_lower(
+    exported_program,
+    partitioner=[XnnpackPartitioner()],
+)
```

We will go through this example with the [MobileNetV2](https://pytorch.org/hub/pytorch_vision_mobilenet_v2/) pretrained model downloaded from the TorchVision library. The flow of lowering a model starts after exporting the model with `torch.export`. We pass the exported program to the `to_edge_transform_and_lower` API along with the `XnnpackPartitioner`. The partitioner identifies the subgraphs suitable for the XNNPACK backend delegate to consume. Afterwards, the identified subgraphs are serialized with the XNNPACK Delegate flatbuffer schema, and each subgraph is replaced with a call to the XNNPACK Delegate.

```python
>>> print(edge.exported_program().graph_module)
-GraphModule(
-  (lowered_module_0): LoweredBackendModule()
-  (lowered_module_1): LoweredBackendModule()
-)
+GraphModule(
+  (lowered_module_0): LoweredBackendModule()
+  (lowered_module_1): LoweredBackendModule()
+)

-def forward(self, arg314_1):
-    lowered_module_0 = self.lowered_module_0
-    executorch_call_delegate = torch.ops.higher_order.executorch_call_delegate(lowered_module_0, arg314_1); lowered_module_0 = arg314_1 = None
-    getitem = executorch_call_delegate[0]; executorch_call_delegate = None
-    aten_view_copy_default = executorch_exir_dialects_edge__ops_aten_view_copy_default(getitem, [1, 1280]); getitem = None
-    aten_clone_default = executorch_exir_dialects_edge__ops_aten_clone_default(aten_view_copy_default); aten_view_copy_default = None
-    lowered_module_1 = self.lowered_module_1
-    executorch_call_delegate_1 = torch.ops.higher_order.executorch_call_delegate(lowered_module_1, aten_clone_default); lowered_module_1 = aten_clone_default = None
-    getitem_1 = executorch_call_delegate_1[0]; executorch_call_delegate_1 = None
-    return (getitem_1,)
+def forward(self, b_features_0_1_num_batches_tracked, b_getattr_l__self___features___1___conv_0_1_num_batches_tracked, b_getattr_l__self___features___1___conv_2_num_batches_tracked, b_getattr_l__self___features___2___conv_0_1_num_batches_tracked, b_getattr_l__self___features___2___conv_1_1_num_batches_tracked, b_getattr_l__self___features___2___conv_3_num_batches_tracked, b_getattr_l__self___features___3___conv_0_1_num_batches_tracked, b_getattr_l__self___features___3___conv_1_1_num_batches_tracked, b_getattr_l__self___features___3___conv_3_num_batches_tracked, b_getattr_l__self___features___4___conv_0_1_num_batches_tracked, b_getattr_l__self___features___4___conv_1_1_num_batches_tracked, b_getattr_l__self___features___4___conv_3_num_batches_tracked, b_getattr_l__self___features___5___conv_0_1_num_batches_tracked, b_getattr_l__self___features___5___conv_1_1_num_batches_tracked, b_getattr_l__self___features___5___conv_3_num_batches_tracked, b_getattr_l__self___features___6___conv_0_1_num_batches_tracked, b_getattr_l__self___features___6___conv_1_1_num_batches_tracked, b_getattr_l__self___features___6___conv_3_num_batches_tracked, b_getattr_l__self___features___7___conv_0_1_num_batches_tracked, b_getattr_l__self___features___7___conv_1_1_num_batches_tracked, b_getattr_l__self___features___7___conv_3_num_batches_tracked, b_getattr_l__self___features___8___conv_0_1_num_batches_tracked, b_getattr_l__self___features___8___conv_1_1_num_batches_tracked, b_getattr_l__self___features___8___conv_3_num_batches_tracked, b_getattr_l__self___features___9___conv_0_1_num_batches_tracked, b_getattr_l__self___features___9___conv_1_1_num_batches_tracked, b_getattr_l__self___features___9___conv_3_num_batches_tracked, b_getattr_l__self___features___10___conv_0_1_num_batches_tracked, b_getattr_l__self___features___10___conv_1_1_num_batches_tracked, b_getattr_l__self___features___10___conv_3_num_batches_tracked, b_getattr_l__self___features___11___conv_0_1_num_batches_tracked, b_getattr_l__self___features___11___conv_1_1_num_batches_tracked, b_getattr_l__self___features___11___conv_3_num_batches_tracked, b_getattr_l__self___features___12___conv_0_1_num_batches_tracked, b_getattr_l__self___features___12___conv_1_1_num_batches_tracked, b_getattr_l__self___features___12___conv_3_num_batches_tracked, b_getattr_l__self___features___13___conv_0_1_num_batches_tracked, b_getattr_l__self___features___13___conv_1_1_num_batches_tracked, b_getattr_l__self___features___13___conv_3_num_batches_tracked, b_getattr_l__self___features___14___conv_0_1_num_batches_tracked, b_getattr_l__self___features___14___conv_1_1_num_batches_tracked, b_getattr_l__self___features___14___conv_3_num_batches_tracked, b_getattr_l__self___features___15___conv_0_1_num_batches_tracked, b_getattr_l__self___features___15___conv_1_1_num_batches_tracked, b_getattr_l__self___features___15___conv_3_num_batches_tracked, b_getattr_l__self___features___16___conv_0_1_num_batches_tracked, b_getattr_l__self___features___16___conv_1_1_num_batches_tracked, b_getattr_l__self___features___16___conv_3_num_batches_tracked, b_getattr_l__self___features___17___conv_0_1_num_batches_tracked, b_getattr_l__self___features___17___conv_1_1_num_batches_tracked, b_getattr_l__self___features___17___conv_3_num_batches_tracked, b_features_18_1_num_batches_tracked, x):
+    lowered_module_0 = self.lowered_module_0
+    lowered_module_1 = self.lowered_module_1
+    executorch_call_delegate_1 = torch.ops.higher_order.executorch_call_delegate(lowered_module_1, x); lowered_module_1 = x = None
+    getitem_53 = executorch_call_delegate_1[0]; executorch_call_delegate_1 = None
+    aten_view_copy_default = executorch_exir_dialects_edge__ops_aten_view_copy_default(getitem_53, [1, 1280]); getitem_53 = None
+    aten_clone_default = executorch_exir_dialects_edge__ops_aten_clone_default(aten_view_copy_default); aten_view_copy_default = None
+    executorch_call_delegate = torch.ops.higher_order.executorch_call_delegate(lowered_module_0, aten_clone_default); lowered_module_0 = aten_clone_default = None
+    getitem_52 = executorch_call_delegate[0]; executorch_call_delegate = None
+    return (getitem_52,)
```

We print the graph after lowering above to show the new nodes that were inserted to call the XNNPACK delegate. The subgraphs being delegated to XNNPACK are the first argument at each `executorch_call_delegate` call site. Observe that the majority of `convolution-relu-add` blocks and `linear` blocks were delegated to XNNPACK, while the operators which could not be lowered to the XNNPACK delegate, such as `clone` and `view_copy`, remain in the graph.
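
Once the graph is partitioned, the remaining ahead-of-time step is to compile and serialize it for the ExecuTorch runtime. That step is outside the hunk above, but a minimal sketch using the `ExecutorchProgramManager` already imported in the snippet looks like this (the `.pte` filename is an arbitrary example):

```python
# Compile the lowered edge program into an ExecuTorch program.
exec_prog: ExecutorchProgramManager = edge.to_executorch()

# Serialize the flatbuffer payload to a .pte file for the on-device runtime.
with open("xnnpack_mobilenetv2.pte", "wb") as f:
    f.write(exec_prog.buffer)
```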
@@ -149,6 +149,6 @@ mkdir cmake-out
cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
@@ -157,7 +157,7 @@ cmake \
    -DPYTHON_EXECUTABLE=python \
    -Bcmake-out .
```
-Then you can build the runtime componenets with
+Then you can build the runtime components with

```bash
cmake --build cmake-out -j9 --target install --config Release
@@ -171,4 +171,4 @@ Now you should be able to find the executable built at `./cmake-out/backends/xnn
```

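The hunk header above points at the runner executable under a truncated `./cmake-out/backends/xnn...` path. Assuming the binary is the XNNPACK-enabled executor runner (the exact binary name and flag below are assumptions, not confirmed by this diff), invoking it on the serialized model would look roughly like:

```bash
# Hypothetical invocation: the binary path and --model_path flag assume an
# xnn_executor_runner build target; adjust both to match your build output.
./cmake-out/backends/xnnpack/xnn_executor_runner --model_path=./xnnpack_mobilenetv2.pte
```
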
## Building and Linking with the XNNPACK Backend
-You can build the XNNPACK backend [CMake target](https://github.com/pytorch/executorch/blob/main/backends/xnnpack/CMakeLists.txt#L83), and link it with your application binary such as an Android or iOS application. For more information on this you may take a look at this [resource](demo-apps-android.md) next.
+You can build the XNNPACK backend [CMake target](https://github.com/pytorch/executorch/blob/main/backends/xnnpack/CMakeLists.txt#L83) and link it with your application binary, such as an Android or iOS app. For more information, take a look at [this resource](demo-apps-android.md) next.
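
As a sketch of what that linking might look like from an application's own `CMakeLists.txt` (the target names below assume the `executorch` and `xnnpack_backend` targets defined by the ExecuTorch build; your project layout will differ):

```cmake
# Hypothetical application CMakeLists.txt fragment. Assumes the ExecuTorch
# sources are part of the build (e.g. via add_subdirectory) so that the
# executorch and xnnpack_backend targets are visible here.
add_executable(my_app main.cpp)
target_link_libraries(
    my_app
    PRIVATE
    executorch
    xnnpack_backend
)
```

Note that backend registration happens via static initializers, so depending on your toolchain you may need to force the linker to keep the backend library (for example, with whole-archive style flags).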