(Beta) Convert MobileNetV2 to NNAPI
========================================

.. note::
   PyTorch Mobile is no longer actively supported. Please check out
   `ExecuTorch <https://pytorch.org/executorch/stable/index.html>`_.

Introduction
------------

This tutorial shows how to prepare a computer vision model to use
`Android's Neural Networks API (NNAPI) <https://developer.android.com/ndk/guides/neuralnetworks>`_.
NNAPI provides access to powerful and efficient computational cores
on many modern Android devices.

PyTorch's NNAPI is currently in the "prototype" phase and only supports
a limited range of operators, but we expect to solidify the integration
and expand our operator support over time.


Environment
-----------

Install PyTorch and torchvision.

``pip install torch==1.10.0 torchvision==0.11.1``


Model Preparation
-----------------

First, we must prepare our model to execute with NNAPI.
This step runs on your training server or laptop.
The key conversion function to call is
``torch.backends._nnapi.prepare.convert_model_to_nnapi``,
but some extra steps are required to ensure that
the model is properly structured.
Most notably, quantizing the model is required
in order to run the model on certain accelerators.
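
To see the core conversion step in isolation, here is a minimal sketch of the
fully floating-point ("none") case, using only calls that appear in the full
script below. The full script additionally handles quantization, mobile
optimization, and input bundling.

.. code:: python

    import torch
    import torch.backends._nnapi.prepare
    import torchvision

    # Load a float MobileNetV2 and switch to inference mode.
    model = torchvision.models.mobilenet_v2(pretrained=True).eval()

    # Many NNAPI backends prefer NHWC, so use a channels_last example input
    # and mark it with the "nnapi_nhwc" attribute the converter looks for.
    example = torch.zeros(1, 3, 224, 224).contiguous(memory_format=torch.channels_last)
    example.nnapi_nhwc = True

    # Trace to TorchScript, then convert the traced module for NNAPI.
    with torch.no_grad():
        traced = torch.jit.trace(model, example)
    nnapi_model = torch.backends._nnapi.prepare.convert_model_to_nnapi(traced, example)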

You can copy/paste this entire Python script and run it,
or make your own modifications.
By default, it will save the models to ``~/mobilenetv2-nnapi/``.
Please create that directory first.

.. code:: python

    #!/usr/bin/env python
    import sys
    import os
    import torch
    import torch.utils.bundled_inputs
    import torch.utils.mobile_optimizer
    import torch.backends._nnapi.prepare
    import torchvision.models.quantization.mobilenet
    from pathlib import Path


    # This script supports 3 modes of quantization:
    # - "none": Fully floating-point model.
    # - "core": Quantize the core of the model, but wrap it in a
    #   quantizer/dequantizer pair, so the interface uses floating point.
    # - "full": Quantize the model, and use quantized tensors
    #   for input and output.
    #
    # "none" maintains maximum accuracy
    # "core" sacrifices some accuracy for performance,
    # but maintains the same interface.
    # "full" maximizes performance (with the same accuracy as "core"),
    # but requires the application to use quantized tensors.
    #
    # There is a fourth option, not supported by this script,
    # where we include the quant/dequant steps as NNAPI operators.
    def make_mobilenetv2_nnapi(output_dir_path, quantize_mode):
        quantize_core, quantize_iface = {
            "none": (False, False),
            "core": (True, False),
            "full": (True, True),
        }[quantize_mode]

        model = torchvision.models.quantization.mobilenet.mobilenet_v2(pretrained=True, quantize=quantize_core)
        model.eval()

        # Fuse BatchNorm operators in the floating point model.
        # (Quantized models already have this done.)
        # Remove dropout for this inference-only use case.
        if not quantize_core:
            model.fuse_model()
        assert type(model.classifier[0]) == torch.nn.Dropout
        model.classifier[0] = torch.nn.Identity()

        input_float = torch.zeros(1, 3, 224, 224)
        input_tensor = input_float

        # If we're doing a quantized model, we need to trace only the quantized core.
        # So capture the quantizer and dequantizer, use them to prepare the input,
        # and replace them with identity modules so we can trace without them.
        if quantize_core:
            quantizer = model.quant
            dequantizer = model.dequant
            model.quant = torch.nn.Identity()
            model.dequant = torch.nn.Identity()
            input_tensor = quantizer(input_float)

        # Many NNAPI backends prefer NHWC tensors, so convert our input to channels_last,
        # and set the "nnapi_nhwc" attribute for the converter.
        input_tensor = input_tensor.contiguous(memory_format=torch.channels_last)
        input_tensor.nnapi_nhwc = True

        # Trace the model.  NNAPI conversion only works with TorchScript models,
        # and traced models are more likely to convert successfully than scripted.
        with torch.no_grad():
            traced = torch.jit.trace(model, input_tensor)
        nnapi_model = torch.backends._nnapi.prepare.convert_model_to_nnapi(traced, input_tensor)

        # If we're not using a quantized interface, wrap a quant/dequant around the core.
        if quantize_core and not quantize_iface:
            nnapi_model = torch.nn.Sequential(quantizer, nnapi_model, dequantizer)
            model.quant = quantizer
            model.dequant = dequantizer
            # Switch back to float input for benchmarking.
            input_tensor = input_float.contiguous(memory_format=torch.channels_last)

        # Optimize the CPU model to make CPU-vs-NNAPI benchmarks fair.
        model = torch.utils.mobile_optimizer.optimize_for_mobile(torch.jit.script(model))

        # Bundle sample inputs with the models for easier benchmarking.
        # This step is optional.
        class BundleWrapper(torch.nn.Module):
            def __init__(self, mod):
                super().__init__()
                self.mod = mod
            def forward(self, arg):
                return self.mod(arg)
        nnapi_model = torch.jit.script(BundleWrapper(nnapi_model))
        torch.utils.bundled_inputs.augment_model_with_bundled_inputs(
            model, [(torch.utils.bundled_inputs.bundle_large_tensor(input_tensor),)])
        torch.utils.bundled_inputs.augment_model_with_bundled_inputs(
            nnapi_model, [(torch.utils.bundled_inputs.bundle_large_tensor(input_tensor),)])

        # Save both models.
        model._save_for_lite_interpreter(str(output_dir_path / ("mobilenetv2-quant_{}-cpu.pt".format(quantize_mode))))
        nnapi_model._save_for_lite_interpreter(str(output_dir_path / ("mobilenetv2-quant_{}-nnapi.pt".format(quantize_mode))))


    if __name__ == "__main__":
        for quantize_mode in ["none", "core", "full"]:
            make_mobilenetv2_nnapi(Path(os.environ["HOME"]) / "mobilenetv2-nnapi", quantize_mode)


Running Benchmarks
------------------

Now that the models are ready, we can benchmark them on our Android devices.
See `our performance recipe <https://pytorch.org/tutorials/recipes/mobile_perf.html#android-benchmarking-setup>`_ for details.
The best-performing models are likely to be the "fully-quantized" models:
``mobilenetv2-quant_full-cpu.pt`` and ``mobilenetv2-quant_full-nnapi.pt``.

Because these models have bundled inputs, we can run the benchmark as follows:

.. code:: shell

    ./speed_benchmark_torch --pthreadpool_size=1 --model=mobilenetv2-quant_full-nnapi.pt --use_bundled_input=0 --warmup=5 --iter=200

Increasing the thread pool size can reduce latency,
at the cost of increased CPU usage.
Omitting that argument will use one thread per big core.
The CPU models can get improved performance (at the cost of memory usage)
by passing ``--use_caching_allocator=true``.


Running model on host
---------------------

We can now run the models on a Linux machine using the reference
implementation of NNAPI. You need to build the NNAPI library from the Android
source code:

* Make sure you have at least 200GB of disk space
* Follow `these instructions <https://source.android.com/setup/develop#installing-repo>`_ to install ``repo``

.. code:: shell

    mkdir ~/android-nnapi && cd ~/android-nnapi
    repo init -u https://android.googlesource.com/platform/manifest -b master
    repo sync --network-only -j 16
    repo sync -l
    . build/envsetup.sh
    lunch aosp_x86_64-eng
    mm -j16 out/host/linux-x86/lib64/libneuralnetworks.so


With the host build of ``libneuralnetworks.so`` you can run PyTorch NNAPI
models on your Linux machine:

.. code:: python

    #!/usr/bin/env python
    import ctypes
    import torch
    from pathlib import Path

    ctypes.cdll.LoadLibrary(Path.home() / "android-nnapi/out/host/linux-x86/lib64/libneuralnetworks.so")
    model = torch.jit.load(Path.home() / "mobilenetv2-nnapi/mobilenetv2-quant_full-nnapi.pt")
    print(model(*model.get_all_bundled_inputs()[0]))
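
As a quick sanity check, you can also compare the NNAPI model's output against
the CPU model's. This is a sketch, assuming both models were saved with
bundled inputs by the preparation script above; the ``atol`` tolerance is an
arbitrary illustrative value, not a guarantee of NNAPI accuracy.

.. code:: python

    # Load the CPU version of the same model and run both on their bundled inputs.
    cpu_model = torch.jit.load(Path.home() / "mobilenetv2-nnapi/mobilenetv2-quant_full-cpu.pt")
    nnapi_out = model(*model.get_all_bundled_inputs()[0])
    cpu_out = cpu_model(*cpu_model.get_all_bundled_inputs()[0])
    # The "full" models return quantized tensors; dequantize before comparing.
    print(torch.allclose(nnapi_out.dequantize(), cpu_out.dequantize(), atol=0.1))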


Integration
-----------

The converted models are ordinary TorchScript models.
You can use them in your app just like any other PyTorch model.
See `https://pytorch.org/mobile/android/ <https://pytorch.org/mobile/android/>`_
for an introduction to using PyTorch on Android.
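
One caveat for the "full" models: because their interface uses quantized
tensors, your application must quantize inputs itself before calling the
model. A minimal sketch is below; ``SCALE`` and ``ZERO_POINT`` are
hypothetical placeholders, so substitute the actual parameters of your
model's input quantizer (for example, by printing ``model.quant`` in the
preparation script).

.. code:: python

    import torch

    # Hypothetical quantization parameters; use your model's real values.
    SCALE, ZERO_POINT = 0.02, 128

    def to_quantized_input(img_float):
        # img_float: a 1x3x224x224 float tensor, already normalized the way
        # MobileNetV2 expects. Convert to NHWC and quantize to quint8.
        nhwc = img_float.contiguous(memory_format=torch.channels_last)
        return torch.quantize_per_tensor(nhwc, SCALE, ZERO_POINT, torch.quint8)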


Learn More
----------

- Learn more about optimization in our
  `Mobile Performance Recipe <https://pytorch.org/tutorials/recipes/mobile_perf.html>`_
- `MobileNetV2 <https://pytorch.org/hub/pytorch_vision_mobilenet_v2/>`_ from torchvision
- Information about `NNAPI <https://developer.android.com/ndk/guides/neuralnetworks>`_