|
13 | 13 |
|
14 | 14 | import cv2
|
15 | 15 | import executorch
|
16 |
| -import nncf.torch |
17 | 16 | import numpy as np
|
18 | 17 | import torch
|
19 | 18 | from executorch.backends.openvino.partitioner import OpenvinoPartitioner
|
|
32 | 31 | to_edge_transform_and_lower,
|
33 | 32 | )
|
34 | 33 | from executorch.exir.backend.backend_details import CompileSpec
|
35 |
| -from nncf.experimental.torch.fx import quantize_pt2e |
36 | 34 | from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
|
37 | 35 | from torch.export.exported_program import ExportedProgram
|
38 | 36 | from torch.fx.passes.graph_drawer import FxGraphDrawer
|
@@ -82,45 +80,48 @@ def lower_to_openvino(
|
82 | 80 | subset_size: int,
|
83 | 81 | quantize: bool,
|
84 | 82 | ) -> ExecutorchProgramManager:
|
85 |
| - if quantize: |
86 |
| - target_input_dims = tuple(example_args[0].shape[2:]) |
| 83 | + import nncf.torch |
| 84 | + from nncf.experimental.torch.fx import quantize_pt2e |
87 | 85 |
|
88 |
| - def ext_transform_fn(sample): |
89 |
| - sample = transform_fn(sample) |
90 |
| - return pad_to_target(sample, target_input_dims) |
| 86 | + with nncf.torch.disable_patching(): |
| 87 | + if quantize: |
| 88 | + target_input_dims = tuple(example_args[0].shape[2:]) |
91 | 89 |
|
92 |
| - quantizer = OpenVINOQuantizer(mode=QuantizationMode.INT8_TRANSFORMER) |
93 |
| - quantizer.set_ignored_scope( |
94 |
| - types=["mul", "sub", "sigmoid", "__getitem__"], |
95 |
| - subgraphs=[nncf.Subgraph(inputs=["cat_18"], outputs=["output"])] |
96 |
| - ) |
97 |
| - quantized_model = quantize_pt2e( |
98 |
| - aten_dialect.module(), |
99 |
| - quantizer, |
100 |
| - nncf.Dataset(calibration_dataset, ext_transform_fn), |
101 |
| - subset_size=subset_size, |
102 |
| - smooth_quant=True, |
103 |
| - fold_quantize=False |
104 |
| - ) |
| 90 | + def ext_transform_fn(sample): |
| 91 | + sample = transform_fn(sample) |
| 92 | + return pad_to_target(sample, target_input_dims) |
105 | 93 |
|
106 |
| - visualize_fx_model(quantized_model, "tmp_quantized_model.svg") |
107 |
| - aten_dialect = torch.export.export(quantized_model, example_args) |
108 |
| - # Convert to edge dialect and lower the module to the backend with a custom partitioner |
109 |
| - compile_spec = [CompileSpec("device", device.encode())] |
110 |
| - lowered_module: EdgeProgramManager = to_edge_transform_and_lower( |
111 |
| - aten_dialect, |
112 |
| - partitioner=[ |
113 |
| - OpenvinoPartitioner(compile_spec), |
114 |
| - ], |
115 |
| - compile_config=EdgeCompileConfig( |
116 |
| - _skip_dim_order=True, |
117 |
| - ), |
118 |
| - ) |
| 94 | + quantizer = OpenVINOQuantizer(mode=QuantizationMode.INT8_TRANSFORMER) |
| 95 | + quantizer.set_ignored_scope( |
| 96 | + types=["mul", "sub", "sigmoid", "__getitem__"], |
| 97 | + ) |
| 98 | + quantized_model = quantize_pt2e( |
| 99 | + aten_dialect.module(), |
| 100 | + quantizer, |
| 101 | + nncf.Dataset(calibration_dataset, ext_transform_fn), |
| 102 | + subset_size=subset_size, |
| 103 | + smooth_quant=True, |
| 104 | + fold_quantize=False, |
| 105 | + ) |
119 | 106 |
|
120 |
| - # Apply backend-specific passes |
121 |
| - return lowered_module.to_executorch( |
122 |
| - config=executorch.exir.ExecutorchBackendConfig() |
123 |
| - ) |
| 107 | + visualize_fx_model(quantized_model, "tmp_quantized_model.svg") |
| 108 | + aten_dialect = torch.export.export(quantized_model, example_args) |
| 109 | + # Convert to edge dialect and lower the module to the backend with a custom partitioner |
| 110 | + compile_spec = [CompileSpec("device", device.encode())] |
| 111 | + lowered_module: EdgeProgramManager = to_edge_transform_and_lower( |
| 112 | + aten_dialect, |
| 113 | + partitioner=[ |
| 114 | + OpenvinoPartitioner(compile_spec), |
| 115 | + ], |
| 116 | + compile_config=EdgeCompileConfig( |
| 117 | + _skip_dim_order=True, |
| 118 | + ), |
| 119 | + ) |
| 120 | + |
| 121 | + # Apply backend-specific passes |
| 122 | + return lowered_module.to_executorch( |
| 123 | + config=executorch.exir.ExecutorchBackendConfig() |
| 124 | + ) |
124 | 125 |
|
125 | 126 |
|
126 | 127 | def lower_to_xnnpack(
|
@@ -217,6 +218,7 @@ def main(
|
217 | 218 | model = YOLO(model_name)
|
218 | 219 |
|
219 | 220 | if quantize:
|
| 221 | + raise NotImplementedError("Quantization is coming soon!") |
220 | 222 | if video_path is None:
|
221 | 223 | raise RuntimeError(
|
222 | 224 | "Could not quantize model without the video for the calibration."
|
@@ -273,7 +275,8 @@ def transform_fn(frame):
|
273 | 275 | "--model_name",
|
274 | 276 | type=str,
|
275 | 277 | default="yolo12s",
|
276 |
| - help="Ultralytics yolo model name.", |
| 278 | + choices=["yolo12n", "yolo12s", "yolo12m", "yolo12l", "yolo12x"], |
| 279 | + help="Ultralytics yolo12 model name.", |
277 | 280 | )
|
278 | 281 | parser.add_argument(
|
279 | 282 | "--input_dims",
|
@@ -312,14 +315,12 @@ def transform_fn(frame):
|
312 | 315 | args = parser.parse_args()
|
313 | 316 |
|
314 | 317 | # Run the main function with parsed arguments
|
315 |
| - # Disable nncf patching as export of the patched model is not supported. |
316 |
| - with nncf.torch.disable_patching(): |
317 |
| - main( |
318 |
| - model_name=args.model_name, |
319 |
| - input_dims=args.input_dims, |
320 |
| - quantize=args.quantize, |
321 |
| - video_path=args.video_path, |
322 |
| - subset_size=args.subset_size, |
323 |
| - backend=args.backend, |
324 |
| - device=args.device, |
325 |
| - ) |
| 318 | + main( |
| 319 | + model_name=args.model_name, |
| 320 | + input_dims=args.input_dims, |
| 321 | + quantize=args.quantize, |
| 322 | + video_path=args.video_path, |
| 323 | + subset_size=args.subset_size, |
| 324 | + backend=args.backend, |
| 325 | + device=args.device, |
| 326 | + ) |
0 commit comments