|
13 | 13 |
|
14 | 14 | import cv2 |
15 | 15 | import executorch |
16 | | -import nncf.torch |
17 | 16 | import numpy as np |
18 | 17 | import torch |
19 | 18 | from executorch.backends.openvino.partitioner import OpenvinoPartitioner |
|
32 | 31 | to_edge_transform_and_lower, |
33 | 32 | ) |
34 | 33 | from executorch.exir.backend.backend_details import CompileSpec |
35 | | -from nncf.experimental.torch.fx import quantize_pt2e |
36 | 34 | from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e |
37 | 35 | from torch.export.exported_program import ExportedProgram |
38 | 36 | from torch.fx.passes.graph_drawer import FxGraphDrawer |
@@ -82,45 +80,48 @@ def lower_to_openvino( |
82 | 80 | subset_size: int, |
83 | 81 | quantize: bool, |
84 | 82 | ) -> ExecutorchProgramManager: |
85 | | - if quantize: |
86 | | - target_input_dims = tuple(example_args[0].shape[2:]) |
| 83 | + import nncf.torch |
| 84 | + from nncf.experimental.torch.fx import quantize_pt2e |
87 | 85 |
|
88 | | - def ext_transform_fn(sample): |
89 | | - sample = transform_fn(sample) |
90 | | - return pad_to_target(sample, target_input_dims) |
| 86 | + with nncf.torch.disable_patching(): |
| 87 | + if quantize: |
| 88 | + target_input_dims = tuple(example_args[0].shape[2:]) |
91 | 89 |
|
92 | | - quantizer = OpenVINOQuantizer(mode=QuantizationMode.INT8_TRANSFORMER) |
93 | | - quantizer.set_ignored_scope( |
94 | | - types=["mul", "sub", "sigmoid", "__getitem__"], |
95 | | - subgraphs=[nncf.Subgraph(inputs=["cat_18"], outputs=["output"])] |
96 | | - ) |
97 | | - quantized_model = quantize_pt2e( |
98 | | - aten_dialect.module(), |
99 | | - quantizer, |
100 | | - nncf.Dataset(calibration_dataset, ext_transform_fn), |
101 | | - subset_size=subset_size, |
102 | | - smooth_quant=True, |
103 | | - fold_quantize=False |
104 | | - ) |
| 90 | + def ext_transform_fn(sample): |
| 91 | + sample = transform_fn(sample) |
| 92 | + return pad_to_target(sample, target_input_dims) |
105 | 93 |
|
106 | | - visualize_fx_model(quantized_model, "tmp_quantized_model.svg") |
107 | | - aten_dialect = torch.export.export(quantized_model, example_args) |
108 | | - # Convert to edge dialect and lower the module to the backend with a custom partitioner |
109 | | - compile_spec = [CompileSpec("device", device.encode())] |
110 | | - lowered_module: EdgeProgramManager = to_edge_transform_and_lower( |
111 | | - aten_dialect, |
112 | | - partitioner=[ |
113 | | - OpenvinoPartitioner(compile_spec), |
114 | | - ], |
115 | | - compile_config=EdgeCompileConfig( |
116 | | - _skip_dim_order=True, |
117 | | - ), |
118 | | - ) |
| 94 | + quantizer = OpenVINOQuantizer(mode=QuantizationMode.INT8_TRANSFORMER) |
| 95 | + quantizer.set_ignored_scope( |
| 96 | + types=["mul", "sub", "sigmoid", "__getitem__"], |
| 97 | + ) |
| 98 | + quantized_model = quantize_pt2e( |
| 99 | + aten_dialect.module(), |
| 100 | + quantizer, |
| 101 | + nncf.Dataset(calibration_dataset, ext_transform_fn), |
| 102 | + subset_size=subset_size, |
| 103 | + smooth_quant=True, |
| 104 | + fold_quantize=False, |
| 105 | + ) |
119 | 106 |
|
120 | | - # Apply backend-specific passes |
121 | | - return lowered_module.to_executorch( |
122 | | - config=executorch.exir.ExecutorchBackendConfig() |
123 | | - ) |
| 107 | + visualize_fx_model(quantized_model, "tmp_quantized_model.svg") |
| 108 | + aten_dialect = torch.export.export(quantized_model, example_args) |
| 109 | + # Convert to edge dialect and lower the module to the backend with a custom partitioner |
| 110 | + compile_spec = [CompileSpec("device", device.encode())] |
| 111 | + lowered_module: EdgeProgramManager = to_edge_transform_and_lower( |
| 112 | + aten_dialect, |
| 113 | + partitioner=[ |
| 114 | + OpenvinoPartitioner(compile_spec), |
| 115 | + ], |
| 116 | + compile_config=EdgeCompileConfig( |
| 117 | + _skip_dim_order=True, |
| 118 | + ), |
| 119 | + ) |
| 120 | + |
| 121 | + # Apply backend-specific passes |
| 122 | + return lowered_module.to_executorch( |
| 123 | + config=executorch.exir.ExecutorchBackendConfig() |
| 124 | + ) |
124 | 125 |
|
125 | 126 |
|
126 | 127 | def lower_to_xnnpack( |
@@ -217,6 +218,7 @@ def main( |
217 | 218 | model = YOLO(model_name) |
218 | 219 |
|
219 | 220 | if quantize: |
| 221 | + raise NotImplementedError("Quantization is coming soon!") |
220 | 222 | if video_path is None: |
221 | 223 | raise RuntimeError( |
222 | 224 | "Could not quantize model without the video for the calibration." |
@@ -273,7 +275,8 @@ def transform_fn(frame): |
273 | 275 | "--model_name", |
274 | 276 | type=str, |
275 | 277 | default="yolo12s", |
276 | | - help="Ultralytics yolo model name.", |
| 278 | + choices=["yolo12n", "yolo12s", "yolo12m", "yolo12l", "yolo12x"], |
| 279 | + help="Ultralytics yolo12 model name.", |
277 | 280 | ) |
278 | 281 | parser.add_argument( |
279 | 282 | "--input_dims", |
@@ -312,14 +315,12 @@ def transform_fn(frame): |
312 | 315 | args = parser.parse_args() |
313 | 316 |
|
314 | 317 | # Run the main function with parsed arguments |
315 | | - # Disable nncf patching as export of the patched model is not supported. |
316 | | - with nncf.torch.disable_patching(): |
317 | | - main( |
318 | | - model_name=args.model_name, |
319 | | - input_dims=args.input_dims, |
320 | | - quantize=args.quantize, |
321 | | - video_path=args.video_path, |
322 | | - subset_size=args.subset_size, |
323 | | - backend=args.backend, |
324 | | - device=args.device, |
325 | | - ) |
| 318 | + main( |
| 319 | + model_name=args.model_name, |
| 320 | + input_dims=args.input_dims, |
| 321 | + quantize=args.quantize, |
| 322 | + video_path=args.video_path, |
| 323 | + subset_size=args.subset_size, |
| 324 | + backend=args.backend, |
| 325 | + device=args.device, |
| 326 | + ) |
0 commit comments