Commit 46770fa

Add sample file (#230)
1 parent a7d5efe commit 46770fa

File tree

README.md
ppq/samples/Tutorial/fusion.py

2 files changed: +107 -0 lines changed

README.md

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ PPQ is designed as a flexible and comprehensive offline neural network quantization tool, we
 | | | |
 | 08 | Create our own quantization rules! Learn about target platforms and quantizers | [platform](https://github.com/openppl-public/ppq/blob/master/ppq/samples/Tutorial/targetPlatform.py) |
 | 09 | Customize the quantization optimization process | [Optim](https://github.com/openppl-public/ppq/blob/master/ppq/samples/Tutorial/optimization.py) |
+| 10 | Customize the graph fusion process and quantization pipeline | [Fusion](https://github.com/openppl-public/ppq/blob/master/ppq/samples/Tutorial/fusion.py) |

 ### PPQ Optim Optimization Pass Documentation

 | | **Description** | **Link** |

ppq/samples/Tutorial/fusion.py

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
from typing import Callable, Iterable

import torch
import torchvision

from ppq import (BaseGraph, QuantizationOptimizationPass,
                 QuantizationOptimizationPipeline, QuantizationSetting,
                 TargetPlatform, TorchExecutor)
from ppq.api import ENABLE_CUDA_KERNEL
from ppq.IR.quantize import QuantableOperation
from ppq.IR.search import SearchableGraph
from ppq.quantization.optim import (ParameterQuantizePass,
                                    PassiveParameterQuantizePass,
                                    QuantAlignmentPass, QuantizeRefinePass,
                                    QuantizeSimplifyPass,
                                    RuntimeCalibrationPass)
from ppq.quantization.quantizer import TensorRTQuantizer

# ------------------------------------------------------------
# In this example we show you how to customize the quantization
# optimization process and how to invoke optimization passes manually.
# ------------------------------------------------------------

BATCHSIZE = 32
INPUT_SHAPE = [BATCHSIZE, 3, 224, 224]
DEVICE = 'cuda'
PLATFORM = TargetPlatform.TRT_INT8

# ------------------------------------------------------------
# As usual, create the calibration data and load the model.
# ------------------------------------------------------------
def load_calibration_dataset() -> Iterable:
    return [torch.rand(size=INPUT_SHAPE) for _ in range(32)]
CALIBRATION = load_calibration_dataset()

def collate_fn(batch: torch.Tensor) -> torch.Tensor:
    return batch.to(DEVICE)

model = torchvision.models.mobilenet.mobilenet_v2(pretrained=True)
model = model.to(DEVICE)

# ------------------------------------------------------------
# Below we show how to customize the graph fusion process.
# Graph fusion changes the quantization scheme: PPQ describes the
# concrete fusion rules with Tensor Quantization Configs, implemented
# underneath by a union-find structure.
# ------------------------------------------------------------

# ------------------------------------------------------------
# Define our own graph fusion pass. Here we fuse Conv - Clip, but,
# unlike the usual fusion, we disable the quantization point behind
# Clip and keep the quantization between Conv and Clip.
# For more complex pattern matching, see
# ppq.quantization.optim.refine.SwishFusionPass
# ------------------------------------------------------------
class MyFusion(QuantizationOptimizationPass):
    def optimize(self, graph: BaseGraph, dataloader: Iterable,
                 collate_fn: Callable, executor: TorchExecutor, **kwargs) -> None:

        # Graph fusion usually starts from graph pattern matching,
        # so build a pattern-matching engine first.
        search_engine = SearchableGraph(graph=graph)
        for pattern in search_engine.pattern_matching(patterns=['Conv', 'Clip'], edges=[[0, 1]], exclusive=True):
            conv, relu = pattern

            # We matched a conv - relu pair in the graph; next, disable the
            # unnecessary quantization points. First check that conv and relu
            # are both quantable operations and sit on the same platform.
            is_quantable = isinstance(conv, QuantableOperation) and isinstance(relu, QuantableOperation)
            is_same_plat = conv.platform == relu.platform

            if is_quantable and is_same_plat:
                # Point the quantization of relu's input and output at conv's
                # output. As soon as dominated_by is assigned, PPQ sets the
                # state of relu.input_quant_config[0] and
                # relu.output_quant_config[0] to OVERLAPPED; the quantization
                # they describe no longer takes effect in later computation.
                relu.input_quant_config[0].dominated_by = conv.output_quant_config[0]
                relu.output_quant_config[0].dominated_by = conv.output_quant_config[0]
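
# ------------------------------------------------------------
# A minimal sketch of how the effect of MyFusion can be checked after
# quantization has run: every Clip matched behind a Conv should then
# report an OVERLAPPED config state. The helper below is illustrative
# and assumes QuantizationStates from ppq.core; call it on the
# quantized BaseGraph (see the end of this script).
# ------------------------------------------------------------
from ppq.core import QuantizationStates

def check_fusion(graph: BaseGraph) -> None:
    for op in graph.operations.values():
        if op.type == 'Clip' and isinstance(op, QuantableOperation):
            # For fused Conv - Clip pairs we expect OVERLAPPED here.
            print(op.name, op.input_quant_config[0].state == QuantizationStates.OVERLAPPED)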

# ------------------------------------------------------------
# A custom graph fusion process interferes with the quantizer's logic,
# so we have to create a new quantizer. Here we inherit from the
# TensorRT Quantizer: the per-operator quantization logic keeps
# TensorRT's configuration, but when the quantization pipeline is built
# we override the quantizer's original logic and use our own pipeline
# instead. This lets us place the custom fusion pass at the right
# position; note that the QuantizationSetting then no longer takes
# effect.
# ------------------------------------------------------------
class MyQuantizer(TensorRTQuantizer):
    def build_quant_pipeline(self, setting: QuantizationSetting) -> QuantizationOptimizationPipeline:
        return QuantizationOptimizationPipeline([
            QuantizeRefinePass(),
            QuantizeSimplifyPass(),
            ParameterQuantizePass(),
            MyFusion(name='My Optimization Procedure'),
            RuntimeCalibrationPass(),
            QuantAlignmentPass(),
            PassiveParameterQuantizePass()])

from ppq.api import quantize_torch_model, register_network_quantizer
register_network_quantizer(quantizer=MyQuantizer, platform=TargetPlatform.EXTENSION)

# ------------------------------------------------------------
# If you use the ENABLE_CUDA_KERNEL context, PPQ will try to compile
# custom high-performance quantization kernels; this requires a working
# build environment. If compilation fails, you can remove the call to
# ENABLE_CUDA_KERNEL here: that noticeably slows PPQ down, but even
# without these kernels you can still complete quantization with
# PyTorch's GPU operators.
# ------------------------------------------------------------
with ENABLE_CUDA_KERNEL():
    quantized = quantize_torch_model(
        model=model, calib_dataloader=CALIBRATION,
        calib_steps=32, input_shape=INPUT_SHAPE,
        collate_fn=collate_fn, platform=TargetPlatform.EXTENSION,
        onnx_export_file='model.onnx', device=DEVICE, verbose=0)
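
# ------------------------------------------------------------
# A minimal sketch of what typically follows: verify the fusion and
# export the quantized graph. export_ppq_graph from ppq.api is assumed
# here with a TensorRT-style export target; adjust the platform and
# file paths to your deployment.
# ------------------------------------------------------------
from ppq.api import export_ppq_graph

check_fusion(quantized)  # fused Conv - Clip pairs should print True
export_ppq_graph(
    graph=quantized, platform=TargetPlatform.TRT_INT8,
    graph_save_to='quantized.onnx', config_save_to='quantized.json')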
