Commit f7f5044

Merge pull request #16489 from wzzju/fix_slim_quant_bugs
Clean codes and fix some bugs.
2 parents 69cb979 + 46e1bb0 commit f7f5044

File tree

5 files changed (+93, -108 lines)

python/paddle/fluid/contrib/slim/quantization/quantization_pass.py

Lines changed: 65 additions & 55 deletions

@@ -26,6 +26,17 @@
 ]


+def _init_var_node(var_node, value, scope, place):
+    assert isinstance(value,
+                      np.ndarray), 'The type of value should be numpy array.'
+    assert scope is not None, \
+        'The scope cannot be set None.'
+    assert place is not None, \
+        'The place cannot be set None.'
+    tensor = scope.var(var_node.name()).get_tensor()
+    tensor.set(value, place)
+
+
 class QuantizationTransformPass(object):
     def __init__(self,
                  scope=None,
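Note: the hunk above replaces two identical private `_init_var_node` methods (deleted further down in this file) with a single module-level helper that takes the scope and place explicitly. A minimal sketch of what the helper does, using only `paddle.fluid.core` objects; the variable name 'quant_scale' and the value are illustrative and not taken from the diff:

import numpy as np
import paddle.fluid.core as core

scope = core.Scope()
place = core.CPUPlace()

# Same call pattern as the helper: find/create the variable in the scope,
# grab its backing LoDTensor, and copy the numpy value into it on the place.
tensor = scope.var('quant_scale').get_tensor()
tensor.set(np.array([0.001], dtype='float32'), place)

print(np.array(tensor))  # [0.001]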
@@ -88,14 +99,14 @@ def __init__(self,
         assert activation_quantize_type != 'channel_wise_abs_max', "The activation quantization type does not support 'channel_wise_abs_max'."
         if activation_quantize_type not in quant_type:
             raise ValueError(
-                "Unknown activation_quantize_type : '%s'. It can only be ",
-                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.",
-                str(activation_quantize_type))
+                "Unknown activation_quantize_type : '%s'. It can only be "
+                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'." %
+                (str(activation_quantize_type)))
         if weight_quantize_type not in quant_type:
             raise ValueError(
-                "Unknown weight_quantize_type: '%s'. It can only be ",
-                "'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'.",
-                str(weight_quantize_type))
+                "Unknown weight_quantize_type: '%s'. It can only be "
+                "'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
+                % (str(weight_quantize_type)))

         self._activation_quantize_type = activation_quantize_type
         self._weight_quantize_type = weight_quantize_type
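Note: this hunk fixes a real bug rather than just style. Passing comma-separated strings to ValueError builds an exception whose args are a tuple and never substitutes '%s'; adjacent string literals concatenate, and the trailing '%' formats the message. An illustrative snippet (the 'bogus_type' value is made up):

quant_type_arg = 'bogus_type'

broken = ValueError(
    "Unknown activation_quantize_type : '%s'. It can only be ",
    "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.",
    str(quant_type_arg))
print(broken.args)  # three separate args; '%s' is never filled in

fixed = ValueError(
    "Unknown activation_quantize_type : '%s'. It can only be "
    "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'." %
    str(quant_type_arg))
print(fixed)  # one properly interpolated message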
@@ -121,8 +132,6 @@ def apply(self, graph):
         """
         assert isinstance(graph,
                           IrGraph), 'graph must be the instance of IrGraph.'
-        #sequential_execution = core.get_pass('sequential_execution_pass')
-        #sequential_execution.apply(graph.graph)
         self._is_test = graph.is_test()
         # marked the variable which has been dequantized.
         dequantized_vars = collections.OrderedDict()
@@ -203,9 +212,12 @@ def _create_global_step(self, graph):
                 var_type=core.VarDesc.VarType.LOD_TENSOR,
                 shape=[1],
                 var_dtype=core.VarDesc.VarType.INT64)
-            self._init_var_node(
-                global_step_in, np.zeros(
-                    [1], dtype='int64'))
+            _init_var_node(
+                global_step_in,
+                np.zeros(
+                    [1], dtype='int64'),
+                self._scope,
+                self._place)
             global_step_out = graph.create_var_node_from_desc(
                 global_step_in.var())
             # The attribute of `op_role` is needed by ParallelExecutor.
@@ -284,7 +296,12 @@ def _insert_quant_range_abs_max_op(self, graph, var_node, quant_bits):
             var_dtype=var_node.dtype())
         data_type = 'float64' if var_node.dtype(
         ) == core.VarDesc.VarType.FP64 else 'float32'
-        self._init_var_node(scale_in_node, np.array([0.001], dtype=data_type))
+        _init_var_node(
+            scale_in_node,
+            np.array(
+                [0.001], dtype=data_type),
+            self._scope,
+            self._place)

         scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
         inputs = {'X': var_node, 'InScale': scale_in_node}
@@ -299,9 +316,13 @@ def _insert_quant_range_abs_max_op(self, graph, var_node, quant_bits):
                 var_dtype=var_node.dtype())
             data_type = 'float64' if var_node.dtype(
             ) == core.VarDesc.VarType.FP64 else 'float32'
-            self._init_var_node(
-                scales_node, np.zeros(
-                    [self._window_size], dtype=data_type))
+            _init_var_node(
+                scales_node,
+                np.zeros(
+                    [self._window_size], dtype=data_type),
+                self._scope,
+                self._place)
+
             inputs['Iter'] = self._global_step
             outputs['OutScales'] = scales_node
         attrs = {
@@ -343,7 +364,12 @@ def _insert_quant_moving_average_abs_max_op(self, graph, var_node,
             var_dtype=var_node.dtype())
         data_type = 'float64' if var_node.dtype(
         ) == core.VarDesc.VarType.FP64 else 'float32'
-        self._init_var_node(scale_in_node, np.array([0.001], dtype=data_type))
+        _init_var_node(
+            scale_in_node,
+            np.array(
+                [0.001], dtype=data_type),
+            self._scope,
+            self._place)

         scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
         ins = {'X': var_node, 'InScale': scale_in_node}
@@ -356,13 +382,23 @@ def _insert_quant_moving_average_abs_max_op(self, graph, var_node,
                 shape=[1])
             data_type = 'float64' if var_node.dtype(
             ) == core.VarDesc.VarType.FP64 else 'float32'
-            self._init_var_node(scale_in_node, np.ones([1], dtype=data_type))
+            _init_var_node(
+                scale_in_node,
+                np.ones(
+                    [1], dtype=data_type),
+                self._scope,
+                self._place)
             accum_in_node = graph.create_persistable_node(
                 name=unique_name.generate('accum'),
                 var_type=core.VarDesc.VarType.LOD_TENSOR,
                 var_dtype=var_node.dtype(),
                 shape=[1])
-            self._init_var_node(accum_in_node, np.ones([1], dtype=data_type))
+            _init_var_node(
+                accum_in_node,
+                np.ones(
+                    [1], dtype=data_type),
+                self._scope,
+                self._place)
             state_out_node = graph.create_var_node_from_desc(state_in_node.var(
             ))
             accum_out_node = graph.create_var_node_from_desc(accum_in_node.var(
@@ -482,16 +518,6 @@ def _insert_channel_dequant_op(self, graph, var_node, scale_var_nodes,
         graph.link_to(dequant_op_node, dequant_var_node)
         return dequant_var_node

-    def _init_var_node(self, var_node, value):
-        assert isinstance(
-            value, np.ndarray), 'The type of value should be numpy array.'
-        assert self._scope is not None, \
-            'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
-        assert self._place is not None, \
-            'The place cannot be set None when activation_quantize_type equals to range_abs_max.'
-        tensor = self._scope.var(var_node.name()).get_tensor()
-        tensor.set(value, self._place)
-
     def _quantized_var_name(self, var_name):
         """
         Return quantized variable name for the input `var_name`.
@@ -594,8 +620,8 @@ def apply(self, graph):
                                                     self._weight_bits)
                     self._restore_var(input_arg_name, quantized_param_v)
                 else:
-                    scale_v = self._to_node(op_node.outputs,
-                                            op_node.output('OutScale')[0])
+                    scale_v = graph._find_node_by_name(
+                        op_node.outputs, op_node.output('OutScale')[0])
                     self._var_scale_map[input_arg_name] = scale_v

         ops = graph.all_op_nodes()
@@ -627,8 +653,8 @@ def apply(self, graph):
         return graph

     def _remove_fake_quant_and_dequant_op(self, graph, op_node):
-        k = self._to_node(op_node.outputs, op_node.output('Out')[0])
-        v = self._to_node(op_node.inputs, op_node.input('X')[0])
+        k = graph._find_node_by_name(op_node.outputs, op_node.output('Out')[0])
+        v = graph._find_node_by_name(op_node.inputs, op_node.input('X')[0])
         if v.node not in self._op_input_rename_map:
             self._op_input_rename_map[k.node] = v
         else:
@@ -663,16 +689,18 @@ def _insert_post_channel_dequant_op(self, graph, op_node):
             raise ValueError("Only support one output, but op %s has"
                              " more than one output." % (op_node.name()))

-        output_var_node = self._to_node(op_node.outputs,
-                                        op_node.output_arg_names()[0])
+        output_var_node = graph._find_node_by_name(
+            op_node.outputs, op_node.output_arg_names()[0])
         weight_scale_node = graph.create_persistable_node(
             name=unique_name.generate('channel_scale'),
             var_type=core.VarDesc.VarType.LOD_TENSOR,
             shape=[channel_scale.shape[0]],
             var_dtype=output_var_node.dtype())
         data_type = 'float64' if output_var_node.dtype(
         ) == core.VarDesc.VarType.FP64 else 'float32'
-        self._init_var_node(weight_scale_node, channel_scale.astype(data_type))
+        _init_var_node(weight_scale_node,
+                       channel_scale.astype(data_type), self._scope,
+                       self._place)
         dequant_var_node = graph.create_var_node(
             name=self._dequantized_var_name(output_var_node.name()),
             var_type=output_var_node.type(),
@@ -724,8 +752,8 @@ def _insert_post_dequant_op(self, graph, op_node):
             raise ValueError("Only support one output, but op %s has"
                              " more than one output." % (op_node.name()))

-        output_var_node = self._to_node(op_node.outputs,
-                                        op_node.output_arg_names()[0])
+        output_var_node = graph._find_node_by_name(
+            op_node.outputs, op_node.output_arg_names()[0])
         dequant_var_node = graph.create_var_node(
             name=self._dequantized_var_name(output_var_node.name()),
             var_type=output_var_node.type(),
@@ -746,24 +774,6 @@ def _insert_post_dequant_op(self, graph, op_node):
         self._op_output_rename_map[output_var_node.node] = dequant_var_node
         return dequant_var_node

-    def _init_var_node(self, var_node, value):
-        assert isinstance(
-            value, np.ndarray), 'The type of value should be numpy array.'
-        assert self._scope is not None, \
-            'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
-        assert self._place is not None, \
-            'The place cannot be set None when activation_quantize_type equals to range_abs_max.'
-        tensor = self._scope.var(var_node.name()).get_tensor()
-        tensor.set(value, self._place)
-
-    def _to_node(self, nodes, node_name):
-        target_node = None
-        for n in nodes:
-            if n.name() == node_name:
-                target_node = n
-        assert target_node is not None, "Cannot find the target node in the giving set."
-        return target_node
-
     def _load_var(self, name):
         return np.array(self._scope.find_var(name).get_tensor())

python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py

Lines changed: 11 additions & 5 deletions

@@ -45,13 +45,14 @@ def __init__(self,
                  activation_bits=8,
                  weight_bits=8,
                  activation_quantize_type='abs_max',
+                 weight_quantize_type='abs_max',
                  save_in_nodes=None,
                  save_out_nodes=None):
         """
         Args:
             start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
             end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
-            float_model_save_path(str): The path to save model with float weights.
+            float_model_save_path(str): The path to save model with float weights.
                                 None means it doesn't save float model. defalut: None.
             mobile_model_save_path(str): The path to save model for paddle-mobile execution.
                                 None means it doesn't save mobile model. defalut: None.
@@ -66,9 +67,11 @@ def __init__(self,
                                 dynamically each step in both training and testing period. If use
                                 'range_abs_max', a static quantization scale will be calculated
                                 during training and used in inference.
-            save_in_nodes(list<str>): A list of variable names used to prune graph
+            weight_quantize_type (str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'.
+                                The 'range_abs_max' usually is not used for weight, since weights are fixed once the model is well trained.
+            save_in_nodes(list<str>): A list of variable names used to prune graph
                                 for saving inference model.
-            save_out_nodes(list<str>): A list of variable names used to prune graph
+            save_out_nodes(list<str>): A list of variable names used to prune graph
                                 for saving inference model.

         """
@@ -81,6 +84,7 @@ def __init__(self,
         self.activation_bits = activation_bits
         self.weight_bits = weight_bits
         self.activation_quantize_type = activation_quantize_type
+        self.weight_quantize_type = weight_quantize_type
         self.save_out_nodes = save_out_nodes
         self.save_in_nodes = save_in_nodes

@@ -100,7 +104,8 @@ def on_epoch_begin(self, context):
                 place=context.place,
                 weight_bits=self.weight_bits,
                 activation_bits=self.activation_bits,
-                activation_quantize_type=self.activation_quantize_type)
+                activation_quantize_type=self.activation_quantize_type,
+                weight_quantize_type=self.weight_quantize_type)
             transform_pass.apply(train_ir_graph)
             transform_pass.apply(test_ir_graph)

@@ -134,7 +139,8 @@ def on_epoch_end(self, context):
                 scope=context.scope,
                 place=context.place,
                 weight_bits=self.weight_bits,
-                activation_bits=self.activation_bits)
+                activation_bits=self.activation_bits,
+                weight_quantize_type=self.weight_quantize_type)
             freeze_pass.apply(test_ir_graph)

         # for other strategies
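Note: the net effect of this file's changes is that the strategy now accepts a weight_quantize_type and forwards it to both QuantizationTransformPass and QuantizationFreezePass. A hedged construction sketch; the class name QuantizationStrategy and the import path are inferred from the file path rather than shown in the hunks, and all values are examples:

from paddle.fluid.contrib.slim.quantization.quantization_strategy import \
    QuantizationStrategy

strategy = QuantizationStrategy(
    start_epoch=0,
    end_epoch=0,
    float_model_save_path='./output/float',
    weight_bits=8,
    activation_bits=8,
    activation_quantize_type='abs_max',
    weight_quantize_type='channel_wise_abs_max',  # keyword added in this commit
    save_in_nodes=None,
    save_out_nodes=None)

In practice these fields are usually populated from a YAML config such as the compress.yaml diffed below rather than by direct construction.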

python/paddle/fluid/contrib/slim/tests/quantization/compress.yaml

Lines changed: 2 additions & 0 deletions

@@ -35,6 +35,8 @@ strategies:
         start_epoch: 0
         end_epoch: 0
         float_model_save_path: './output/float'
+        mobile_model_save_path: './output/mobile'
+        int8_model_save_path: './output/int8'
         weight_bits: 8
         activation_bits: 8
         weight_quantize_type: 'abs_max'

python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py

Lines changed: 0 additions & 3 deletions

@@ -256,8 +256,6 @@ def build_program(main, startup, is_test):
             place=place,
             activation_quantize_type=activation_quant_type,
             weight_quantize_type=weight_quant_type)
-        #transform_pass = QuantizationTransformPass(
-        #    scope=scope, place=place, activation_quantize_type=activation_quant_type)
         transform_pass.apply(main_graph)
         transform_pass.apply(test_graph)
         dev_name = '_gpu_' if use_cuda else '_cpu_'
@@ -315,7 +313,6 @@ def build_program(main, startup, is_test):
         # Freeze graph for inference, but the weight of fc/conv is still float type.
         freeze_pass = QuantizationFreezePass(
             scope=scope, place=place, weight_quantize_type=weight_quant_type)
-        #freeze_pass = QuantizationFreezePass(scope=scope, place=place)
         freeze_pass.apply(test_graph)
         if not for_ci:
             marked_nodes = set()
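Note: for context, this is roughly how the two cleaned-up passes fit together end to end. It is a hedged sketch, not the test itself: the tiny fc network, the CPU place, and the quantize types are illustrative choices, and a real flow would train with the transformed graph before freezing.

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass

main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    img = fluid.layers.data(name='image', shape=[784], dtype='float32')
    fluid.layers.fc(input=img, size=10, act='softmax')

place = core.CPUPlace()
scope = fluid.global_scope()
fluid.Executor(place).run(startup, scope=scope)  # initialize parameters

graph = IrGraph(core.Graph(main.desc), for_test=True)

transform_pass = QuantizationTransformPass(
    scope=scope,
    place=place,
    activation_quantize_type='moving_average_abs_max',
    weight_quantize_type='abs_max')
transform_pass.apply(graph)

# Training and scale calibration would normally happen here; freezing right
# away only demonstrates the API surface.
freeze_pass = QuantizationFreezePass(
    scope=scope, place=place, weight_quantize_type='abs_max')
freeze_pass.apply(graph)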
