@@ -54,17 +54,19 @@ def _set_variable_data(scope, place, var_name, np_value):
 
 class PostTrainingQuantization(object):
     def __init__(self,
-                 executor,
-                 sample_generator,
-                 model_dir,
+                 executor=None,
+                 scope=None,
+                 model_dir=None,
                  model_filename=None,
                  params_filename=None,
+                 sample_generator=None,
                  batch_size=10,
                  batch_nums=None,
-                 scope=None,
                  algo="KL",
                  quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
                  is_full_quantize=False,
+                 weight_bits=8,
+                 activation_bits=8,
                  is_use_cache_file=False,
                  cache_dir="./temp_post_training"):
         '''
@@ -76,9 +78,8 @@ def __init__(self,
         Args:
             executor(fluid.Executor): The executor to load, run and save the
                 quantized model.
-            sample_generator(Python Generator): The sample generator provides
-                calibrate data for DataLoader, and it only returns a sample every
-                time.
+            scope(fluid.Scope, optional): The scope of the program, use it to load
+                and save variables. If scope=None, get scope by global_scope().
             model_dir(str): The path of the fp32 model that will be quantized,
                 and the model and params files are under the path.
             model_filename(str, optional): The name of file to load the inference
@@ -88,12 +89,13 @@ def __init__(self,
                 When all parameters were saved in a single binary file, set it
                 as the real filename. If parameters were saved in separate files,
                 set it as 'None'. Default is 'None'.
+            sample_generator(Python Generator): The sample generator provides
+                calibrate data for DataLoader, and it only returns a sample every
+                time.
             batch_size(int, optional): The batch size of DataLoader. Default is 10.
             batch_nums(int, optional): If batch_nums is not None, the number of
                 calibrate data is batch_size*batch_nums. If batch_nums is None, use
                 all data provided by sample_generator as calibrate data.
-            scope(fluid.Scope, optional): The scope of the program, use it to load
-                and save variables. If scope=None, get scope by global_scope().
             algo(str, optional): If algo='KL', use KL-divergence method to
                 get the more precise scale factor. If algo='direct', use
                 abs_max method to get the scale factor. Default is 'KL'.
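The relocated sample_generator entry describes a plain Python generator that yields exactly one calibration sample per step. Below is a minimal sketch of such a generator, assuming a single-input image model; the 3x224x224 shape and the 32-sample count are placeholders for illustration, not values taken from this change:

    import numpy as np

    def calib_reader():
        # yields one fake calibration sample at a time, matching the
        # "only returns a sample every time" contract described above
        for _ in range(32):
            yield [np.random.random((3, 224, 224)).astype("float32")]

A callable like this would then be handed to the constructor as sample_generator=calib_reader.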
@@ -104,6 +106,8 @@ def __init__(self,
                 apply quantization to all supported quantizable op type. If set
                 is_full_quantized as False, only apply quantization to the op type
                 according to the input quantizable_op_type.
+            weight_bits(int, optional): quantization bit number for weights.
+            activation_bits(int, optional): quantization bit number for activation.
             is_use_cache_file(bool, optional): If set is_use_cache_file as False,
                 all temp data will be saved in memory. If set is_use_cache_file as True,
                 it will save temp data to disk. When the fp32 model is complex or
@@ -150,14 +154,20 @@ def __init__(self,
             ptq.quantize()
             ptq.save_quantized_model(save_model_path)
         '''
+
+        assert executor is not None, "The executor cannot be None."
+        assert model_dir is not None, "The model_dir cannot be None."
+        assert sample_generator is not None, \
+            "The sample_generator cannot be None."
+
         self._executor = executor
-        self._sample_generator = sample_generator
+        self._scope = global_scope() if scope == None else scope
         self._model_dir = model_dir
         self._model_filename = model_filename
         self._params_filename = params_filename
+        self._sample_generator = sample_generator
         self._batch_size = batch_size
         self._batch_nums = batch_nums
-        self._scope = global_scope() if scope == None else scope
         self._algo = algo
         self._is_use_cache_file = is_use_cache_file
         self._cache_dir = cache_dir
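Taken together with the new assertions, executor, model_dir and sample_generator stay effectively mandatory even though they now default to None, and the positional order of the constructor has changed, so keyword arguments are the safest way to call it. A minimal end-to-end sketch under the new signature follows; the import path, the model paths and the calibration reader are assumptions for illustration, not part of this change:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

    def calib_reader():
        # one fake calibration sample per yield; shape is illustrative
        for _ in range(32):
            yield [np.random.random((3, 224, 224)).astype("float32")]

    exe = fluid.Executor(fluid.CPUPlace())
    ptq = PostTrainingQuantization(
        executor=exe,
        model_dir="./fp32_model",        # assumed location of the fp32 model
        sample_generator=calib_reader,
        batch_size=10,
        algo="KL",
        weight_bits=8,
        activation_bits=8)
    ptq.quantize()
    ptq.save_quantized_model("./quantized_model")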
@@ -604,7 +614,7 @@ def quantize_weight_to_int(self,
                                save_model_filename=None,
                                save_params_filename=None,
                                quantizable_op_type=["conv2d", "mul"],
-                               quantize_weight_bits=8,
+                               weight_bits=8,
                                threshold_rate=0.0):
         '''
         In order to reduce the size of model, this api quantizes the weight
@@ -624,8 +634,8 @@ def quantize_weight_to_int(self,
                 that will be quantized, and the quantized ops should be
                 contained in ["conv2d", "depthwise_conv2d", "mul"].
                 Default is ["conv2d","mul"].
-            quantize_weight_bits(int, optional): The bits for the quantized
-                weight, and it should be 8 or 16. Default is 8.
+            weight_bits(int, optional): The bits for the quantized weight,
+                and it should be 8 or 16. Default is 8.
             threshold_rate(float, optional): This api uses abs_max method to
                 quantize the weight from float32 to int8/16, and the abs max
                 value is important for quantization diff. When the abs_max
@@ -637,10 +647,10 @@ def quantize_weight_to_int(self,
             assert op_type in self._supported_quantizable_op_type, \
                 "input error:" + op_type + \
                 " is not supported for weight quantization."
-        assert quantize_weight_bits in [8, 16], \
-            "input error: quantize_weight_bits should be 8 or 16."
-        quantize_range = (1 << (quantize_weight_bits - 1)) - 1
-        save_weight_dtype = np.int8 if quantize_weight_bits == 8 else np.int16
+        assert weight_bits in [8, 16], \
+            "input error: weight_bits should be 8 or 16."
+        quantize_range = (1 << (weight_bits - 1)) - 1
+        save_weight_dtype = np.int8 if weight_bits == 8 else np.int16
 
         place = core.CPUPlace()
         exe = Executor(place)
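The renamed weight_bits drives the same two values as before: quantize_range is 2^(bits-1) - 1 (127 for int8, 32767 for int16) and the saved dtype follows the bit width. Here is a standalone sketch of the abs_max scheme those values support; the helper name and the sample array are illustrative and do not reproduce the method's actual body, which also handles threshold_rate clipping:

    import numpy as np

    def abs_max_quantize(weights, weight_bits=8):
        # same range/dtype selection as in the hunk above
        quantize_range = (1 << (weight_bits - 1)) - 1      # 127 or 32767
        save_weight_dtype = np.int8 if weight_bits == 8 else np.int16
        # the largest weight magnitude is mapped onto the integer range
        scale = np.max(np.abs(weights)) / quantize_range
        quantized = np.around(weights / scale).astype(save_weight_dtype)
        return quantized, scale

    q, scale = abs_max_quantize(np.random.randn(64, 32).astype("float32"))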
@@ -677,8 +687,7 @@ def quantize_weight_to_int(self,
                         _set_variable_data(scope, place, var_name,
                                            quantized_var_tensor_data)
                         op._set_attr(var_name + "_quant_scale", [scale])
-                        op._set_attr('quantize_weight_bits',
-                                     quantize_weight_bits)
+                        op._set_attr('quantize_weight_bits', weight_bits)
 
         io.save_inference_model(
             dirname=save_model_dir,