22
33#include " caffe/layers/conv_layer.hpp"
44#include " caffe/util/math_functions.hpp"
5+ #define W this ->blobs_[0 ] // shorthand for the layer's weight blob (blobs_[0])
6+ #define B this ->blobs_[1 ] // shorthand for the layer's bias blob (blobs_[1])
57
68namespace caffe {
79
@@ -52,34 +54,42 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
5254template <typename Dtype>
5355void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
5456 const vector<Blob<Dtype>*>& top) {
55- // set up quantization parameters: scale + zero_point
5657 const Dtype input_scale = this ->input_scale_ ;
5758 const Dtype output_scale = this ->output_scale_ ;
5859 const Dtype weight_scale = this ->weight_scale_ ;
59- const Dtype bias_scale = this ->bias_scale_ ;
60+ const Dtype bias_scale = this ->bias_scale_ ; // bias_scale = input_scale * weight_scale
6061 const int input_zero_point = this ->input_zero_point_ ;
6162 const int output_zero_point = this ->output_zero_point_ ;
6263 const int weight_zero_point = this ->weight_zero_point_ ;
6364 const int bias_zero_point = this ->bias_zero_point_ ;
6465 const Dtype saturate = this ->saturate_ ;
65- const bool quant_in = (input_scale != Dtype (1.0 ) || input_zero_point != 0 );
66- const bool quant_out = (output_scale != Dtype (1.0 ) || output_zero_point != 0 );
67- const bool quant_w = (weight_scale != Dtype (1.0 ) || weight_zero_point != 0 );
68- const bool quant_b = (this ->bias_term_ && (bias_scale != Dtype (1.0 ) || bias_zero_point != 0 ));
69- if (quant_w) {
70- Dtype *qw = this ->blobs_ [0 ]->mutable_cpu_data ();
71- caffe_cpu_dequantize<Dtype>(this ->blobs_ [0 ]->count (), qw, weight_scale, weight_zero_point);
66+ /*** Quantization Computation
67+ (1) shift input/weight/bias by subtracting the corresponding zero_point
68+ (2) compute Convolution+Bias on the integer value range
69+ (3) scale the output by input_scale*weight_scale/output_scale and round it, and
70+ (4) shift the output by adding output_zero_point
71+ * Assumption: bias_scale = input_scale*weight_scale
72+ For a floating-value model, only (2) is computed, with floating values
73+ ***/
74+ const bool shift_input = (input_zero_point != 0 );
75+ const bool shift_weight = (weight_zero_point != 0 );
76+ const bool shift_bias = (bias_zero_point != 0 );
77+ const bool scale_output = (input_scale != Dtype (1.0 ) || weight_scale != Dtype (1.0 ) ||
78+ output_scale != Dtype (1.0 ));
79+ const bool shift_output = (output_zero_point != 0 );
80+
81+ if (shift_weight) { // shift the quantized weight
82+ caffe_add_scalar<Dtype>(W->count (), Dtype (-weight_zero_point), W->mutable_cpu_data ());
7283 }
73- if (quant_b) {
74- Dtype *qb = this ->blobs_ [1 ]->mutable_cpu_data ();
75- caffe_cpu_dequantize<Dtype>(this ->blobs_ [1 ]->count (), qb, bias_scale, bias_zero_point);
84+ if (shift_bias) {
85+ caffe_add_scalar<Dtype>(B->count (), Dtype (-bias_zero_point), B->mutable_cpu_data ());
7686 }
7787
7888 const Dtype* weight = this ->blobs_ [0 ]->cpu_data ();
7989 for (int i = 0 ; i < bottom.size (); ++i) {
80- if (quant_in ) {
81- Dtype* qin = bottom[i]->mutable_cpu_data ();
82- caffe_cpu_dequantize< Dtype>( bottom[i]->count (), qin, input_scale, input_zero_point );
90+ if (shift_input ) {
91+ caffe_add_scalar< Dtype>( bottom[i]->count (),
92+ Dtype (-input_zero_point), bottom[i]->mutable_cpu_data () );
8393 }
8494
8595 const Dtype* bottom_data = bottom[i]->cpu_data ();
@@ -95,8 +105,14 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
95105 }
96106
97107 const int count_t = top[i]->count ();
98- if (quant_out) {
99- caffe_cpu_quantize<Dtype>(count_t , top_data, output_scale, output_zero_point);
108+ if (scale_output) {
109+ // Dtype out_scal = input_scale * weight_scale / output_scale;
110+ Dtype out_scal = bias_scale / output_scale;
111+ caffe_scal<Dtype>(count_t , out_scal, top_data);
112+ caffe_cpu_round<Dtype>(count_t , top_data);
113+ }
114+ if (shift_output) {
115+ caffe_add_scalar<Dtype>(count_t , Dtype (output_zero_point), top_data);
100116 }
101117 if (saturate == ConvolutionParameter_SaturateMethod_Signed)
102118 caffe_cpu_signed_saturate (count_t , top_data);
@@ -107,19 +123,17 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
107123 if (saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
108124 caffe_cpu_unsigned_8bit_saturate (count_t , top_data);
109125
110- if (quant_in ) { // restore the quantized input blob
111- Dtype* qin = bottom[i]->mutable_cpu_data ();
112- caffe_cpu_quantize< Dtype>( bottom[i]->count (), qin, input_scale, input_zero_point );
126+ if (shift_input ) { // shift the quantized input blob back to correct range
127+ caffe_add_scalar< Dtype>( bottom[i]->count (),
128+ Dtype (input_zero_point), bottom[i]->mutable_cpu_data () );
113129 }
114130 }
115- // restore quantized weight/bias
116- if (quant_w) {
117- Dtype *qw = this ->blobs_ [0 ]->mutable_cpu_data ();
118- caffe_cpu_quantize<Dtype>(this ->blobs_ [0 ]->count (), qw, weight_scale, weight_zero_point);
131+ // shift quantized weight/bias back to correct range
132+ if (shift_weight) {
133+ caffe_add_scalar<Dtype>(W->count (), Dtype (weight_zero_point), W->mutable_cpu_data ());
119134 }
120- if (quant_b) {
121- Dtype *qb = this ->blobs_ [1 ]->mutable_cpu_data ();
122- caffe_cpu_quantize<Dtype>(this ->blobs_ [1 ]->count (), qb, bias_scale, bias_zero_point);
135+ if (shift_bias) {
136+ caffe_add_scalar<Dtype>(B->count (), Dtype (bias_zero_point), B->mutable_cpu_data ());
123137 }
124138}
125139
0 commit comments