@@ -43,8 +43,32 @@ void DeconvolutionLayer<Dtype>::compute_output_shape() {
// CPU forward pass of DeconvolutionLayer with optional quantization handling.
// For each bottom/top pair it dequantizes the input in place, runs the
// per-sample computation, then quantizes and saturates the top blob and puts
// the bottom blob back into quantized form. Weights and bias get the same
// dequantize-before / quantize-after treatment around the whole loop.
//
// NOTE(review): this text is a diff view. The leading digits and '+' on each
// line look like old/new line numbers and addition markers from the diff
// renderer, not C++ tokens, and a hunk boundary hides part of the inner loop.
4343template <typename Dtype>
4444void DeconvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
4545 const vector<Blob<Dtype>*>& top) {
// Copy the layer's quantization settings (scales, zero points, saturation
// mode) into locals for the duration of this call.
46+ const Dtype input_scale = this ->input_scale_ ;
47+ const Dtype output_scale = this ->output_scale_ ;
48+ const Dtype weight_scale = this ->weight_scale_ ;
49+ const Dtype bias_scale = this ->bias_scale_ ;
50+ const int input_zero_point = this ->input_zero_point_ ;
51+ const int output_zero_point = this ->output_zero_point_ ;
52+ const int weight_zero_point = this ->weight_zero_point_ ;
53+ const int bias_zero_point = this ->bias_zero_point_ ;
// NOTE(review): saturate is held as Dtype but is compared against
// ConvolutionParameter_SaturateMethod_* constants further down.
54+ const Dtype saturate = this ->saturate_ ;
// A tensor counts as quantized when its scale is not 1.0 or its zero point is
// non-zero. The bias flag additionally requires bias_term_ to be set.
55+ const bool quant_in = (input_scale != Dtype (1.0 ) || input_zero_point != 0 );
56+ const bool quant_out = (output_scale != Dtype (1.0 ) || output_zero_point != 0 );
57+ const bool quant_w = (weight_scale != Dtype (1.0 ) || weight_zero_point != 0 );
58+ const bool quant_b = (this ->bias_term_ && (bias_scale != Dtype (1.0 ) || bias_zero_point != 0 ));
// Dequantize weights (blobs_[0]) and bias (blobs_[1]) in place through their
// mutable CPU pointers; both are quantized again at the end of this function.
// NOTE(review): this rewrites the layer's parameter blobs during forward.
// Whether the round trip restores the exact original values depends on
// caffe_cpu_dequantize / caffe_cpu_quantize, which are not visible here.
59+ if (quant_w) {
60+ Dtype *qw = this ->blobs_ [0 ]->mutable_cpu_data ();
61+ caffe_cpu_dequantize<Dtype>(this ->blobs_ [0 ]->count (), qw, weight_scale, weight_zero_point);
62+ }
63+ if (quant_b) {
64+ Dtype *qb = this ->blobs_ [1 ]->mutable_cpu_data ();
65+ caffe_cpu_dequantize<Dtype>(this ->blobs_ [1 ]->count (), qb, bias_scale, bias_zero_point);
66+ }
// Read-only weight pointer, fetched after the optional in-place dequantize.
4667 const Dtype* weight = this ->blobs_ [0 ]->cpu_data ();
// NOTE(review): int i is compared against bottom.size(), a signed/unsigned mix.
4768 for (int i = 0 ; i < bottom.size (); ++i) {
// Dequantize this bottom blob in place before it is read; it is quantized
// again after the matching top blob has been produced.
69+ if (quant_in) {
70+ caffe_cpu_dequantize<Dtype>(bottom[i]->count (), bottom[i]->mutable_cpu_data (), input_scale, input_zero_point);
71+ }
4872 const Dtype* bottom_data = bottom[i]->cpu_data ();
4973 Dtype* top_data = top[i]->mutable_cpu_data ();
// Loop over the num_ samples. The start of the loop body is hidden by the hunk
// boundary below, so the declaration of `bias` is not visible in this view.
5074 for (int n = 0 ; n < this ->num_ ; ++n) {
@@ -55,6 +79,30 @@ void DeconvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
// Bias is applied to the n-th sample's slice of the top blob, at offset
// n * top_dim_.
5579 this ->forward_cpu_bias (top_data + n * this ->top_dim_ , bias);
5680 }
5781 }
// Post-process the whole top blob: quantize it first (if an output scale or
// zero point is set), then apply saturation.
82+ const int count_t = top[i]->count ();
83+ if (quant_out) {
84+ caffe_cpu_quantize<Dtype>(count_t , top_data, output_scale, output_zero_point);
85+ }
// Call the saturation routine whose SaturateMethod constant equals `saturate`.
// Presumably these constants are distinct, so at most one branch runs; confirm
// against the ConvolutionParameter definition.
86+ if (saturate == ConvolutionParameter_SaturateMethod_Signed)
87+ caffe_cpu_signed_saturate (count_t , top_data);
88+ if (saturate == ConvolutionParameter_SaturateMethod_Unsigned)
89+ caffe_cpu_unsigned_saturate (count_t , top_data);
90+ if (saturate == ConvolutionParameter_SaturateMethod_Signed_8bit)
91+ caffe_cpu_signed_8bit_saturate (count_t , top_data);
92+ if (saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
93+ caffe_cpu_unsigned_8bit_saturate (count_t , top_data);
// Put bottom[i] back into quantized form, reversing the in-place dequantize
// done at the top of this iteration.
94+ if (quant_in) {
95+ caffe_cpu_quantize<Dtype>(bottom[i]->count (), bottom[i]->mutable_cpu_data (), input_scale, input_zero_point);
96+ }
97+ }
98+ // restore quantized weight/bias
// Uses the same scale and zero point as the dequantize calls before the loop.
99+ if (quant_w) {
100+ Dtype *qw = this ->blobs_ [0 ]->mutable_cpu_data ();
101+ caffe_cpu_quantize<Dtype>(this ->blobs_ [0 ]->count (), qw, weight_scale, weight_zero_point);
102+ }
103+ if (quant_b) {
104+ Dtype *qb = this ->blobs_ [1 ]->mutable_cpu_data ();
105+ caffe_cpu_quantize<Dtype>(this ->blobs_ [1 ]->count (), qb, bias_scale, bias_zero_point);
58106 }
59107}
60108
0 commit comments