@@ -11,11 +11,23 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
1111 const Dtype* weight = this ->blobs_ [0 ]->gpu_data ();
1212 Dtype input_scale = this ->input_scale_ ; // CUSTOMIZATION
1313 Dtype output_scale = this ->output_scale_ ; // CUSTOMIZATION
14+ int input_zero_point = this ->input_zero_point_ ; // CUSTOMIZATION
15+ int output_zero_point = this ->output_zero_point_ ; // CUSTOMIZATION
1416 Dtype saturate = this ->saturate_ ; // CUSTOMIZATION
1517 for (int i = 0 ; i < bottom.size (); ++i) {
1618 Dtype* bottom_data = bottom[i]->mutable_gpu_data ();
1719 // <--CUSTOMIZATION
1820 const int count_b = bottom[i]->count ();
21+ /* ** Denote input_scale=s0,input_zero_point=z0,input_blob=x0;
22+ output_scale=s1,output_zero_point=z1;
23+ Weight=W0, Bias=B0, X=Convolution
24+ ( (x0-z0)*s0 X W0 + B0 ) / s1 + z1
25+ = ( (x0-z0) X W0 + B0/s0 ) * s0/s1 + z1
26+ Tried both computations; neither achieves bit-wise agreement with Caffe2
27+ ***/
28+ if (input_zero_point != 0 ) {
29+ caffe_gpu_add_scalar (count_b, Dtype (-input_zero_point), bottom_data);
30+ }
1931 if (input_scale != Dtype (1 )) {
2032 caffe_gpu_scal (count_b, input_scale, bottom_data);
2133 }
@@ -35,6 +47,9 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
3547 caffe_gpu_scal (count_t , output_scale, top_data);
3648 caffe_gpu_round (count_t , top_data);
3749 }
50+ if (output_zero_point != 0 ) {
51+ caffe_gpu_add_scalar (count_t , Dtype (output_zero_point), top_data);
52+ }
3853 if (saturate == ConvolutionParameter_SaturateMethod_Signed)
3954 caffe_gpu_signed_saturate (count_t , top_data);
4055 if (saturate == ConvolutionParameter_SaturateMethod_Unsigned)
@@ -43,6 +58,17 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
4358 caffe_gpu_signed_8bit_saturate (count_t , top_data);
4459 if (saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
4560 caffe_gpu_unsigned_8bit_saturate (count_t , top_data);
61+
62+ // restore the original quantized bottom blob (undo the in-place shift/scale)
63+ // in case some other layer consumes the same input blob
64+ if (input_scale != Dtype (1 )) {
65+ caffe_gpu_scal (count_b, Dtype (1.0 ) / input_scale, bottom_data);
66+ caffe_gpu_round (count_b, bottom_data);
67+ }
68+ if (input_zero_point != 0 ) {
69+ caffe_gpu_add_scalar (count_b, Dtype (input_zero_point), bottom_data);
70+ }
71+ // caffe_gpu_unsigned_8bit_saturate(count_b, bottom_data);
4672 // CUSTOMIZATION-->
4773 }
4874}
0 commit comments