@@ -43,6 +43,7 @@ void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
4343 output_scale_ = this ->layer_param_ .eltwise_param ().output_scale ();
4444 output_zero_point_ = this ->layer_param_ .eltwise_param ().output_zero_point ();
4545 saturate_ = this ->layer_param_ .eltwise_param ().saturate ();
46+ quantize_method_ = this ->layer_param_ .eltwise_param ().quantize_method ();
4647
4748 // <--CUSTOMIZATION, for broadcasting
4849 const EltwiseParameter& param = this ->layer_param_ .eltwise_param ();
@@ -100,13 +101,6 @@ void EltwiseLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
100101 // CUSTOMIZATION-->
101102}
102103
/**
 * @brief Offsets, rescales, rounds, and then left-shifts a value.
 *
 * Computes `round((x - zp_in) * mul)` (std::round: halfway cases round away
 * from zero), converts it to int, and shifts the result left by `shift` bits.
 *
 * @param x      Input value.
 * @param zp_in  Offset subtracted from `x` before scaling.
 * @param mul    Multiplier applied to the offset value before rounding.
 * @param shift  Number of bit positions to shift left; must be non-negative
 *               and smaller than the bit width of int.
 * @return The rounded value shifted left by `shift` bits.
 */
template <typename Dtype>
int affine_and_shift(const Dtype x, const int zp_in, const double mul, const int shift) {
  const int rounded = static_cast<int>(std::round((x - zp_in) * mul));
  // `rounded` is negative whenever x < zp_in, and `<<` on a negative signed
  // int is undefined behaviour before C++20. Shifting the unsigned bit
  // pattern is always defined and yields the same two's-complement result.
  return static_cast<int>(static_cast<unsigned int>(rounded) << shift);
}
109-
110104template <typename Dtype>
111105void tflite_add_kernel (const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top,
112106 const vector<double > &input_scale, const vector<int > &input_zero_point,
@@ -151,17 +145,18 @@ void tflite_add_kernel(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dty
151145 }
152146}
153147
154- typedef double Stype; // scale type
148+ typedef float Stype; // scale type; Caffe2 uses float as its floating-point representation (for tensors and scales)
155149template <typename Dtype>
156150void caffe2_int8add_kernel (const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top,
157151 const vector<double > &input_scale, const vector<int > &input_zero_point,
158152 const double &output_scale, const int &output_zero_point) {
159153 // refer to https://github.com/pytorch/pytorch/pull/14089#issuecomment-439545562
160- Stype max_scale = std::max (input_scale[0 ], input_scale[1 ]) / output_scale;
154+ Stype in_s0 = input_scale[0 ], in_s1 = input_scale[1 ], out_s = output_scale;
155+ Stype max_scale = std::max (in_s0, in_s1) / out_s;
161156 const int max_22bits = 1 << 21 ;
162157 int shift = 0 ;
163- Stype a_multiplier = input_scale[ 0 ] / output_scale ;
164- Stype b_multiplier = input_scale[ 1 ] / output_scale ;
158+ Stype a_multiplier = in_s0 / out_s ;
159+ Stype b_multiplier = in_s1 / out_s ;
165160 while (max_scale < max_22bits) {
166161 // the result will be 2^21 <= max_scale < 2^22; cast to integer it will occupy 22 bits
167162 max_scale *= 2 ;
@@ -252,8 +247,11 @@ void EltwiseLayer<Dtype>::Forward_cpu(
252247 case EltwiseParameter_EltwiseOp_SUM:
253248 if (is_quant) {
254249 // introduce custom computation
255- // caffe2_int8add_kernel(bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
256- tflite_add_kernel (bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
250+ if (quantize_method_ == ConvolutionParameter_QuantizeMethod_tflite) {
251+ tflite_add_kernel (bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
252+ } else {
253+ caffe2_int8add_kernel (bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
254+ }
257255 break ;
258256 }
259257 caffe_set (count, Dtype (0 ), top_data);
0 commit comments