
Commit 7e21bc6

add quantized operators to match Caffe2 QNN pack
1 parent 5c900f7 commit 7e21bc6

File tree

include/caffe/layers/eltwise_layer.hpp
src/caffe/layers/conv_layer.cpp
src/caffe/layers/eltwise_layer.cpp
src/caffe/layers/pooling_layer.cpp
src/caffe/layers/power_layer.cpp

5 files changed, +37 −16 lines changed

include/caffe/layers/eltwise_layer.hpp

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ class EltwiseLayer : public Layer<Dtype> {
   double output_scale_; //CUSTOMIZATION
   int output_zero_point_; //CUSTOMIZATION
   Dtype saturate_; //CUSTOMIZATION
+  int quantize_method_; //CUSTOMIZATION
   int outer_dim_, inner_dim_, eltwise_dim_, dim_; //CUSTOMIZATION
   int axis_; //CUSTOMIZATION
   Blob<Dtype> eltwise_multiplier_;
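The new quantize_method_ member parallels the existing CUSTOMIZATION fields above it: eltwise_layer.cpp below reads it from eltwise_param().quantize_method() in LayerSetUp and uses it at forward time to pick between the TFLite and Caffe2 add kernels.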

src/caffe/layers/conv_layer.cpp

Lines changed: 9 additions & 1 deletion
@@ -155,12 +155,20 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
         }
       }
     }
-    else { // quantize_method_ == PoolingParameter_QuantizeMethod_ONNX
+    else if (quantize_method == ConvolutionParameter_QuantizeMethod_ONNX) {
       float onnx_scale = (float) input_scale * (float) weight_scale / (float) output_scale;
       for (int k = 0; k < count_t; ++k) {
         top_data[k] = std::rint(top_data[k] * onnx_scale);
       }
     }
+    else { // Caffe2
+      float out_scal = (float) input_scale * weight_scale;
+      out_scal /= output_scale;
+      int q_shift;
+      // Caffe2 uses float; cast to double to fit the function call
+      int q_scal = tfl_QuantizeMultiplier((double) out_scal, &q_shift);
+      MultiplyByQuantizedMultiplierVR(count_t, top_data, q_scal, q_shift, 2);
+    }
   }

   if (shift_output) {
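The Caffe2 branch reuses the TFLite-style fixed-point requantization helpers rather than floating-point scaling. Their exact signatures are not part of this diff, so the following is only a minimal sketch of what tfl_QuantizeMultiplier and MultiplyByQuantizedMultiplierVR are assumed to compute; the helper names are illustrative, and the real MultiplyByQuantizedMultiplierVR presumably uses saturating rounding-doubling high multiplies instead of plain double arithmetic.

#include <cmath>
#include <cstdint>

// Sketch: decompose a real multiplier m into a 31-bit fixed-point integer q
// and a power-of-two exponent e such that m ~= (q / 2^31) * 2^e.
int QuantizeMultiplierSketch(double multiplier, int* shift) {
  if (multiplier == 0.0) { *shift = 0; return 0; }
  const double q = std::frexp(multiplier, shift);  // multiplier = q * 2^shift, q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) { q_fixed /= 2; ++(*shift); }  // rounding pushed q up to 1.0
  return static_cast<int>(q_fixed);
}

// Sketch: requantize each accumulator with the fixed-point multiplier.
template <typename Dtype>
void MultiplyByQuantizedMultiplierSketch(int n, Dtype* data, int q_scal, int q_shift) {
  for (int k = 0; k < n; ++k) {
    const double scaled = static_cast<double>(data[k]) * q_scal / (1LL << 31);
    data[k] = static_cast<Dtype>(std::round(std::ldexp(scaled, q_shift)));
  }
}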

src/caffe/layers/eltwise_layer.cpp

Lines changed: 11 additions & 13 deletions
@@ -43,6 +43,7 @@ void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   output_scale_ = this->layer_param_.eltwise_param().output_scale();
   output_zero_point_ = this->layer_param_.eltwise_param().output_zero_point();
   saturate_ = this->layer_param_.eltwise_param().saturate();
+  quantize_method_ = this->layer_param_.eltwise_param().quantize_method();

   //<--CUSTOMIZATION, for broadcasting
   const EltwiseParameter& param = this->layer_param_.eltwise_param();
@@ -100,13 +101,6 @@ void EltwiseLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
   //CUSTOMIZATION-->
 }

-template <typename Dtype>
-int affine_and_shift(const Dtype x, const int zp_in, const double mul, const int shift) {
-  int r = (int) std::round((x - zp_in) * mul);
-  r = r << shift;
-  return r;
-}
-
 template <typename Dtype>
 void tflite_add_kernel(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top,
     const vector<double> &input_scale, const vector<int> &input_zero_point,
@@ -151,17 +145,18 @@ void tflite_add_kernel(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top,
   }
 }

-typedef double Stype; // scale type
+typedef float Stype; // scale type; Caffe2 uses float as its floating-point representation (of tensors and scales)
 template <typename Dtype>
 void caffe2_int8add_kernel(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top,
     const vector<double> &input_scale, const vector<int> &input_zero_point,
     const double &output_scale, const int &output_zero_point) {
   // refer to https://github.com/pytorch/pytorch/pull/14089#issuecomment-439545562
-  Stype max_scale = std::max(input_scale[0], input_scale[1]) / output_scale;
+  Stype in_s0 = input_scale[0], in_s1 = input_scale[1], out_s = output_scale;
+  Stype max_scale = std::max(in_s0, in_s1) / out_s;
   const int max_22bits = 1 << 21;
   int shift = 0;
-  Stype a_multiplier = input_scale[0] / output_scale;
-  Stype b_multiplier = input_scale[1] / output_scale;
+  Stype a_multiplier = in_s0 / out_s;
+  Stype b_multiplier = in_s1 / out_s;
   while (max_scale < max_22bits) {
     // after the loop, 2^21 <= max_scale < 2^22; cast to integer it occupies 22 bits
     max_scale *= 2;
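Per the linked PyTorch PR comment, the Caffe2/QNNPACK int8 add requantizes both inputs with integer multipliers that have been doubled until the larger one fills 22 bits, then shifts the accumulated result back down. A minimal self-contained sketch of that per-element computation (the function and variable names are illustrative, not the layer's actual API):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch of the Caffe2/QNNPACK-style int8 add described in
// https://github.com/pytorch/pytorch/pull/14089#issuecomment-439545562
int8_t requant_add_sketch(int8_t a, int8_t b,
                          float scale_a, float scale_b, float scale_out,
                          int zp_a, int zp_b, int zp_out) {
  float a_mult = scale_a / scale_out;
  float b_mult = scale_b / scale_out;
  float max_scale = std::max(a_mult, b_mult);
  int shift = 0;
  // Double the multipliers until the larger one lies in [2^21, 2^22),
  // i.e. occupies 22 bits as an integer, and remember the shift to undo.
  while (max_scale < (1 << 21)) {
    max_scale *= 2; a_mult *= 2; b_mult *= 2; ++shift;
  }
  const int32_t qa = static_cast<int32_t>(std::lrintf(a_mult));
  const int32_t qb = static_cast<int32_t>(std::lrintf(b_mult));
  // Integer affine combination of the zero-point-centered inputs...
  const int64_t acc = static_cast<int64_t>(qa) * (a - zp_a)
                    + static_cast<int64_t>(qb) * (b - zp_b);
  // ...then undo the shift (with rounding) and re-center on the output zero point.
  const int64_t rounding = (shift > 0) ? (1LL << (shift - 1)) : 0;
  const int32_t out = static_cast<int32_t>((acc + rounding) >> shift) + zp_out;
  return static_cast<int8_t>(std::min(127, std::max(-128, static_cast<int>(out))));
}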
@@ -252,8 +247,11 @@ void EltwiseLayer<Dtype>::Forward_cpu(
     case EltwiseParameter_EltwiseOp_SUM:
       if (is_quant) {
         // introduce custom computation
-        //caffe2_int8add_kernel(bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
-        tflite_add_kernel(bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
+        if (quantize_method_ == ConvolutionParameter_QuantizeMethod_tflite) {
+          tflite_add_kernel(bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
+        } else {
+          caffe2_int8add_kernel(bottom, top, input_scale_, input_zero_point_, output_scale_, output_zero_point_);
+        }
         break;
       }
       caffe_set(count, Dtype(0), top_data);

src/caffe/layers/pooling_layer.cpp

Lines changed: 11 additions & 1 deletion
@@ -461,14 +461,24 @@ void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
               acc += output_zero_point_;
             }
             top_data[ph * pooled_width_ + pw] = acc;
-          } else { // quantize_method_ == PoolingParameter_QuantizeMethod_ONNX
+          } else if (quantize_method_ == PoolingParameter_QuantizeMethod_ONNX) {
             float scale = (float) input_scale_ / ((float) output_scale_ * (float) pool_size);
             Dtype acc = top_data[ph * pooled_width_ + pw];
             acc -= input_zero_point_ * pool_size;
             acc = std::rint(acc * scale);
             acc += output_zero_point_;
             top_data[ph * pooled_width_ + pw] = acc;
           }
+          else { // Caffe2
+            // https://github.com/pytorch/QNNPACK/blob/7d2a4e9931a82adc3814275b6219a03e24e36b4c/src/average-pooling.c#L176-L179
+            float scale = (float) input_scale_ / ((float) output_scale_ * (float) pool_size);
+            Dtype acc = top_data[ph * pooled_width_ + pw];
+            acc -= input_zero_point_ * pool_size;
+            //acc = std::round(acc / pool_size);
+            acc = std::round(acc * scale);
+            acc += output_zero_point_;
+            top_data[ph * pooled_width_ + pw] = acc;
+          }
         }
         else {
           top_data[ph * pooled_width_ + pw] /= pool_size;
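The Caffe2 branch differs from the ONNX branch only in using std::round instead of std::rint; in both, the 1/pool_size division is folded into the requantization scale, as in the linked QNNPACK source. A quick worked example with made-up quantization parameters:

#include <cmath>
#include <cstdio>

int main() {
  // Made-up parameters: 2x2 average pool over quantized inputs 10 + 12 + 14 + 16.
  const float input_scale = 0.5f, output_scale = 0.25f;
  const int pool_size = 4, input_zero_point = 3, output_zero_point = 5;
  float acc = 52.0f;                                             // running sum

  const float scale = input_scale / (output_scale * pool_size);  // 0.5
  acc -= input_zero_point * pool_size;                           // 52 - 12 = 40
  acc = std::round(acc * scale);                                 // round(40 * 0.5) = 20
  acc += output_zero_point;                                      // 25
  // Check in real values: avg = 0.5 * (52 - 12) / 4 = 5.0, and 5.0 / 0.25 + 5 = 25.
  std::printf("quantized output = %.0f\n", acc);                 // prints 25
}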

src/caffe/layers/power_layer.cpp

Lines changed: 5 additions & 1 deletion
@@ -41,8 +41,12 @@ void PowerLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       for (int i = 0; i < count_t; ++i) {
         top_data[i] = std::round(top_data[i]);
       }
-    } else { // QuantizeMethod_ONNX
+    } else if (quantize_method_ == ConvolutionParameter_QuantizeMethod_ONNX) {
       caffe_cpu_round(count, top_data);
+    } else { // Caffe2
+      for (int i = 0; i < count_t; ++i) {
+        top_data[i] = std::nearbyint(top_data[i]);
+      }
     }
   }
   if (shift_ != Dtype(0)) {
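The three branches differ chiefly in rounding behavior: std::round rounds halves away from zero, while std::rint and std::nearbyint follow the current floating-point rounding mode, which defaults to round-half-to-even. A quick demonstration of the distinction:

#include <cmath>
#include <cstdio>

int main() {
  // Default rounding mode is FE_TONEAREST (round-half-to-even).
  std::printf("round(2.5)     = %.0f\n", std::round(2.5));      // 3: halves go away from zero
  std::printf("nearbyint(2.5) = %.0f\n", std::nearbyint(2.5));  // 2: halves go to even
  std::printf("round(3.5)     = %.0f\n", std::round(3.5));      // 4
  std::printf("nearbyint(3.5) = %.0f\n", std::nearbyint(3.5));  // 4
}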
