Commit 01b60fa

Author: snoopyisadog
Commit message: quantizedConv computes on integer value

1 parent (d10d1e4) · commit 01b60fa

File tree: 1 file changed (+41, -27 lines)

src/caffe/layers/conv_layer.cpp

Lines changed: 41 additions & 27 deletions
@@ -2,6 +2,8 @@
 
 #include "caffe/layers/conv_layer.hpp"
 #include "caffe/util/math_functions.hpp"
+#define W this->blobs_[0]
+#define B this->blobs_[1]
 
 namespace caffe {
 
@@ -52,34 +54,42 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
-  // set up quantization parameters: scale + zero_point
   const Dtype input_scale = this->input_scale_;
   const Dtype output_scale = this->output_scale_;
   const Dtype weight_scale = this->weight_scale_;
-  const Dtype bias_scale = this->bias_scale_;
+  const Dtype bias_scale = this->bias_scale_;  // bias_scale = input_scale * weight_scale
   const int input_zero_point = this->input_zero_point_;
   const int output_zero_point = this->output_zero_point_;
   const int weight_zero_point = this->weight_zero_point_;
   const int bias_zero_point = this->bias_zero_point_;
   const Dtype saturate = this->saturate_;
-  const bool quant_in = (input_scale != Dtype(1.0) || input_zero_point != 0);
-  const bool quant_out = (output_scale != Dtype(1.0) || output_zero_point != 0);
-  const bool quant_w = (weight_scale != Dtype(1.0) || weight_zero_point != 0);
-  const bool quant_b = (this->bias_term_ && (bias_scale != Dtype(1.0) || bias_zero_point != 0));
-  if (quant_w) {
-    Dtype *qw = this->blobs_[0]->mutable_cpu_data();
-    caffe_cpu_dequantize<Dtype>(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point);
+  /*** Quantization Computation
+    (1) shift input/weight/bias w.r.t. the corresponding zero_point
+    (2) compute Convolution+Bias on the integer value range
+    (3) scale the output by input_scale*weight_scale/output_scale, and
+    (4) shift the output by output_zero_point
+    *Assumption is that bias_scale = input_scale*weight_scale
+    For a floating-value model, only (2) is computed with floating values
+  ***/
+  const bool shift_input = (input_zero_point != 0);
+  const bool shift_weight = (weight_zero_point != 0);
+  const bool shift_bias = (bias_zero_point != 0);
+  const bool scale_output = (input_scale != Dtype(1.0) || weight_scale != Dtype(1.0) ||
+      output_scale != Dtype(1.0));
+  const bool shift_output = (output_zero_point != 0);
+
+  if (shift_weight) {  // shift the quantized weight
+    caffe_add_scalar<Dtype>(W->count(), Dtype(-weight_zero_point), W->mutable_cpu_data());
   }
-  if (quant_b) {
-    Dtype *qb = this->blobs_[1]->mutable_cpu_data();
-    caffe_cpu_dequantize<Dtype>(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point);
+  if (shift_bias) {
+    caffe_add_scalar<Dtype>(B->count(), Dtype(-bias_zero_point), B->mutable_cpu_data());
   }
 
   const Dtype* weight = this->blobs_[0]->cpu_data();
   for (int i = 0; i < bottom.size(); ++i) {
-    if (quant_in) {
-      Dtype* qin = bottom[i]->mutable_cpu_data();
-      caffe_cpu_dequantize<Dtype>(bottom[i]->count(), qin, input_scale, input_zero_point);
+    if (shift_input) {
+      caffe_add_scalar<Dtype>(bottom[i]->count(),
+          Dtype(-input_zero_point), bottom[i]->mutable_cpu_data());
     }
 
     const Dtype* bottom_data = bottom[i]->cpu_data();
@@ -95,8 +105,14 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     }
 
     const int count_t = top[i]->count();
-    if (quant_out) {
-      caffe_cpu_quantize<Dtype>(count_t, top_data, output_scale, output_zero_point);
+    if (scale_output) {
+      // Dtype out_scal = input_scale * weight_scale / output_scale;
+      Dtype out_scal = bias_scale / output_scale;
+      caffe_scal<Dtype>(count_t, out_scal, top_data);
+      caffe_cpu_round<Dtype>(count_t, top_data);
+    }
+    if (shift_output) {
+      caffe_add_scalar<Dtype>(count_t, Dtype(output_zero_point), top_data);
     }
     if (saturate == ConvolutionParameter_SaturateMethod_Signed)
      caffe_cpu_signed_saturate(count_t, top_data);
@@ -107,19 +123,17 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     if (saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
       caffe_cpu_unsigned_8bit_saturate(count_t, top_data);
 
-    if (quant_in) {  // restore the quantized input blob
-      Dtype* qin = bottom[i]->mutable_cpu_data();
-      caffe_cpu_quantize<Dtype>(bottom[i]->count(), qin, input_scale, input_zero_point);
+    if (shift_input) {  // shift the quantized input blob back to the correct range
+      caffe_add_scalar<Dtype>(bottom[i]->count(),
+          Dtype(input_zero_point), bottom[i]->mutable_cpu_data());
     }
   }
-  // restore quantized weight/bias
-  if (quant_w) {
-    Dtype *qw = this->blobs_[0]->mutable_cpu_data();
-    caffe_cpu_quantize<Dtype>(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point);
+  // shift quantized weight/bias back to the correct range
+  if (shift_weight) {
+    caffe_add_scalar<Dtype>(W->count(), Dtype(weight_zero_point), W->mutable_cpu_data());
   }
-  if (quant_b) {
-    Dtype *qb = this->blobs_[1]->mutable_cpu_data();
-    caffe_cpu_quantize<Dtype>(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point);
+  if (shift_bias) {
+    caffe_add_scalar<Dtype>(B->count(), Dtype(bias_zero_point), B->mutable_cpu_data());
   }
 }
 
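The new in-code comment block describes the computation in four steps: shift by the zero_points, convolve on integer values, rescale by input_scale*weight_scale/output_scale, then shift by output_zero_point. As a sanity check, here is a minimal standalone C++ sketch (not part of the commit; all scale and zero_point values are hypothetical) that walks a 3-element dot product, standing in for one output element of the convolution, through the same four steps under the commit's assumption that bias_scale = input_scale * weight_scale.

// quantized_conv_sketch.cc -- illustration only, not the Caffe layer code.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical quantization parameters (for illustration only).
  const float input_scale = 0.5f, weight_scale = 0.25f, output_scale = 1.0f;
  const int   input_zp = 10, weight_zp = 5, output_zp = 3;
  const float bias_scale = input_scale * weight_scale;  // assumed relation

  // Quantized (integer-valued) input, weight, and bias stored as floats,
  // mirroring how the layer keeps quantized values in Dtype blobs.
  std::vector<float> input  = {12.f, 14.f, 9.f};
  std::vector<float> weight = {7.f, 4.f, 6.f};
  const float bias = 8.f;  // quantized with bias_scale, zero_point 0

  // (1) shift input/weight w.r.t. their zero_points.
  for (float &v : input)  v -= input_zp;
  for (float &v : weight) v -= weight_zp;

  // (2) compute the dot product + bias on the shifted integer values.
  float acc = bias;
  for (size_t k = 0; k < input.size(); ++k) acc += input[k] * weight[k];

  // (3) rescale to the output domain and round.
  acc = std::round(acc * bias_scale / output_scale);

  // (4) shift by the output zero_point and saturate (here: unsigned 8-bit).
  acc += output_zp;
  acc = std::min(255.f, std::max(0.f, acc));

  std::printf("quantized output: %g\n", acc);
  return 0;
}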