Skip to content

Commit dd85dae

Browse files
author
snoopyisadog
committed
Make forward_gpu support quantization parameters for the Conv and Eltwise::SUM layers
1 parent 1590615 commit dd85dae

File tree

7 files changed

+79
-6
lines changed

7 files changed

+79
-6
lines changed

include/caffe/layers/base_conv_layer.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
101101
int pad_t_; //CUSTOMIZATION
102102
int pad_b_; //CUSTOMIZATION
103103

104+
int input_zero_point_; //CUSTOMIZATION
105+
int output_zero_point_; //CUSTOMIZATION
104106
Dtype input_scale_; //CUSTOMIZATION
105107
Dtype output_scale_; //CUSTOMIZATION
106108
Dtype saturate_; //CUSTOMIZATION

src/caffe/layers/base_conv_layer.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,25 @@ void BaseConvolutionLayer<Dtype>::LayerSetUpInternal(LayerParam conv_param,
7777
}
7878

7979
//<--CUSTOMIZATION
80-
if (conv_param.has_input_scale()){
80+
if (conv_param.has_input_scale()) {
8181
input_scale_ = conv_param.input_scale();
82-
} else{
83-
input_scale_ = 1;
82+
} else {
83+
input_scale_ = 1;
84+
}
85+
if (conv_param.has_input_zero_point()) {
86+
input_zero_point_ = conv_param.input_zero_point();
87+
} else {
88+
input_zero_point_ = 0;
8489
}
8590
if (conv_param.has_output_scale()){
8691
output_scale_ = conv_param.output_scale();
8792
} else{
88-
output_scale_ = 1;
93+
output_scale_ = 1;
94+
}
95+
if (conv_param.has_output_zero_point()) {
96+
output_zero_point_ = conv_param.output_zero_point();
97+
} else {
98+
output_zero_point_ = 0;
8999
}
90100

91101
saturate_ = conv_param.saturate();

src/caffe/layers/conv_layer.cu

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,23 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
1111
const Dtype* weight = this->blobs_[0]->gpu_data();
1212
Dtype input_scale = this->input_scale_; //CUSTOMIZATION
1313
Dtype output_scale = this->output_scale_; //CUSTOMIZATION
14+
int input_zero_point = this->input_zero_point_; //CUSTOMIZATION
15+
int output_zero_point = this->output_zero_point_; //CUSTOMIZATION
1416
Dtype saturate = this->saturate_; //CUSTOMIZATION
1517
for (int i = 0; i < bottom.size(); ++i) {
1618
Dtype* bottom_data = bottom[i]->mutable_gpu_data();
1719
//<--CUSTOMIZATION
1820
const int count_b = bottom[i]->count();
21+
/*** Denote input_scale=s0,input_zero_point=z0,input_blob=x0;
22+
output_scale=s1,output_zero_point=z1;
23+
Weight=W0, Bias=B0, X=Convolution
24+
( (x0-z0)*s0 X W0 + B0 ) / s1 + z1
25+
= ( (x0-z0) X W0 + B0/s0 ) * s0/s1 + z1
26+
Tried both computations; neither achieves bit-wise precision relative to Caffe2
27+
***/
28+
if (input_zero_point != 0) {
29+
caffe_gpu_add_scalar(count_b, Dtype(-input_zero_point), bottom_data);
30+
}
1931
if (input_scale != Dtype(1)) {
2032
caffe_gpu_scal(count_b, input_scale, bottom_data);
2133
}
@@ -35,6 +47,9 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
3547
caffe_gpu_scal(count_t, output_scale, top_data);
3648
caffe_gpu_round(count_t, top_data);
3749
}
50+
if (output_zero_point != 0) {
51+
caffe_gpu_add_scalar(count_t, Dtype(output_zero_point), top_data);
52+
}
3853
if(saturate == ConvolutionParameter_SaturateMethod_Signed)
3954
caffe_gpu_signed_saturate(count_t, top_data);
4055
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned)
@@ -43,6 +58,17 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
4358
caffe_gpu_signed_8bit_saturate(count_t, top_data);
4459
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
4560
caffe_gpu_unsigned_8bit_saturate(count_t, top_data);
61+
62+
// restore the quantized bottom blobs to their original values
63+
// in case some other layer consumes the same input blob
64+
if (input_scale != Dtype(1)) {
65+
caffe_gpu_scal(count_b, Dtype(1.0) / input_scale, bottom_data);
66+
caffe_gpu_round(count_b, bottom_data);
67+
}
68+
if (input_zero_point != 0) {
69+
caffe_gpu_add_scalar(count_b, Dtype(input_zero_point), bottom_data);
70+
}
71+
//caffe_gpu_unsigned_8bit_saturate(count_b, bottom_data);
4672
//CUSTOMIZATION-->
4773
}
4874
}

src/caffe/layers/deconv_layer.cu

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,16 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
1010
const Dtype* weight = this->blobs_[0]->gpu_data();
1111
Dtype input_scale = this->input_scale_; //CUSTOMIZATION
1212
Dtype output_scale = this->output_scale_; //CUSTOMIZATION
13+
int input_zero_point = this->input_zero_point_; //CUSTOMIZATION
14+
int output_zero_point = this->output_zero_point_; //CUSTOMIZATION
1315
Dtype saturate = this->saturate_; //CUSTOMIZATION
1416
for (int i = 0; i < bottom.size(); ++i) {
1517
Dtype* bottom_data = bottom[i]->mutable_gpu_data();
1618
//<--CUSTOMIZATION
1719
const int count_b = bottom[i]->count();
20+
if (input_zero_point != 0) {
21+
caffe_gpu_add_scalar(count_b, Dtype(-input_zero_point), bottom_data);
22+
}
1823
if (input_scale != Dtype(1)) {
1924
caffe_gpu_scal(count_b, input_scale, bottom_data);
2025
}
@@ -34,6 +39,9 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
3439
caffe_gpu_scal(count_t, output_scale, top_data);
3540
caffe_gpu_round(count_t, top_data);
3641
}
42+
if (output_zero_point != 0) {
43+
caffe_gpu_add_scalar(count_t, Dtype(output_zero_point), top_data);
44+
}
3745
if(saturate == ConvolutionParameter_SaturateMethod_Signed)
3846
caffe_gpu_signed_saturate(count_t, top_data);
3947
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned)
@@ -42,6 +50,13 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
4250
caffe_gpu_signed_8bit_saturate(count_t, top_data);
4351
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
4452
caffe_gpu_unsigned_8bit_saturate(count_t, top_data);
53+
if (input_scale != Dtype(1)) {
54+
caffe_gpu_scal(count_b, Dtype(1.0) / input_scale, bottom_data);
55+
caffe_gpu_round(count_b, bottom_data);
56+
}
57+
if (input_zero_point != 0) {
58+
caffe_gpu_add_scalar(count_b, Dtype(input_zero_point), bottom_data);
59+
}
4560
//CUSTOMIZATION-->
4661
}
4762
}

src/caffe/layers/eltwise_layer.cu

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,15 @@ void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
7373
caffe_gpu_mul(count, top_data, bottom[i]->gpu_data(), top_data);
7474
}
7575
break;
76-
case EltwiseParameter_EltwiseOp_SUM:
76+
case EltwiseParameter_EltwiseOp_SUM:
77+
//<--CUSTOMIZATION
78+
for (int i = 0; i < bottom.size(); ++i) {
79+
// input = (bottom - ZeroPoint) * scale; scale is given as coeffs_
80+
if (input_zero_point_[i] != 0) {
81+
caffe_gpu_add_scalar(count, Dtype(-input_zero_point_[i]), bottom[i]->mutable_gpu_data());
82+
}
83+
}
84+
//CUSTOMIZATION-->
7785
caffe_gpu_set(count, Dtype(0.), top_data);
7886
// TODO(shelhamer) does cuBLAS optimize to sum for coeff = 1?
7987
for (int i = 0; i < bottom.size(); ++i) {
@@ -84,6 +92,10 @@ void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
8492
caffe_gpu_scal(count, output_scale_, top_data);
8593
caffe_gpu_round(count, top_data);
8694
}
95+
// output = top/scale + ZeroPoint
96+
if (output_zero_point_ != 0) {
97+
caffe_gpu_add_scalar(count, Dtype(output_zero_point_), top_data);
98+
}
8799
if(saturate_ == EltwiseParameter_SaturateMethod_Signed)
88100
caffe_gpu_signed_saturate(count, top_data);
89101
if(saturate_ == EltwiseParameter_SaturateMethod_Unsigned)
@@ -92,6 +104,12 @@ void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
92104
caffe_gpu_signed_8bit_saturate(count, top_data);
93105
if(saturate_ == EltwiseParameter_SaturateMethod_Unsigned_8bit)
94106
caffe_gpu_unsigned_8bit_saturate(count, top_data);
107+
// shift the bottom blobs back, in case they are inputs of some other residual connection
108+
for (int i = 0; i < bottom.size(); ++i) {
109+
if (input_zero_point_[i] != 0) {
110+
caffe_gpu_add_scalar(count, Dtype(input_zero_point_[i]), bottom[i]->mutable_gpu_data());
111+
}
112+
}
95113
//CUSTOMIZATION-->
96114
break;
97115
case EltwiseParameter_EltwiseOp_MAX:

src/caffe/layers/pooling_layer.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ __global__ void AvePoolForward_TF(const int nthreads,
315315
}
316316
else if(saturate == PoolingParameter_SaturateMethod_Unsigned_8bit)
317317
{
318-
top_data[index] = aveval;
318+
top_data[index] = rint(aveval / pool_size);
319319
if(top_data[index] > UNSIGNED_8BIT_SATURATE_MAX)
320320
top_data[index] = UNSIGNED_8BIT_SATURATE_MAX;
321321
if(top_data[index] < 0)

src/caffe/proto/caffe.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2583,6 +2583,8 @@ message SqueezeConvolutionParameter {
25832583

25842584
optional double input_scale = 46 [default = 1]; //CUSTOMIZATION, act as dummy param in squeeze_conv layer now
25852585
optional double output_scale = 47 [default = 1]; //CUSTOMIZATION, act as dummy param in squeeze_conv layer now
2586+
optional int32 input_zero_point = 49 [default = 0]; //CUSTOMIZATION, act as dummy param in squeeze_conv layer now
2587+
optional int32 output_zero_point = 50 [default = 0]; //CUSTOMIZATION, act as dummy param in squeeze_conv layer now
25862588
//<--CUSTOMIZATION
25872589
enum SaturateMethod {
25882590
None = 0;

0 commit comments

Comments
 (0)