Skip to content

Commit ae3b171

Browse files
committed
evquantize: fix for the signed/unsigned saturate for conv, deconv, eltwise
1 parent 75badde commit ae3b171

File tree

10 files changed

+72
-21
lines changed

10 files changed

+72
-21
lines changed

include/caffe/layers/base_conv_layer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class BaseConvolutionLayer : public Layer<Dtype> {
103103

104104
Dtype input_scale_; //CUSTOMIZATION
105105
Dtype output_scale_; //CUSTOMIZATION
106-
bool signed_saturate_; //CUSTOMIZATION
106+
Dtype saturate_; //CUSTOMIZATION
107107

108108
private:
109109
// wrap im2col/col2im so we don't have to remember the (long) argument lists

include/caffe/layers/eltwise_layer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class EltwiseLayer : public Layer<Dtype> {
4545

4646
bool stable_prod_grad_;
4747
Dtype output_scale_; //CUSTOMIZATION
48-
bool signed_saturate_; //CUSTOMIZATION
48+
Dtype saturate_; //CUSTOMIZATION
4949
};
5050

5151
} // namespace caffe

include/caffe/util/math_functions.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,10 @@ template <typename Dtype>
200200
void caffe_gpu_int(const int N, Dtype* y);
201201

202202
template <typename Dtype>
203-
void caffe_gpu_saturate(const int N, Dtype* y);
203+
void caffe_gpu_signed_saturate(const int N, Dtype* y);
204+
205+
template <typename Dtype>
206+
void caffe_gpu_unsigned_saturate(const int N, Dtype* y);
204207

205208
template <typename Dtype>
206209
void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);

src/caffe/layers/base_conv_layer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUpInternal(LayerParam conv_param,
8888
output_scale_ = 1;
8989
}
9090

91-
signed_saturate_ = conv_param.signed_saturate();
91+
saturate_ = conv_param.saturate();
9292
//CUSTOMIZATION-->
9393

9494
// Setup pad dimensions (pad_).

src/caffe/layers/conv_layer.cu

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
1111
const Dtype* weight = this->blobs_[0]->gpu_data();
1212
Dtype input_scale = this->input_scale_; //CUSTOMIZATION
1313
Dtype output_scale = this->output_scale_; //CUSTOMIZATION
14-
bool signed_saturate = this->signed_saturate_; //CUSTOMIZATION
14+
Dtype saturate = this->saturate_; //CUSTOMIZATION
1515
for (int i = 0; i < bottom.size(); ++i) {
1616
Dtype* bottom_data = bottom[i]->mutable_gpu_data();
1717
//<--CUSTOMIZATION
@@ -35,8 +35,10 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
3535
caffe_gpu_scal(count_t, output_scale, top_data);
3636
caffe_gpu_round(count_t, top_data);
3737
}
38-
if (signed_saturate)
39-
caffe_gpu_saturate(count_t, top_data);
38+
if(saturate == ReLUParameter_SaturateMethod_Signed)
39+
caffe_gpu_signed_saturate(count_t, top_data);
40+
if(saturate == ReLUParameter_SaturateMethod_Unsigned)
41+
caffe_gpu_unsigned_saturate(count_t, top_data);
4042
//CUSTOMIZATION-->
4143
}
4244
}

src/caffe/layers/deconv_layer.cu

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
1010
const Dtype* weight = this->blobs_[0]->gpu_data();
1111
Dtype input_scale = this->input_scale_; //CUSTOMIZATION
1212
Dtype output_scale = this->output_scale_; //CUSTOMIZATION
13+
Dtype saturate = this->saturate_; //CUSTOMIZATION
1314
for (int i = 0; i < bottom.size(); ++i) {
1415
Dtype* bottom_data = bottom[i]->mutable_gpu_data();
1516
//<--CUSTOMIZATION
@@ -33,6 +34,10 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
3334
caffe_gpu_scal(count_t, output_scale, top_data);
3435
caffe_gpu_round(count_t, top_data);
3536
}
37+
if(saturate == ReLUParameter_SaturateMethod_Signed)
38+
caffe_gpu_signed_saturate(count_t, top_data);
39+
if(saturate == ReLUParameter_SaturateMethod_Unsigned)
40+
caffe_gpu_unsigned_saturate(count_t, top_data);
3641
//CUSTOMIZATION-->
3742
}
3843
}

src/caffe/layers/eltwise_layer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
2626
}
2727
stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
2828
output_scale_ = this->layer_param_.eltwise_param().output_scale();
29-
signed_saturate_ = this->layer_param_.eltwise_param().signed_saturate();
29+
saturate_ = this->layer_param_.eltwise_param().saturate();
3030
}
3131

3232
template <typename Dtype>

src/caffe/layers/eltwise_layer.cu

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
#include "caffe/layers/eltwise_layer.hpp"
55
#include "caffe/util/math_functions.hpp"
66

7-
#define SIGNED_SATURATE_MAX 2047
8-
#define SIGNED_SATURATE_MIN -2048
9-
107
namespace caffe {
118

129
template <typename Dtype>
@@ -57,9 +54,11 @@ void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
5754
if (output_scale_ != Dtype(1)) {
5855
caffe_gpu_scal(count, output_scale_, top_data);
5956
caffe_gpu_round(count, top_data);
60-
if (signed_saturate_)
61-
caffe_gpu_saturate(count, top_data);
6257
}
58+
if(saturate_ == ReLUParameter_SaturateMethod_Signed)
59+
caffe_gpu_signed_saturate(count, top_data);
60+
if(saturate_ == ReLUParameter_SaturateMethod_Unsigned)
61+
caffe_gpu_unsigned_saturate(count, top_data);
6362
//CUSTOMIZATION-->
6463
break;
6564
case EltwiseParameter_EltwiseOp_MAX:

src/caffe/proto/caffe.proto

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,7 +1298,14 @@ message ConvolutionParameter {
12981298

12991299
optional double input_scale = 30 [default = 1]; //CUSTOMIZATION
13001300
optional double output_scale = 31 [default = 1]; //CUSTOMIZATION
1301-
optional bool signed_saturate = 32 [default = false]; //CUSTOMIZATION
1301+
//<--CUSTOMIZATION
1302+
enum SaturateMethod {
1303+
None = 0;
1304+
Signed = 1;
1305+
Unsigned = 2;
1306+
}
1307+
optional SaturateMethod saturate = 32 [default = None]; //control the output in certain range
1308+
//CUSTOMIZATION-->
13021309

13031310
optional uint32 group = 5 [default = 1]; // The group size for group conv
13041311

@@ -1604,7 +1611,14 @@ message EltwiseParameter {
16041611
// of computing the gradient for the PROD operation. (No effect for SUM op.)
16051612
optional bool stable_prod_grad = 3 [default = true];
16061613
optional double output_scale = 4 [default = 1]; //CUSTOMIZATION, only valid for SUM
1607-
optional bool signed_saturate = 5 [default = false]; //CUSTOMIZATION, only valid for SUM
1614+
//<--CUSTOMIZATION
1615+
enum SaturateMethod {
1616+
None = 0;
1617+
Signed = 1;
1618+
Unsigned = 2;
1619+
}
1620+
optional SaturateMethod saturate = 5 [default = None]; //only valid for SUM, control the output in certain range
1621+
//CUSTOMIZATION-->
16081622
}
16091623

16101624
// Message that stores parameters used by ELULayer
@@ -2322,7 +2336,14 @@ message SqueezeConvolutionParameter {
23222336

23232337
optional double input_scale = 46 [default = 1]; //CUSTOMIZATION, act as dummy param in squeeze_conv layer now
23242338
optional double output_scale = 47 [default = 1]; //CUSTOMIZATION, act as dummy param in squeeze_conv layer now
2325-
optional bool signed_saturate = 48 [default = false]; //CUSTOMIZATION, act as dummy param in squeeze_conv layer now
2339+
//<--CUSTOMIZATION
2340+
enum SaturateMethod {
2341+
None = 0;
2342+
Signed = 1;
2343+
Unsigned = 2;
2344+
}
2345+
optional SaturateMethod saturate = 48 [default = None]; //act as dummy param in squeeze_conv layer now
2346+
//CUSTOMIZATION-->
23262347

23272348
optional uint32 group = 5 [default = 1]; // The group size for group conv
23282349

src/caffe/util/math_functions.cu

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#define SIGNED_SATURATE_MAX 2047
1212
#define SIGNED_SATURATE_MIN -2048
13+
#define UNSIGNED_SATURATE_MAX 4095
1314

1415
namespace caffe {
1516

@@ -202,7 +203,7 @@ void caffe_gpu_int<double>(const int N, double* y) {
202203
}
203204

204205
template <typename Dtype>
205-
__global__ void saturate_kernel(const int n, Dtype* y) {
206+
__global__ void signed_saturate_kernel(const int n, Dtype* y) {
206207
CUDA_KERNEL_LOOP(index, n) {
207208
if(y[index] > SIGNED_SATURATE_MAX)
208209
y[index] = SIGNED_SATURATE_MAX;
@@ -212,15 +213,35 @@ __global__ void saturate_kernel(const int n, Dtype* y) {
212213
}
213214

214215
template <>
215-
void caffe_gpu_saturate<float>(const int N, float* y) {
216+
void caffe_gpu_signed_saturate<float>(const int N, float* y) {
216217
// NOLINT_NEXT_LINE(whitespace/operators)
217-
saturate_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
218+
signed_saturate_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
218219
}
219220

220221
template <>
221-
void caffe_gpu_saturate<double>(const int N, double* y) {
222+
void caffe_gpu_signed_saturate<double>(const int N, double* y) {
222223
// NOLINT_NEXT_LINE(whitespace/operators)
223-
saturate_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
224+
signed_saturate_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
225+
}
226+
227+
template <typename Dtype>
228+
__global__ void unsigned_saturate_kernel(const int n, Dtype* y) {
229+
CUDA_KERNEL_LOOP(index, n) {
230+
if(y[index] > UNSIGNED_SATURATE_MAX)
231+
y[index] = SIGNED_SATURATE_MAX;
232+
}
233+
}
234+
235+
template <>
236+
void caffe_gpu_unsigned_saturate<float>(const int N, float* y) {
237+
// NOLINT_NEXT_LINE(whitespace/operators)
238+
unsigned_saturate_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
239+
}
240+
241+
template <>
242+
void caffe_gpu_unsigned_saturate<double>(const int N, double* y) {
243+
// NOLINT_NEXT_LINE(whitespace/operators)
244+
unsigned_saturate_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
224245
}
225246

226247
template <typename Dtype>

0 commit comments

Comments
 (0)