Skip to content

Commit fd69f05

Browse files
committed
evquantize: saturate 8 biy support
1 parent fecf497 commit fd69f05

File tree

8 files changed

+154
-0
lines changed

8 files changed

+154
-0
lines changed

include/caffe/util/math_functions.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,15 @@ void caffe_gpu_int(const int N, Dtype* y);
202202
template <typename Dtype>
203203
void caffe_gpu_signed_saturate(const int N, Dtype* y);
204204

205+
template <typename Dtype>
206+
void caffe_gpu_signed_8bit_saturate(const int N, Dtype* y);
207+
205208
template <typename Dtype>
206209
void caffe_gpu_unsigned_saturate(const int N, Dtype* y);
207210

211+
template <typename Dtype>
212+
void caffe_gpu_unsigned_8bit_saturate(const int N, Dtype* y);
213+
208214
template <typename Dtype>
209215
void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
210216

src/caffe/layers/conv_layer.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
3939
caffe_gpu_signed_saturate(count_t, top_data);
4040
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned)
4141
caffe_gpu_unsigned_saturate(count_t, top_data);
42+
if(saturate == ConvolutionParameter_SaturateMethod_Signed_8bit)
43+
caffe_gpu_signed_8bit_saturate(count_t, top_data);
44+
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
45+
caffe_gpu_unsigned_8bit_saturate(count_t, top_data);
4246
//CUSTOMIZATION-->
4347
}
4448
}

src/caffe/layers/deconv_layer.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
3838
caffe_gpu_signed_saturate(count_t, top_data);
3939
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned)
4040
caffe_gpu_unsigned_saturate(count_t, top_data);
41+
if(saturate == ConvolutionParameter_SaturateMethod_Signed_8bit)
42+
caffe_gpu_signed_8bit_saturate(count_t, top_data);
43+
if(saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
44+
caffe_gpu_unsigned_8bit_saturate(count_t, top_data);
4145
//CUSTOMIZATION-->
4246
}
4347
}

src/caffe/layers/eltwise_layer.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
5959
caffe_gpu_signed_saturate(count, top_data);
6060
if(saturate_ == EltwiseParameter_SaturateMethod_Unsigned)
6161
caffe_gpu_unsigned_saturate(count, top_data);
62+
if(saturate_ == EltwiseParameter_SaturateMethod_Signed_8bit)
63+
caffe_gpu_signed_8bit_saturate(count, top_data);
64+
if(saturate_ == EltwiseParameter_SaturateMethod_Unsigned_8bit)
65+
caffe_gpu_unsigned_8bit_saturate(count, top_data);
6266
//CUSTOMIZATION-->
6367
break;
6468
case EltwiseParameter_EltwiseOp_MAX:

src/caffe/layers/pooling_layer.cu

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
#define SIGNED_SATURATE_MAX 2047
99
#define SIGNED_SATURATE_MIN -2048
1010
#define UNSIGNED_SATURATE_MAX 4095
11+
#define SIGNED_8BIT_SATURATE_MAX 127
12+
#define SIGNED_8BIT_SATURATE_MIN -128
13+
#define UNSIGNED_8BIT_SATURATE_MAX 255
1114

1215
namespace caffe {
1316

@@ -97,13 +100,27 @@ __global__ void AvePoolForward(const int nthreads,
97100
if(top_data[index] < 0)
98101
top_data[index] = 0;
99102
}
103+
if(saturate == PoolingParameter_SaturateMethod_Unsigned_8bit)
104+
{
105+
if(top_data[index] > UNSIGNED_8BIT_SATURATE_MAX)
106+
top_data[index] = UNSIGNED_8BIT_SATURATE_MAX;
107+
if(top_data[index] < 0)
108+
top_data[index] = 0;
109+
}
100110
if(saturate == PoolingParameter_SaturateMethod_Signed)
101111
{
102112
if(top_data[index] > SIGNED_SATURATE_MAX)
103113
top_data[index] = SIGNED_SATURATE_MAX;
104114
if(top_data[index] < SIGNED_SATURATE_MIN)
105115
top_data[index] = SIGNED_SATURATE_MIN;
106116
}
117+
if(saturate == PoolingParameter_SaturateMethod_Signed_8bit)
118+
{
119+
if(top_data[index] > SIGNED_8BIT_SATURATE_MAX)
120+
top_data[index] = SIGNED_8BIT_SATURATE_MAX;
121+
if(top_data[index] < SIGNED_8BIT_SATURATE_MIN)
122+
top_data[index] = SIGNED_8BIT_SATURATE_MIN;
123+
}
107124
}
108125
else{
109126
if(saturate == PoolingParameter_SaturateMethod_Unsigned)
@@ -114,6 +131,14 @@ __global__ void AvePoolForward(const int nthreads,
114131
if(top_data[index] < 0)
115132
top_data[index] = 0;
116133
}
134+
else if(saturate == PoolingParameter_SaturateMethod_Unsigned_8bit)
135+
{
136+
top_data[index] = aveval;
137+
if(top_data[index] > UNSIGNED_8BIT_SATURATE_MAX)
138+
top_data[index] = UNSIGNED_8BIT_SATURATE_MAX;
139+
if(top_data[index] < 0)
140+
top_data[index] = 0;
141+
}
117142
else if(saturate == PoolingParameter_SaturateMethod_Signed)
118143
{
119144
top_data[index] = aveval;
@@ -122,6 +147,14 @@ __global__ void AvePoolForward(const int nthreads,
122147
if(top_data[index] < SIGNED_SATURATE_MIN)
123148
top_data[index] = SIGNED_SATURATE_MIN;
124149
}
150+
else if(saturate == PoolingParameter_SaturateMethod_Signed_8bit)
151+
{
152+
top_data[index] = aveval;
153+
if(top_data[index] > SIGNED_8BIT_SATURATE_MAX)
154+
top_data[index] = SIGNED_8BIT_SATURATE_MAX;
155+
if(top_data[index] < SIGNED_8BIT_SATURATE_MIN)
156+
top_data[index] = SIGNED_8BIT_SATURATE_MIN;
157+
}
125158
else //original implementation
126159
top_data[index] = aveval / pool_size;
127160
}
@@ -248,13 +281,27 @@ __global__ void AvePoolForward_TF(const int nthreads,
248281
if(top_data[index] < 0)
249282
top_data[index] = 0;
250283
}
284+
if(saturate == PoolingParameter_SaturateMethod_Unsigned_8bit)
285+
{
286+
if(top_data[index] > UNSIGNED_8BIT_SATURATE_MAX)
287+
top_data[index] = UNSIGNED_8BIT_SATURATE_MAX;
288+
if(top_data[index] < 0)
289+
top_data[index] = 0;
290+
}
251291
if(saturate == PoolingParameter_SaturateMethod_Signed)
252292
{
253293
if(top_data[index] > SIGNED_SATURATE_MAX)
254294
top_data[index] = SIGNED_SATURATE_MAX;
255295
if(top_data[index] < SIGNED_SATURATE_MIN)
256296
top_data[index] = SIGNED_SATURATE_MIN;
257297
}
298+
if(saturate == PoolingParameter_SaturateMethod_Signed_8bit)
299+
{
300+
if(top_data[index] > SIGNED_8BIT_SATURATE_MAX)
301+
top_data[index] = SIGNED_8BIT_SATURATE_MAX;
302+
if(top_data[index] < SIGNED_8BIT_SATURATE_MIN)
303+
top_data[index] = SIGNED_8BIT_SATURATE_MIN;
304+
}
258305
}
259306

260307
else{
@@ -266,6 +313,14 @@ __global__ void AvePoolForward_TF(const int nthreads,
266313
if(top_data[index] < 0)
267314
top_data[index] = 0;
268315
}
316+
else if(saturate == PoolingParameter_SaturateMethod_Unsigned_8bit)
317+
{
318+
top_data[index] = aveval;
319+
if(top_data[index] > UNSIGNED_8BIT_SATURATE_MAX)
320+
top_data[index] = UNSIGNED_8BIT_SATURATE_MAX;
321+
if(top_data[index] < 0)
322+
top_data[index] = 0;
323+
}
269324
else if(saturate == PoolingParameter_SaturateMethod_Signed)
270325
{
271326
top_data[index] = aveval;
@@ -274,6 +329,14 @@ __global__ void AvePoolForward_TF(const int nthreads,
274329
if(top_data[index] < SIGNED_SATURATE_MIN)
275330
top_data[index] = SIGNED_SATURATE_MIN;
276331
}
332+
else if(saturate == PoolingParameter_SaturateMethod_Signed_8bit)
333+
{
334+
top_data[index] = aveval;
335+
if(top_data[index] > SIGNED_8BIT_SATURATE_MAX)
336+
top_data[index] = SIGNED_8BIT_SATURATE_MAX;
337+
if(top_data[index] < SIGNED_8BIT_SATURATE_MIN)
338+
top_data[index] = SIGNED_8BIT_SATURATE_MIN;
339+
}
277340
else //original implementation
278341
top_data[index] = aveval / pool_size;
279342
}

src/caffe/layers/relu_layer.cu

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
#define SIGNED_SATURATE_MAX 2047
77
#define SIGNED_SATURATE_MIN -2048
88
#define UNSIGNED_SATURATE_MAX 4095
9+
#define SIGNED_8BIT_SATURATE_MAX 127
10+
#define SIGNED_8BIT_SATURATE_MIN -128
11+
#define UNSIGNED_8BIT_SATURATE_MAX 255
912

1013
namespace caffe {
1114

@@ -25,10 +28,22 @@ __global__ void ReLUForward(const int n, const Dtype* in, Dtype* out,
2528
if(out[index] < SIGNED_SATURATE_MIN)
2629
out[index] = SIGNED_SATURATE_MIN;
2730
}
31+
if(saturate == ReLUParameter_SaturateMethod_Signed_8bit){
32+
if(out[index] < 0) //only need to do the round when multiplied with negative_slope
33+
out[index] = rint(out[index]);
34+
if(out[index] > SIGNED_8BIT_SATURATE_MAX)
35+
out[index] = SIGNED_8BIT_SATURATE_MAX;
36+
if(out[index] < SIGNED_8BIT_SATURATE_MIN)
37+
out[index] = SIGNED_8BIT_SATURATE_MIN;
38+
}
2839
if(saturate == ReLUParameter_SaturateMethod_Unsigned){
2940
if(out[index] > UNSIGNED_SATURATE_MAX)
3041
out[index] = UNSIGNED_SATURATE_MAX;
3142
}
43+
if(saturate == ReLUParameter_SaturateMethod_Unsigned_8bit){
44+
if(out[index] > UNSIGNED_8BIT_SATURATE_MAX)
45+
out[index] = UNSIGNED_8BIT_SATURATE_MAX;
46+
}
3247
//CUSTOMIZATION-->
3348
}
3449
}

src/caffe/proto/caffe.proto

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,8 @@ message ConvolutionParameter {
13031303
None = 0;
13041304
Signed = 1;
13051305
Unsigned = 2;
1306+
Signed_8bit = 3;
1307+
Unsigned_8bit = 4;
13061308
}
13071309
optional SaturateMethod saturate = 32 [default = None]; //control the output in certain range
13081310
//CUSTOMIZATION-->
@@ -1616,6 +1618,8 @@ message EltwiseParameter {
16161618
None = 0;
16171619
Signed = 1;
16181620
Unsigned = 2;
1621+
Signed_8bit = 3;
1622+
Unsigned_8bit = 4;
16191623
}
16201624
optional SaturateMethod saturate = 5 [default = None]; //only valid for SUM, control the output in certain range
16211625
//CUSTOMIZATION-->
@@ -1984,6 +1988,8 @@ message PoolingParameter {
19841988
None = 0;
19851989
Signed = 1;
19861990
Unsigned = 2;
1991+
Signed_8bit = 3;
1992+
Unsigned_8bit = 4;
19871993
}
19881994
optional SaturateMethod saturate = 22 [default = None]; //control the output in certain range
19891995
//CUSTOMIZATION-->
@@ -2123,6 +2129,8 @@ message ReLUParameter {
21232129
None = 0;
21242130
Signed = 1;
21252131
Unsigned = 2;
2132+
Signed_8bit = 3;
2133+
Unsigned_8bit = 4;
21262134
}
21272135
optional SaturateMethod saturate = 4 [default = None]; //control the output in certain range
21282136
//CUSTOMIZATION-->
@@ -2348,6 +2356,8 @@ message SqueezeConvolutionParameter {
23482356
None = 0;
23492357
Signed = 1;
23502358
Unsigned = 2;
2359+
Signed_8bit = 3;
2360+
Unsigned_8bit = 4;
23512361
}
23522362
optional SaturateMethod saturate = 48 [default = None]; //act as dummy param in squeeze_conv layer now
23532363
//CUSTOMIZATION-->

src/caffe/util/math_functions.cu

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
#define SIGNED_SATURATE_MAX 2047
1212
#define SIGNED_SATURATE_MIN -2048
1313
#define UNSIGNED_SATURATE_MAX 4095
14+
#define SIGNED_8BIT_SATURATE_MAX 127
15+
#define SIGNED_8BIT_SATURATE_MIN -128
16+
#define UNSIGNED_8BIT_SATURATE_MAX 255
1417

1518
namespace caffe {
1619

@@ -224,6 +227,28 @@ void caffe_gpu_signed_saturate<double>(const int N, double* y) {
224227
signed_saturate_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
225228
}
226229

230+
template <typename Dtype>
231+
__global__ void signed_8bit_saturate_kernel(const int n, Dtype* y) {
232+
CUDA_KERNEL_LOOP(index, n) {
233+
if(y[index] > SIGNED_8BIT_SATURATE_MAX)
234+
y[index] = SIGNED_8BIT_SATURATE_MAX;
235+
if(y[index] < SIGNED_8BIT_SATURATE_MIN)
236+
y[index] = SIGNED_8BIT_SATURATE_MIN;
237+
}
238+
}
239+
240+
template <>
241+
void caffe_gpu_signed_8bit_saturate<float>(const int N, float* y) {
242+
// NOLINT_NEXT_LINE(whitespace/operators)
243+
signed_8bit_saturate_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
244+
}
245+
246+
template <>
247+
void caffe_gpu_signed_8bit_saturate<double>(const int N, double* y) {
248+
// NOLINT_NEXT_LINE(whitespace/operators)
249+
signed_8bit_saturate_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
250+
}
251+
227252
template <typename Dtype>
228253
__global__ void unsigned_saturate_kernel(const int n, Dtype* y) {
229254
CUDA_KERNEL_LOOP(index, n) {
@@ -246,6 +271,29 @@ void caffe_gpu_unsigned_saturate<double>(const int N, double* y) {
246271
unsigned_saturate_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
247272
}
248273

274+
275+
template <typename Dtype>
276+
__global__ void unsigned_8bit_saturate_kernel(const int n, Dtype* y) {
277+
CUDA_KERNEL_LOOP(index, n) {
278+
if(y[index] > UNSIGNED_8BIT_SATURATE_MAX)
279+
y[index] = UNSIGNED_8BIT_SATURATE_MAX;
280+
if(y[index] < 0)
281+
y[index] = 0;
282+
}
283+
}
284+
285+
template <>
286+
void caffe_gpu_unsigned_8bit_saturate<float>(const int N, float* y) {
287+
// NOLINT_NEXT_LINE(whitespace/operators)
288+
unsigned_8bit_saturate_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
289+
}
290+
291+
template <>
292+
void caffe_gpu_unsigned_8bit_saturate<double>(const int N, double* y) {
293+
// NOLINT_NEXT_LINE(whitespace/operators)
294+
unsigned_8bit_saturate_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, y);
295+
}
296+
249297
template <typename Dtype>
250298
__global__ void set_kernel(const int n, const Dtype alpha, Dtype* y) {
251299
CUDA_KERNEL_LOOP(index, n) {

0 commit comments

Comments
 (0)