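inference acc: add round-half-to-even handling (round -> rint in round_kernel; round after GPU output scaling in conv, deconv, and inner-product layers).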

vera121 · vera121 · commit 707098b0e71c · 2018-09-06T04:16:26.000-04:00
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
@@ -32,6 +32,7 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const int count_t = top[i]->count();
     if (output_scale != Dtype(1)) {
       caffe_gpu_scal(count_t, output_scale, top_data);
+      caffe_gpu_round(count_t, top_data);
     }
     //CUSTOMIZATION-->
   }
diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu
@@ -31,6 +31,7 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const int count_t = top[i]->count();
     if (output_scale != Dtype(1)) {
       caffe_gpu_scal(count_t, output_scale, top_data);
+      caffe_gpu_round(count_t, top_data);
     }
     //CUSTOMIZATION-->
   }
diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu
@@ -38,6 +38,7 @@ void InnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const int count_t = top[0]->count();
     if (output_scale_ != Dtype(1)) {
       caffe_gpu_scal(count_t, output_scale_, top_data);
+      caffe_gpu_round(count_t, top_data);
     }
   //CUSTOMIZATION-->
 }
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
@@ -163,7 +163,7 @@ void caffe_gpu_scale<double>(const int n, const double alpha, const double *x,
 template <typename Dtype>
 __global__ void round_kernel(const int n, Dtype* y) {
   CUDA_KERNEL_LOOP(index, n) {
-    y[index] = round(y[index]);
+    y[index] = rint(y[index]);
   }
 }
 

Original file line number	Diff line number	Diff line change
`@@ -32,6 +32,7 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,`
`32`	`32`	`const int count_t = top[i]->count();`
`33`	`33`	`if (output_scale != Dtype(1)) {`
`34`	`34`	`caffe_gpu_scal(count_t, output_scale, top_data);`
	`35`	`+ caffe_gpu_round(count_t, top_data);`
`35`	`36`	`}`
`36`	`37`	`//CUSTOMIZATION-->`
`37`	`38`	`}`
Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,`
`31`	`31`	`const int count_t = top[i]->count();`
`32`	`32`	`if (output_scale != Dtype(1)) {`
`33`	`33`	`caffe_gpu_scal(count_t, output_scale, top_data);`
	`34`	`+ caffe_gpu_round(count_t, top_data);`
`34`	`35`	`}`
`35`	`36`	`//CUSTOMIZATION-->`
`36`	`37`	`}`
Original file line number	Diff line number	Diff line change
`@@ -38,6 +38,7 @@ void InnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,`
`38`	`38`	`const int count_t = top[0]->count();`
`39`	`39`	`if (output_scale_ != Dtype(1)) {`
`40`	`40`	`caffe_gpu_scal(count_t, output_scale_, top_data);`
	`41`	`+ caffe_gpu_round(count_t, top_data);`
`41`	`42`	`}`
`42`	`43`	`//CUSTOMIZATION-->`
`43`	`44`	`}`
Original file line number	Diff line number	Diff line change
`@@ -163,7 +163,7 @@ void caffe_gpu_scale<double>(const int n, const double alpha, const double *x,`
`163`	`163`	`template <typename Dtype>`
`164`	`164`	`__global__ void round_kernel(const int n, Dtype* y) {`
`165`	`165`	`CUDA_KERNEL_LOOP(index, n) {`
`166`		`- y[index] = round(y[index]);`
	`166`	`+ y[index] = rint(y[index]);`
`167`	`167`	`}`
`168`	`168`	`}`
`169`	`169`