Skip to content

Commit 707098b

Browse files
committed
inference acc: add round-half-to-even handling.
1 parent 1b94acc commit 707098b

File tree

4 files changed

+4
-1
lines changed

4 files changed

+4
-1
lines changed

src/caffe/layers/conv_layer.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
32 32
const int count_t = top[i]->count();
33 33
if (output_scale != Dtype(1)) {
34 34
caffe_gpu_scal(count_t, output_scale, top_data);
35 +
caffe_gpu_round(count_t, top_data);
35 36
}
36 37
//CUSTOMIZATION-->
37 38
}

src/caffe/layers/deconv_layer.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
31 31
const int count_t = top[i]->count();
32 32
if (output_scale != Dtype(1)) {
33 33
caffe_gpu_scal(count_t, output_scale, top_data);
34 +
caffe_gpu_round(count_t, top_data);
34 35
}
35 36
//CUSTOMIZATION-->
36 37
}

src/caffe/layers/inner_product_layer.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ void InnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
38 38
const int count_t = top[0]->count();
39 39
if (output_scale_ != Dtype(1)) {
40 40
caffe_gpu_scal(count_t, output_scale_, top_data);
41 +
caffe_gpu_round(count_t, top_data);
41 42
}
42 43
//CUSTOMIZATION-->
43 44
}

src/caffe/util/math_functions.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ void caffe_gpu_scale<double>(const int n, const double alpha, const double *x,
163 163
template <typename Dtype>
164 164
__global__ void round_kernel(const int n, Dtype* y) {
165 165
CUDA_KERNEL_LOOP(index, n) {
166 -
y[index] = round(y[index]);
166 +
y[index] = rint(y[index]);
167 167
}
168 168
}
169 169

0 commit comments

Comments
 (0)