1010
1111#define SIGNED_SATURATE_MAX 2047
1212#define SIGNED_SATURATE_MIN -2048
13+ #define UNSIGNED_SATURATE_MAX 4095
1314
1415namespace caffe {
1516
@@ -202,7 +203,7 @@ void caffe_gpu_int<double>(const int N, double* y) {
202203}
203204
204205template <typename Dtype>
205- __global__ void saturate_kernel (const int n, Dtype* y) {
206+ __global__ void signed_saturate_kernel (const int n, Dtype* y) {
206207 CUDA_KERNEL_LOOP (index, n) {
207208 if (y[index] > SIGNED_SATURATE_MAX)
208209 y[index] = SIGNED_SATURATE_MAX;
@@ -212,15 +213,35 @@ __global__ void saturate_kernel(const int n, Dtype* y) {
212213}
213214
214215template <>
215- void caffe_gpu_saturate <float >(const int N, float * y) {
216+ void caffe_gpu_signed_saturate <float >(const int N, float * y) {
216217 // NOLINT_NEXT_LINE(whitespace/operators)
217- saturate_kernel <float ><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>> (N, y);
218+ signed_saturate_kernel <float ><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>> (N, y);
218219}
219220
220221template <>
221- void caffe_gpu_saturate <double >(const int N, double * y) {
222+ void caffe_gpu_signed_saturate <double >(const int N, double * y) {
222223 // NOLINT_NEXT_LINE(whitespace/operators)
223- saturate_kernel<double ><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>> (N, y);
224+ signed_saturate_kernel<double ><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>> (N, y);
225+ }
226+
// Clamps each of the first n elements of y from above at
// UNSIGNED_SATURATE_MAX, in place.
//
//   n: number of elements of y to visit.
//   y: buffer of at least n values, read and written by the kernel; it must
//      be dereferenceable from device code.
//
// Element iteration is delegated to CUDA_KERNEL_LOOP.
template <typename Dtype>
__global__ void unsigned_saturate_kernel(const int n, Dtype* y) {
  CUDA_KERNEL_LOOP(index, n) {
    // Store the same bound that is tested. Writing SIGNED_SATURATE_MAX here
    // would collapse every overflowing value to the signed limit instead of
    // the unsigned one.
    if (y[index] > UNSIGNED_SATURATE_MAX)
      y[index] = UNSIGNED_SATURATE_MAX;
    // NOTE(review): values below zero are passed through unchanged; confirm
    // whether an unsigned range should also clamp negatives to 0.
  }
}
234+
// Single-precision specialization: launches unsigned_saturate_kernel<float>
// on the N elements of y, which the kernel modifies in place.
template <>
void caffe_gpu_unsigned_saturate<float>(const int N, float* y) {
  // Launch configuration: CAFFE_GET_BLOCKS(N) blocks of
  // CAFFE_CUDA_NUM_THREADS threads each.
  // NOLINT_NEXT_LINE(whitespace/operators)
  unsigned_saturate_kernel<float><<<CAFFE_GET_BLOCKS(N),
                                    CAFFE_CUDA_NUM_THREADS>>>(N, y);
}
240+
// Double-precision specialization: launches unsigned_saturate_kernel<double>
// on the N elements of y, which the kernel modifies in place.
template <>
void caffe_gpu_unsigned_saturate<double>(const int N, double* y) {
  // Launch configuration: CAFFE_GET_BLOCKS(N) blocks of
  // CAFFE_CUDA_NUM_THREADS threads each.
  // NOLINT_NEXT_LINE(whitespace/operators)
  unsigned_saturate_kernel<double><<<CAFFE_GET_BLOCKS(N),
                                     CAFFE_CUDA_NUM_THREADS>>>(N, y);
}
225246
226247template <typename Dtype>
0 commit comments