@@ -38,10 +38,10 @@ __global__ void GPUROIPoolForward(
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   int offset = blockDim.x * gridDim.x;
   for (size_t i = index; i < nthreads; i += offset) {
-    int pw = index % pooled_width;
-    int ph = (index / pooled_width) % pooled_height;
-    int c = (index / pooled_width / pooled_height) % channels;
-    int n = index / pooled_width / pooled_height / channels;
+    int pw = i % pooled_width;
+    int ph = (i / pooled_width) % pooled_height;
+    int c = (i / pooled_width / pooled_height) % channels;
+    int n = i / pooled_width / pooled_height / channels;
 
     const int64_t* offset_input_rois = input_rois + n * kROISize;
     int roi_batch_ind = roi_batch_id_data[n];
@@ -65,7 +65,6 @@ __global__ void GPUROIPoolForward(
     int wend = static_cast<int>(ceil(static_cast<double>(pw + 1) *
                                      static_cast<double>(roi_width) /
                                      static_cast<double>(pooled_width)));
-
     hstart = min(max(hstart + roi_start_h, 0), height);
     hend = min(max(hend + roi_start_h, 0), height);
     wstart = min(max(wstart + roi_start_w, 0), width);
@@ -85,9 +84,9 @@ __global__ void GPUROIPoolForward(
         }
       }
     }
-    output_data[index] = maxval;
+    output_data[i] = maxval;
     if (argmax_data) {
-      argmax_data[index] = maxidx;
+      argmax_data[i] = maxidx;
     }
   }
 }
@@ -144,6 +143,7 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
     int width = in_dims[3];
 
     int rois_num = rois->dims()[0];
+
     if (rois_num == 0) return;
 
     int output_size = out->numel();
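Note on the change: the kernel iterates with a grid-stride loop, so each thread handles several output elements. Before this patch the loop body decomposed the fixed thread id `index` into (pw, ph, c, n) and wrote output_data[index], so every iteration recomputed and overwrote the thread's first element while the remaining elements were never written; switching to the loop variable `i` makes each iteration address its own element. A minimal sketch of the grid-stride pattern, using an illustrative saxpy kernel in place of the ROI-pool body:

// Grid-stride loop sketch (illustrative saxpy kernel, not part of the patch).
// Each thread starts at its global index and advances by the total number of
// launched threads, so every element in [0, n) is visited exactly once.
__global__ void Saxpy(int n, float a, const float* x, float* y) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  for (int i = index; i < n; i += stride) {
    // Per-element work must key off the loop variable i, not index;
    // indexing with the fixed thread id is exactly the bug the patch fixes.
    y[i] = a * x[i] + y[i];
  }
}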