@@ -66,8 +66,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
66
66
public:
67
67
void operator ()(const platform::DeviceContext& context,
68
68
const framework::Tensor& im, framework::Tensor& col,
69
- int stride_height, int stride_width, int padding_height ,
70
- int padding_width ) {
69
+ int stride_height, int stride_width, int padding_up ,
70
+ int padding_down, int padding_left, int padding_right ) {
71
71
PADDLE_ENFORCE (im.dims ().size () == 3 );
72
72
PADDLE_ENFORCE (col.dims ().size () == 5 );
73
73
@@ -79,6 +79,15 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
79
79
int output_height = col.dims ()[3 ];
80
80
int output_width = col.dims ()[4 ];
81
81
82
+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
83
+ stride_height +
84
+ 1 ==
85
+ output_height);
86
+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
87
+ stride_width +
88
+ 1 ==
89
+ output_width);
90
+
82
91
int num_outputs = input_channels * output_height * output_width;
83
92
int blocks = (num_outputs + 1024 - 1 ) / 1024 ;
84
93
int block_x = 512 ;
@@ -89,8 +98,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
89
98
reinterpret_cast <const platform::CUDADeviceContext&>(context)
90
99
.stream()>>> (
91
100
im.data <T>(), num_outputs, input_height, input_width, filter_height,
92
- filter_width, stride_height, stride_width, padding_height ,
93
- padding_width, output_height, output_width, col.data <T>());
101
+ filter_width, stride_height, stride_width, padding_up, padding_left ,
102
+ output_height, output_width, col.data <T>());
94
103
}
95
104
};
96
105
@@ -152,7 +161,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
152
161
public:
153
162
void operator ()(const platform::DeviceContext& context, framework::Tensor& im,
154
163
const framework::Tensor& col, int stride_height,
155
- int stride_width, int padding_height, int padding_width) {
164
+ int stride_width, int padding_up, int padding_down,
165
+ int padding_left, int padding_right) {
156
166
PADDLE_ENFORCE (im.dims ().size () == 3 );
157
167
PADDLE_ENFORCE (col.dims ().size () == 5 );
158
168
@@ -164,8 +174,18 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
164
174
int output_height = col.dims ()[3 ];
165
175
int output_width = col.dims ()[4 ];
166
176
167
- size_t num_kernels = input_channels * (input_height + 2 * padding_height) *
168
- (input_width + 2 * padding_width);
177
+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
178
+ stride_height +
179
+ 1 ==
180
+ output_height);
181
+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
182
+ stride_width +
183
+ 1 ==
184
+ output_width);
185
+
186
+ size_t num_kernels = input_channels *
187
+ (input_height + padding_up + padding_down) *
188
+ (input_width + padding_left + padding_right);
169
189
170
190
size_t blocks = (num_kernels + 1024 - 1 ) / 1024 ;
171
191
size_t block_x = 512 ;
@@ -178,10 +198,10 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
178
198
col2im<T><<<grid, threads, 0 ,
179
199
reinterpret_cast <const platform::CUDADeviceContext&>(context)
180
200
.stream()>>> (
181
- num_kernels, col.data <T>(), input_height + 2 * padding_height ,
182
- input_width + 2 * padding_width , input_channels, filter_height ,
183
- filter_width, stride_height, stride_width, padding_height ,
184
- padding_width , output_height, output_width, im.data <T>());
201
+ num_kernels, col.data <T>(), input_height + padding_up + padding_down ,
202
+ input_width + padding_left + padding_right , input_channels,
203
+ filter_height, filter_width, stride_height, stride_width, padding_up ,
204
+ padding_left , output_height, output_width, im.data <T>());
185
205
}
186
206
};
187
207
@@ -238,8 +258,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
238
258
public:
239
259
void operator ()(const platform::DeviceContext& context,
240
260
const framework::Tensor& im, framework::Tensor& col,
241
- int stride_height, int stride_width, int padding_height ,
242
- int padding_width ) {
261
+ int stride_height, int stride_width, int padding_up ,
262
+ int padding_down, int padding_left, int padding_right ) {
243
263
PADDLE_ENFORCE (im.dims ().size () == 3 );
244
264
PADDLE_ENFORCE (col.dims ().size () == 5 );
245
265
int input_channels = im.dims ()[0 ];
@@ -250,6 +270,15 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
250
270
int output_height = col.dims ()[0 ];
251
271
int output_width = col.dims ()[1 ];
252
272
273
+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
274
+ stride_height +
275
+ 1 ==
276
+ output_height);
277
+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
278
+ stride_width +
279
+ 1 ==
280
+ output_width);
281
+
253
282
int block_dim_x = 0 ;
254
283
int block_dim_y = 0 ;
255
284
if (filter_height <= 4 && filter_width <= 4 ) {
@@ -274,8 +303,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
274
303
reinterpret_cast <const platform::CUDADeviceContext&>(context)
275
304
.stream()>>> (
276
305
im.data <T>(), col.data <T>(), input_channels, input_height, input_width,
277
- filter_height, filter_width, stride_height, stride_width,
278
- padding_height, padding_width , output_height, output_width);
306
+ filter_height, filter_width, stride_height, stride_width, padding_up,
307
+ padding_left , output_height, output_width);
279
308
}
280
309
};
281
310
@@ -322,7 +351,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
322
351
public:
323
352
void operator ()(const platform::DeviceContext& context, framework::Tensor& im,
324
353
const framework::Tensor& col, int stride_height,
325
- int stride_width, int padding_height, int padding_width) {
354
+ int stride_width, int padding_up, int padding_down,
355
+ int padding_left, int padding_right) {
326
356
PADDLE_ENFORCE (im.dims ().size () == 3 );
327
357
PADDLE_ENFORCE (col.dims ().size () == 5 );
328
358
int input_channels = im.dims ()[0 ];
@@ -333,6 +363,15 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
333
363
int output_height = col.dims ()[0 ];
334
364
int output_width = col.dims ()[1 ];
335
365
366
+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
367
+ stride_height +
368
+ 1 ==
369
+ output_height);
370
+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
371
+ stride_width +
372
+ 1 ==
373
+ output_width);
374
+
336
375
int block_dim_x = 0 ;
337
376
int block_dim_y = 0 ;
338
377
if (filter_height <= 4 && filter_width <= 4 ) {
@@ -357,8 +396,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
357
396
reinterpret_cast <const platform::CUDADeviceContext&>(context)
358
397
.stream()>>> (
359
398
im.data <T>(), col.data <T>(), input_channels, input_height, input_width,
360
- filter_height, filter_width, stride_height, stride_width,
361
- padding_height, padding_width , output_height, output_width);
399
+ filter_height, filter_width, stride_height, stride_width, padding_up,
400
+ padding_left , output_height, output_width);
362
401
}
363
402
};
364
403
0 commit comments