@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
See the License for the specific language governing permissions and
13
13
limitations under the License. */
14
14
15
- #include < algorithm>
16
- #include < vector>
17
15
#include " paddle/fluid/operators/math/pooling.h"
18
16
#include " paddle/fluid/platform/cuda_primitives.h"
19
17
@@ -22,7 +20,7 @@ namespace operators {
22
20
namespace math {
23
21
24
22
template <typename PoolProcess, typename T>
25
- __global__ void KernelPool2D (const int nthreads, const T* input_data, // NOLINT
23
+ __global__ void KernelPool2D (const int nthreads, const T* input_data,
26
24
const int channels, const int input_height,
27
25
const int input_width, const int output_height,
28
26
const int output_width, const int ksize_height,
@@ -60,8 +58,8 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, // NOLINT
60
58
61
59
template <typename PoolProcess, typename T>
62
60
__global__ void KernelPool2DGrad (
63
- const int nthreads, const T* input_data, const T* output_data, // NOLINT
64
- const T* output_grad, const int channels, const int input_height, // NOLINT
61
+ const int nthreads, const T* input_data, const T* output_data,
62
+ const T* output_grad, const int channels, const int input_height,
65
63
const int input_width, const int output_height, const int output_width,
66
64
const int ksize_height, const int ksize_width, const int stride_height,
67
65
const int stride_width, const int padding_height, const int padding_width,
@@ -108,8 +106,8 @@ __global__ void KernelPool2DGrad(
108
106
109
107
template <typename T>
110
108
__global__ void KernelMaxPool2DGrad (
111
- const int nthreads, const T* input_data, const T* output_data, // NOLINT
112
- const T* output_grad, const int channels, const int input_height, // NOLINT
109
+ const int nthreads, const T* input_data, const T* output_data,
110
+ const T* output_grad, const int channels, const int input_height,
113
111
const int input_width, const int output_height, const int output_width,
114
112
const int ksize_height, const int ksize_width, const int stride_height,
115
113
const int stride_width, const int padding_height, const int padding_width,
@@ -160,10 +158,8 @@ template <typename PoolProcess, typename T>
160
158
class Pool2dFunctor <platform::CUDADeviceContext, PoolProcess, T> {
161
159
public:
162
160
void operator ()(const platform::CUDADeviceContext& context,
163
- const framework::Tensor& input,
164
- std::vector<int >& ksize, // NOLINT
165
- std::vector<int >& strides, // NOLINT
166
- std::vector<int >& paddings, // NOLINT
161
+ const framework::Tensor& input, std::vector<int >& ksize,
162
+ std::vector<int >& strides, std::vector<int >& paddings,
167
163
PoolProcess pool_process, framework::Tensor* output) {
168
164
const int batch_size = input.dims ()[0 ];
169
165
const int input_channels = input.dims ()[1 ];
@@ -205,10 +201,8 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
205
201
void operator ()(const platform::CUDADeviceContext& context,
206
202
const framework::Tensor& input,
207
203
const framework::Tensor& output,
208
- const framework::Tensor& output_grad,
209
- std::vector<int >& ksize, // NOLINT
210
- std::vector<int >& strides, // NOLINT
211
- std::vector<int >& paddings, // NOLINT
204
+ const framework::Tensor& output_grad, std::vector<int >& ksize,
205
+ std::vector<int >& strides, std::vector<int >& paddings,
212
206
PoolProcess pool_process, framework::Tensor* input_grad) {
213
207
const int batch_size = input.dims ()[0 ];
214
208
const int input_channels = input.dims ()[1 ];
@@ -252,10 +246,8 @@ class MaxPool2dGradFunctor<platform::CUDADeviceContext, T> {
252
246
void operator ()(const platform::CUDADeviceContext& context,
253
247
const framework::Tensor& input,
254
248
const framework::Tensor& output,
255
- const framework::Tensor& output_grad,
256
- std::vector<int >& ksize, // NOLINT
257
- std::vector<int >& strides, // NOLINT
258
- std::vector<int >& paddings, // NOLINT
249
+ const framework::Tensor& output_grad, std::vector<int >& ksize,
250
+ std::vector<int >& strides, std::vector<int >& paddings,
259
251
framework::Tensor* input_grad) {
260
252
const int batch_size = input.dims ()[0 ];
261
253
const int input_channels = input.dims ()[1 ];
@@ -314,7 +306,7 @@ template class Pool2dGradFunctor<platform::CUDADeviceContext,
314
306
double >;
315
307
316
308
template <typename PoolProcess, typename T>
317
- __global__ void KernelPool3D (const int nthreads, const T* input_data, // NOLINT
309
+ __global__ void KernelPool3D (const int nthreads, const T* input_data,
318
310
const int channels, const int input_depth,
319
311
const int input_height, const int input_width,
320
312
const int output_depth, const int output_height,
@@ -360,8 +352,8 @@ __global__ void KernelPool3D(const int nthreads, const T* input_data, // NOLINT
360
352
361
353
template <typename PoolProcess, typename T>
362
354
__global__ void KernelPool3DGrad (
363
- const int nthreads, const T* input_data, const T* output_data, // NOLINT
364
- const T* output_grad, const int channels, const int input_depth, // NOLINT
355
+ const int nthreads, const T* input_data, const T* output_data,
356
+ const T* output_grad, const int channels, const int input_depth,
365
357
const int input_height, const int input_width, const int output_depth,
366
358
const int output_height, const int output_width, const int ksize_depth,
367
359
const int ksize_height, const int ksize_width, const int stride_depth,
@@ -424,8 +416,8 @@ __global__ void KernelPool3DGrad(
424
416
425
417
template <typename T>
426
418
__global__ void KernelMaxPool3DGrad (
427
- const int nthreads, const T* input_data, const T* output_data, // NOLINT
428
- const T* output_grad, const int channels, const int input_depth, // NOLINT
419
+ const int nthreads, const T* input_data, const T* output_data,
420
+ const T* output_grad, const int channels, const int input_depth,
429
421
const int input_height, const int input_width, const int output_depth,
430
422
const int output_height, const int output_width, const int ksize_depth,
431
423
const int ksize_height, const int ksize_width, const int stride_depth,
@@ -482,10 +474,8 @@ template <typename PoolProcess, class T>
482
474
class Pool3dFunctor <platform::CUDADeviceContext, PoolProcess, T> {
483
475
public:
484
476
void operator ()(const platform::CUDADeviceContext& context,
485
- const framework::Tensor& input,
486
- std::vector<int >& ksize, // NOLINT
487
- std::vector<int >& strides, // NOLINT
488
- std::vector<int >& paddings, // NOLINT
477
+ const framework::Tensor& input, std::vector<int >& ksize,
478
+ std::vector<int >& strides, std::vector<int >& paddings,
489
479
PoolProcess pool_process, framework::Tensor* output) {
490
480
const int batch_size = input.dims ()[0 ];
491
481
const int input_channels = input.dims ()[1 ];
@@ -535,10 +525,8 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
535
525
void operator ()(const platform::CUDADeviceContext& context,
536
526
const framework::Tensor& input,
537
527
const framework::Tensor& output,
538
- const framework::Tensor& output_grad,
539
- std::vector<int >& ksize, // NOLINT
540
- std::vector<int >& strides, // NOLINT
541
- std::vector<int >& paddings, // NOLINT
528
+ const framework::Tensor& output_grad, std::vector<int >& ksize,
529
+ std::vector<int >& strides, std::vector<int >& paddings,
542
530
PoolProcess pool_process, framework::Tensor* input_grad) {
543
531
const int batch_size = input.dims ()[0 ];
544
532
const int input_channels = input.dims ()[1 ];
@@ -590,10 +578,8 @@ class MaxPool3dGradFunctor<platform::CUDADeviceContext, T> {
590
578
void operator ()(const platform::CUDADeviceContext& context,
591
579
const framework::Tensor& input,
592
580
const framework::Tensor& output,
593
- const framework::Tensor& output_grad,
594
- std::vector<int >& ksize, // NOLINT
595
- std::vector<int >& strides, // NOLINT
596
- std::vector<int >& paddings, // NOLINT
581
+ const framework::Tensor& output_grad, std::vector<int >& ksize,
582
+ std::vector<int >& strides, std::vector<int >& paddings,
597
583
framework::Tensor* input_grad) {
598
584
const int batch_size = input.dims ()[0 ];
599
585
const int input_channels = input.dims ()[1 ];
@@ -750,10 +736,8 @@ template <typename T1, typename T2>
750
736
class MaxPool2dWithIndexFunctor <platform::CUDADeviceContext, T1, T2> {
751
737
public:
752
738
void operator ()(const platform::CUDADeviceContext& context,
753
- const framework::Tensor& input,
754
- std::vector<int >& ksize, // NOLINT
755
- std::vector<int >& strides, // NOLINT
756
- std::vector<int >& paddings, // NOLINT
739
+ const framework::Tensor& input, std::vector<int >& ksize,
740
+ std::vector<int >& strides, std::vector<int >& paddings,
757
741
framework::Tensor* output, framework::Tensor* mask) {
758
742
const int batch_size = input.dims ()[0 ];
759
743
const int input_channels = input.dims ()[1 ];
@@ -795,10 +779,8 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
795
779
public:
796
780
void operator ()(const platform::CUDADeviceContext& context,
797
781
const framework::Tensor& output_grad,
798
- const framework::Tensor& mask,
799
- std::vector<int >& ksize, // NOLINT
800
- std::vector<int >& strides, // NOLINT
801
- std::vector<int >& paddings, // NOLINT
782
+ const framework::Tensor& mask, std::vector<int >& ksize,
783
+ std::vector<int >& strides, std::vector<int >& paddings,
802
784
framework::Tensor* input_grad) {
803
785
const int batch_size = input_grad->dims ()[0 ];
804
786
const int input_channels = input_grad->dims ()[1 ];
@@ -955,10 +937,8 @@ template <typename T1, typename T2>
955
937
class MaxPool3dWithIndexFunctor <platform::CUDADeviceContext, T1, T2> {
956
938
public:
957
939
void operator ()(const platform::CUDADeviceContext& context,
958
- const framework::Tensor& input,
959
- std::vector<int >& ksize, // NOLINT
960
- std::vector<int >& strides, // NOLINT
961
- std::vector<int >& paddings, // NOLINT
940
+ const framework::Tensor& input, std::vector<int >& ksize,
941
+ std::vector<int >& strides, std::vector<int >& paddings,
962
942
framework::Tensor* output, framework::Tensor* mask) {
963
943
const int batch_size = input.dims ()[0 ];
964
944
const int input_channels = input.dims ()[1 ];
@@ -1007,10 +987,8 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
1007
987
public:
1008
988
void operator ()(const platform::CUDADeviceContext& context,
1009
989
const framework::Tensor& output_grad,
1010
- const framework::Tensor& mask,
1011
- std::vector<int >& ksize, // NOLINT
1012
- std::vector<int >& strides, // NOLINT
1013
- std::vector<int >& paddings, // NOLINT
990
+ const framework::Tensor& mask, std::vector<int >& ksize,
991
+ std::vector<int >& strides, std::vector<int >& paddings,
1014
992
framework::Tensor* input_grad) {
1015
993
const int batch_size = input_grad->dims ()[0 ];
1016
994
const int input_channels = input_grad->dims ()[1 ];
0 commit comments