
Commit 73fcfc0

refine conv cudnn enforce (#12353)
* refine conv cudnn enforce
* update
* update all cudnn ops
* fix
1 parent 91b70da commit 73fcfc0
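
This commit replaces PADDLE_ENFORCE with CUDNN_ENFORCE around every raw cuDNN call in the convolution, convolution-transpose, softmax, and pooling operators, so a failing call reports the cuDNN status by name rather than going through the generic enforce path. As a rough illustration of the idea (not Paddle's actual definition, which lives in its platform headers; the name CUDNN_CHECK below is invented to avoid claiming otherwise), a cuDNN status-checking macro typically looks like this:

#include <cstdio>
#include <cstdlib>
#include <cudnn.h>

// Illustrative stand-in for CUDNN_ENFORCE: every cuDNN entry point returns a
// cudnnStatus_t, and anything other than CUDNN_STATUS_SUCCESS is turned into
// an error that carries cudnnGetErrorString(status) plus the call site.
#define CUDNN_CHECK(call)                                               \
  do {                                                                  \
    cudnnStatus_t status__ = (call);                                    \
    if (status__ != CUDNN_STATUS_SUCCESS) {                             \
      std::fprintf(stderr, "cuDNN error: %s at %s:%d\n",                \
                   cudnnGetErrorString(status__), __FILE__, __LINE__);  \
      std::abort();                                                     \
    }                                                                   \
  } while (0)

The sketches after each file's diff below reuse this CUDNN_CHECK stand-in.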

File tree

4 files changed, +26 -26 lines changed


paddle/fluid/operators/conv_cudnn_op.cu.cc

Lines changed: 13 additions & 13 deletions
@@ -77,7 +77,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     // cudnn 7 can support groups, no need to do it mannually
     // FIXME(typhoonzero): find a better way to disable groups
     // rather than setting it to 1.
-    PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
+    CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
         cudnn_conv_desc, groups));
     groups = 1;
 #endif
@@ -129,7 +129,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     auto handle = dev_ctx.cudnn_handle();

-    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
+    CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
         workspace_size_limit, &algo));
@@ -140,18 +140,18 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     if (dev_ctx.GetComputeCapability() >= 70 &&
         std::type_index(typeid(T)) ==
             std::type_index(typeid(platform::float16))) {
-      PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+      CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
           cudnn_conv_desc, CUDNN_TENSOR_OP_MATH));
       // Currently tensor core is only enabled using this algo
       algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
     } else {
-      PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+      CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
           cudnn_conv_desc, CUDNN_DEFAULT_MATH));
     }
 #endif

     // get workspace size able to allocate
-    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
+    CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, algo, &workspace_size_in_bytes));
     // It is possible for float16 on Volta GPU to allocate more memory than
@@ -165,7 +165,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     // ------------------- cudnn conv forward ---------------------
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
     for (int i = 0; i < groups; i++) {
-      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
+      CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward(
           handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
           cudnn_filter_desc, filter_data + i * group_offset_filter,
           cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
@@ -218,7 +218,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     // cudnn 7 can support groups, no need to do it mannually
     // FIXME(typhoonzero): find a better way to disable groups
     // rather than setting it to 1.
-    PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
+    CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
         cudnn_conv_desc, groups));
     groups = 1;
 #endif
@@ -273,7 +273,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     auto handle = dev_ctx.cudnn_handle();
     if (input_grad) {
       if (FLAGS_cudnn_deterministic) {
-        PADDLE_ENFORCE(
+        CUDNN_ENFORCE(
             platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
                 handle, cudnn_filter_desc,
                 // dyDesc: Handle to the previously initialized input
@@ -289,7 +289,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
         data_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
       }

-      PADDLE_ENFORCE(
+      CUDNN_ENFORCE(
           platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
               handle, cudnn_filter_desc, cudnn_output_grad_desc,
               cudnn_conv_desc, cudnn_input_desc, data_algo, &tmp_size));
@@ -298,7 +298,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {

     if (filter_grad) {
       if (FLAGS_cudnn_deterministic) {
-        PADDLE_ENFORCE(
+        CUDNN_ENFORCE(
             platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
                 handle, cudnn_input_desc, cudnn_output_grad_desc,
                 cudnn_conv_desc, cudnn_filter_desc,
@@ -308,7 +308,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
         filter_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
       }

-      PADDLE_ENFORCE(
+      CUDNN_ENFORCE(
           platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
               handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
               cudnn_filter_desc, filter_algo, &tmp_size));
@@ -326,7 +326,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     // Because beta is zero, it is unnecessary to reset input_grad.

     for (int i = 0; i < groups; i++) {
-      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
+      CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
           handle, &alpha, cudnn_filter_desc,
           filter_data + i * group_offset_filter, cudnn_output_grad_desc,
           output_grad_data + i * group_offset_out, cudnn_conv_desc, data_algo,
@@ -339,7 +339,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
     // Because beta is zero, it is unnecessary to reset filter_grad.
     for (int i = 0; i < groups; i++) {
-      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
+      CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
           handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
           cudnn_output_grad_desc, output_grad_data + i * group_offset_out,
           cudnn_conv_desc, filter_algo, cudnn_workspace,
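
The kernel above follows the cuDNN v7 recipe that recurs throughout this file: choose an algorithm under an explicit workspace limit, query the workspace that algorithm needs, then launch the convolution. Below is a self-contained sketch of that sequence under stated assumptions: the shapes and the 64 MiB limit are invented for illustration, cudnnGetConvolutionForwardAlgorithm is the v7-era API this file targets (it was later removed in cuDNN 8), and CUDNN_CHECK is the stand-in defined above.

#include <cudnn.h>
#include <cuda_runtime.h>

// Sketch of the select-algorithm -> query-workspace -> run pattern used in
// CUDNNConvOpKernel, with every cuDNN call wrapped in a status check.
void ConvForwardSketch(cudnnHandle_t handle, const float* x, const float* w,
                       float* y) {
  cudnnTensorDescriptor_t x_desc, y_desc;
  cudnnFilterDescriptor_t w_desc;
  cudnnConvolutionDescriptor_t conv_desc;
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&x_desc));
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&y_desc));
  CUDNN_CHECK(cudnnCreateFilterDescriptor(&w_desc));
  CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc));

  // NCHW float tensors: 1x8x32x32 input, 16 output channels, 3x3 filter,
  // pad 1 / stride 1 / dilation 1, so the output is 1x16x32x32.
  CUDNN_CHECK(cudnnSetTensor4dDescriptor(x_desc, CUDNN_TENSOR_NCHW,
                                         CUDNN_DATA_FLOAT, 1, 8, 32, 32));
  CUDNN_CHECK(cudnnSetFilter4dDescriptor(w_desc, CUDNN_DATA_FLOAT,
                                         CUDNN_TENSOR_NCHW, 16, 8, 3, 3));
  CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
      conv_desc, /*pad=*/1, 1, /*stride=*/1, 1, /*dilation=*/1, 1,
      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT));
  CUDNN_CHECK(cudnnSetTensor4dDescriptor(y_desc, CUDNN_TENSOR_NCHW,
                                         CUDNN_DATA_FLOAT, 1, 16, 32, 32));

  // 1) Choose an algorithm that fits under an explicit workspace limit.
  const size_t workspace_limit = 64 << 20;  // 64 MiB, arbitrary for the sketch
  cudnnConvolutionFwdAlgo_t algo;
  CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
      handle, x_desc, w_desc, conv_desc, y_desc,
      CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, workspace_limit, &algo));

  // 2) Ask how much scratch space that algorithm actually needs.
  size_t workspace_bytes = 0;
  CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
      handle, x_desc, w_desc, conv_desc, y_desc, algo, &workspace_bytes));
  void* workspace = nullptr;
  cudaMalloc(&workspace, workspace_bytes);  // error check omitted in sketch

  // 3) Run the convolution: y = alpha * conv(x, w) + beta * y.
  const float alpha = 1.0f, beta = 0.0f;
  CUDNN_CHECK(cudnnConvolutionForward(handle, &alpha, x_desc, x, w_desc, w,
                                      conv_desc, algo, workspace,
                                      workspace_bytes, &beta, y_desc, y));

  cudaFree(workspace);
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(x_desc));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(y_desc));
  CUDNN_CHECK(cudnnDestroyFilterDescriptor(w_desc));
  CUDNN_CHECK(cudnnDestroyConvolutionDescriptor(conv_desc));
}

The workspace query deliberately comes after the algorithm is fixed: as the diff's own comment notes, the kernel overrides the algorithm to enable tensor cores for float16 on Volta, so the re-queried workspace may exceed the limit originally passed to the algorithm query.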

paddle/fluid/operators/conv_transpose_cudnn_op.cu.cc

Lines changed: 9 additions & 9 deletions
@@ -87,15 +87,15 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     auto handle = dev_ctx.cudnn_handle();
     // Get the algorithm
-    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
+    CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
         handle, cudnn_filter_desc, cudnn_input_desc, cudnn_conv_desc,
         // dxDesc: Handle to the previously initialized output tensor
         // descriptor.
         cudnn_output_desc, CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
         workspace_size_limit, &algo));

     // get workspace size able to allocate
-    PADDLE_ENFORCE(
+    CUDNN_ENFORCE(
         platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
             handle, cudnn_filter_desc, cudnn_input_desc, cudnn_conv_desc,
             cudnn_output_desc, algo, &workspace_size_in_bytes));
@@ -110,7 +110,7 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
     int filter_offset = filter->numel() / groups;
     T alpha = 1.0f, beta = 0.0f;
     for (int g = 0; g < groups; g++) {
-      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
+      CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
           handle, &alpha, cudnn_filter_desc, filter_data + filter_offset * g,
           cudnn_input_desc, input_data + input_offset * g, cudnn_conv_desc,
           algo, cudnn_workspace, workspace_size_in_bytes, &beta,
@@ -178,27 +178,27 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     auto handle = dev_ctx.cudnn_handle();
     if (input_grad) {
       // choose backward algorithm for data
-      PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
+      CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
          handle, cudnn_output_desc, cudnn_filter_desc, cudnn_conv_desc,
          cudnn_input_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
          workspace_size_limit, &data_algo));
-      PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
+      CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
          handle, cudnn_output_desc, cudnn_filter_desc, cudnn_conv_desc,
          cudnn_input_desc, data_algo, &fwd_ws_size));
       workspace_size_in_bytes = std::max(workspace_size_in_bytes, fwd_ws_size);
     }

     if (filter_grad) {
       // choose backward algorithm for filter
-      PADDLE_ENFORCE(
+      CUDNN_ENFORCE(
           platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
               handle, cudnn_output_desc, cudnn_input_desc, cudnn_conv_desc,
               cudnn_filter_desc,
               CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
               workspace_size_limit, &filter_algo));

       // get workspace for backwards filter algorithm
-      PADDLE_ENFORCE(
+      CUDNN_ENFORCE(
           platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
               handle, cudnn_output_desc, cudnn_input_desc, cudnn_conv_desc,
               cudnn_filter_desc, filter_algo, &bwd_filter_ws_size));
@@ -222,7 +222,7 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
     // Because beta is zero, it is unnecessary to reset input_grad.
     for (int g = 0; g < groups; g++) {
-      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
+      CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward(
           handle, &alpha, cudnn_output_desc,
           output_grad_data + output_grad_offset * g, cudnn_filter_desc,
           filter_data + filter_offset * g, cudnn_conv_desc, data_algo,
@@ -237,7 +237,7 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     // Because beta is zero, it is unnecessary to reset filter_grad.
     // Gradient with respect to the filter
     for (int g = 0; g < groups; g++) {
-      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
+      CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
           handle, &alpha, cudnn_output_desc,
           output_grad_data + output_grad_offset * g, cudnn_input_desc,
           input_data + input_offset * g, cudnn_conv_desc, filter_algo,
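
The ENFORCE sites in this file look mirrored because a transposed convolution's forward pass is the data-gradient of an ordinary convolution: CUDNNConvTransposeOpKernel drives cudnnConvolutionBackwardData, while its gradient kernel uses cudnnConvolutionForward and cudnnConvolutionBackwardFilter. A minimal sketch of the forward call, assuming descriptors, algorithm, and workspace were prepared as in the previous sketch (CUDNN_CHECK as above):

#include <cudnn.h>

// Sketch: transposed-convolution forward expressed as convolution
// backward-data. The op's input tensor plays the role of "dy" and its
// spatially larger output plays the role of "dx".
void ConvTransposeForwardSketch(
    cudnnHandle_t handle, cudnnFilterDescriptor_t w_desc, const float* w,
    cudnnTensorDescriptor_t in_desc, const float* in_data,
    cudnnConvolutionDescriptor_t conv_desc, cudnnConvolutionBwdDataAlgo_t algo,
    void* workspace, size_t workspace_bytes,
    cudnnTensorDescriptor_t out_desc, float* out_data) {
  const float alpha = 1.0f, beta = 0.0f;
  CUDNN_CHECK(cudnnConvolutionBackwardData(
      handle, &alpha, w_desc, w, in_desc, in_data, conv_desc, algo,
      workspace, workspace_bytes, &beta, out_desc, out_data));
}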

paddle/fluid/operators/math/softmax.cu

Lines changed: 2 additions & 2 deletions
@@ -52,7 +52,7 @@ void SoftmaxCUDNNFunctor<T>::operator()(
       xDesc.descriptor<T>(layout, cudnn_tensor_dims);
   cudnnTensorDescriptor_t cudnn_y_desc =
       xDesc.descriptor<T>(layout, cudnn_tensor_dims);
-  PADDLE_ENFORCE(platform::dynload::cudnnSoftmaxForward(
+  CUDNN_ENFORCE(platform::dynload::cudnnSoftmaxForward(
       context.cudnn_handle(), CUDNN_SOFTMAX_ACCURATE,
       CUDNN_SOFTMAX_MODE_INSTANCE, CudnnDataType<T>::kOne(), cudnn_x_desc,
       X->data<T>(), CudnnDataType<T>::kZero(), cudnn_y_desc,
@@ -83,7 +83,7 @@ void SoftmaxGradCUDNNFunctor<T>::operator()(
       dxDesc.descriptor<T>(layout, cudnn_tensor_dims);
   cudnnTensorDescriptor_t cudnn_ygrad_desc =
       dyDesc.descriptor<T>(layout, cudnn_tensor_dims);
-  PADDLE_ENFORCE(platform::dynload::cudnnSoftmaxBackward(
+  CUDNN_ENFORCE(platform::dynload::cudnnSoftmaxBackward(
       context.cudnn_handle(), CUDNN_SOFTMAX_ACCURATE,
       CUDNN_SOFTMAX_MODE_INSTANCE, CudnnDataType<T>::kOne(), cudnn_y_desc,
       Y->data<T>(), cudnn_ygrad_desc, YGrad->data<T>(),
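
Both calls here use CUDNN_SOFTMAX_ACCURATE, which subtracts the per-instance maximum before exponentiating for numerical stability, and CUDNN_SOFTMAX_MODE_INSTANCE, which normalizes over everything but the batch dimension. A sketch of the forward call with plain float scaling factors standing in for CudnnDataType<T>::kOne()/kZero(); descriptor setup is elided and CUDNN_CHECK is the stand-in from above:

#include <cudnn.h>

// Sketch of the softmax forward call in SoftmaxCUDNNFunctor. Assuming the
// flattened 2-D input is described as an N x C x 1 x 1 tensor, MODE_INSTANCE
// normalizes each length-C row; SOFTMAX_ACCURATE subtracts the row max first.
void SoftmaxForwardSketch(cudnnHandle_t handle, cudnnTensorDescriptor_t x_desc,
                          const float* x, cudnnTensorDescriptor_t y_desc,
                          float* y) {
  const float one = 1.0f, zero = 0.0f;
  CUDNN_CHECK(cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE,
                                  CUDNN_SOFTMAX_MODE_INSTANCE, &one, x_desc, x,
                                  &zero, y_desc, y));
}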

paddle/fluid/operators/pool_cudnn_op.cu.cc

Lines changed: 2 additions & 2 deletions
@@ -81,7 +81,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel<T> {
     // ------------------- cudnn pool algorithm ---------------------
     auto handle = ctx.cuda_device_context().cudnn_handle();
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
-    PADDLE_ENFORCE(platform::dynload::cudnnPoolingForward(
+    CUDNN_ENFORCE(platform::dynload::cudnnPoolingForward(
         handle, cudnn_pool_desc, &alpha, cudnn_input_desc, input_data, &beta,
         cudnn_output_desc, output_data));
   }
@@ -154,7 +154,7 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
     T *input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
     // Because beta is zero, it is unnecessary to reset input_grad.

-    PADDLE_ENFORCE(platform::dynload::cudnnPoolingBackward(
+    CUDNN_ENFORCE(platform::dynload::cudnnPoolingBackward(
         handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data,
         cudnn_output_desc, output_grad_data, cudnn_input_desc, input_data,
         &beta, cudnn_input_desc, input_grad_data));
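
Unlike the other backward calls in this commit, cudnnPoolingBackward needs the forward output as well as the forward input and the output gradient, which is why the kernel passes output_data and input_data alongside output_grad_data. A sketch of the forward/backward pair with an explicit pooling descriptor (window, padding, and stride values are invented; CUDNN_CHECK as above):

#include <cudnn.h>

// Sketch of the pooling forward/backward pair. The backward pass consumes
// the forward output y in addition to x and dy: for max pooling, cuDNN uses
// y to recover which input element was the max in each window.
void PoolingPairSketch(cudnnHandle_t handle, cudnnTensorDescriptor_t x_desc,
                       const float* x, float* dx,
                       cudnnTensorDescriptor_t y_desc, float* y,
                       const float* dy) {
  cudnnPoolingDescriptor_t pool_desc;
  CUDNN_CHECK(cudnnCreatePoolingDescriptor(&pool_desc));
  CUDNN_CHECK(cudnnSetPooling2dDescriptor(
      pool_desc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN,
      /*window=*/2, 2, /*padding=*/0, 0, /*stride=*/2, 2));

  // beta == 0 means the outputs are overwritten, so neither y nor dx needs
  // to be zero-initialized (the same observation the kernel comments make).
  const float alpha = 1.0f, beta = 0.0f;
  CUDNN_CHECK(cudnnPoolingForward(handle, pool_desc, &alpha, x_desc, x, &beta,
                                  y_desc, y));
  CUDNN_CHECK(cudnnPoolingBackward(handle, pool_desc, &alpha, y_desc, y,
                                   y_desc, dy, x_desc, x, &beta, x_desc, dx));

  CUDNN_CHECK(cudnnDestroyPoolingDescriptor(pool_desc));
}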
