@@ -77,7 +77,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     // cudnn 7 can support groups, no need to do it manually
     // FIXME(typhoonzero): find a better way to disable groups
     // rather than setting it to 1.
-    PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
+    CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
         cudnn_conv_desc, groups));
     groups = 1;
 #endif
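
The only functional change in this diff is swapping `PADDLE_ENFORCE` for `CUDNN_ENFORCE` around every cuDNN call, so a failing call reports the cuDNN status name instead of a generic enforce failure. A minimal sketch of what such a macro can look like, built on cuDNN's public `cudnnGetErrorString` (an illustration under assumptions, not Paddle's exact definition):

```cpp
#include <cudnn.h>

#include <sstream>
#include <stdexcept>

// Hypothetical stand-in for CUDNN_ENFORCE: evaluate the cuDNN call once,
// and throw with the human-readable status name on failure.
#define CUDNN_ENFORCE_SKETCH(call)                                    \
  do {                                                                \
    cudnnStatus_t status_ = (call);                                   \
    if (status_ != CUDNN_STATUS_SUCCESS) {                            \
      std::ostringstream oss_;                                        \
      oss_ << "cuDNN call failed: " << cudnnGetErrorString(status_);  \
      throw std::runtime_error(oss_.str());                           \
    }                                                                 \
  } while (0)
```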
@@ -129,7 +129,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     auto handle = dev_ctx.cudnn_handle();
 
-    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
+    CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
         workspace_size_limit, &algo));
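
For context on the hunk above: `cudnnGetConvolutionForwardAlgorithm` with `CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT` asks cuDNN for the fastest forward algorithm whose scratch buffer fits under the given byte limit, and `cudnnGetConvolutionForwardWorkspaceSize` (in the next hunk) then reports how many bytes that algorithm actually needs. A sketch of the two-step pattern, reusing the hypothetical `CUDNN_ENFORCE_SKETCH` macro from above and assuming the caller has already created and configured the descriptors (this API exists in cuDNN 7; cuDNN 8 removed it in favor of the `*_v7`/`Find*` variants):

```cpp
#include <cudnn.h>

// Sketch: pick the fastest forward algorithm under a workspace budget,
// then query the scratch bytes it actually requires. Descriptors are
// assumed to be set up by the caller.
cudnnConvolutionFwdAlgo_t PickForwardAlgo(
    cudnnHandle_t handle, cudnnTensorDescriptor_t x_desc,
    cudnnFilterDescriptor_t w_desc, cudnnConvolutionDescriptor_t conv_desc,
    cudnnTensorDescriptor_t y_desc, size_t workspace_limit,
    size_t* workspace_bytes) {
  cudnnConvolutionFwdAlgo_t algo;
  CUDNN_ENFORCE_SKETCH(cudnnGetConvolutionForwardAlgorithm(
      handle, x_desc, w_desc, conv_desc, y_desc,
      CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, workspace_limit, &algo));
  CUDNN_ENFORCE_SKETCH(cudnnGetConvolutionForwardWorkspaceSize(
      handle, x_desc, w_desc, conv_desc, y_desc, algo, workspace_bytes));
  return algo;
}
```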
@@ -140,18 +140,18 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     if (dev_ctx.GetComputeCapability() >= 70 &&
         std::type_index(typeid(T)) ==
             std::type_index(typeid(platform::float16))) {
-      PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+      CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
           cudnn_conv_desc, CUDNN_TENSOR_OP_MATH));
       // Currently tensor core is only enabled using this algo
       algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
     } else {
-      PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+      CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
           cudnn_conv_desc, CUDNN_DEFAULT_MATH));
     }
 #endif
 
     // get workspace size able to allocate
-    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
+    CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, algo, &workspace_size_in_bytes));
     // It is possible for float16 on Volta GPU to allocate more memory than
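
The math-type hunk above opts float16 convolutions into Tensor Core kernels on GPUs with compute capability 7.0 or higher (Volta) and pins the algorithm to `CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM`, which per the source comment is the only forward algorithm wired up to Tensor Cores here. A sketch of the toggle in isolation, with `compute_capability` encoded as major*10 + minor to match the `>= 70` comparison above:

```cpp
#include <cudnn.h>

// Sketch of the branch above: enable Tensor Core math for FP16 on
// Volta-class GPUs, otherwise keep the default FP32 math path.
void ConfigureMathType(cudnnConvolutionDescriptor_t conv_desc,
                       int compute_capability, bool is_float16) {
  if (compute_capability >= 70 && is_float16) {
    CUDNN_ENFORCE_SKETCH(
        cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH));
  } else {
    CUDNN_ENFORCE_SKETCH(
        cudnnSetConvolutionMathType(conv_desc, CUDNN_DEFAULT_MATH));
  }
}
```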
@@ -165,7 +165,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     // ------------------- cudnn conv forward ---------------------
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
     for (int i = 0; i < groups; i++) {
-      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
+      CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward(
           handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
           cudnn_filter_desc, filter_data + i * group_offset_filter,
           cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
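
Two details of the forward loop above: `alpha = 1.0f, beta = 0.0f` request `y = 1*conv(x, w) + 0*y`, i.e. the output is overwritten rather than accumulated into; and on the pre-cuDNN-7 path where `groups` stays greater than 1, each iteration offsets the data pointers so group `i` sees only its slice of channels. A hypothetical mirror of that offset arithmetic for a 2-D convolution in NCHW layout (the real values come from the tensor shapes earlier in the kernel):

```cpp
// Hypothetical per-group pointer offsets, matching the
// input_data + i * group_offset_in pattern in the loop above.
struct GroupOffsets {
  int in, out, filter;
};

GroupOffsets ComputeGroupOffsets(int in_c, int in_h, int in_w, int out_c,
                                 int out_h, int out_w, int filter_numel,
                                 int groups) {
  return {in_c / groups * in_h * in_w,     // elements per input group
          out_c / groups * out_h * out_w,  // elements per output group
          filter_numel / groups};          // filter elements per group
}
```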
@@ -218,7 +218,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     // cudnn 7 can support groups, no need to do it manually
     // FIXME(typhoonzero): find a better way to disable groups
     // rather than setting it to 1.
-    PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
+    CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
         cudnn_conv_desc, groups));
     groups = 1;
 #endif
@@ -273,7 +273,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     auto handle = dev_ctx.cudnn_handle();
     if (input_grad) {
       if (!FLAGS_cudnn_deterministic) {
-        PADDLE_ENFORCE(
+        CUDNN_ENFORCE(
             platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
                 handle, cudnn_filter_desc,
                 // dyDesc: Handle to the previously initialized input
@@ -289,7 +289,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
         data_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
       }
 
-      PADDLE_ENFORCE(
+      CUDNN_ENFORCE(
           platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
               handle, cudnn_filter_desc, cudnn_output_grad_desc,
               cudnn_conv_desc, cudnn_input_desc, data_algo, &tmp_size));
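
The `FLAGS_cudnn_deterministic` branches trade speed for reproducibility: when the flag is set, the kernel skips cuDNN's heuristic picker and pins `CUDNN_CONVOLUTION_BWD_DATA_ALGO_1` (and, below, `CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1`), which cuDNN documents as deterministic; algorithms the heuristic may return, such as `CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0`, use atomics and can differ run to run. A sketch of the backward-data selection, with descriptors assumed configured by the caller (cuDNN 7 API):

```cpp
#include <cudnn.h>

// Sketch of the policy above: deterministic algorithm when requested,
// otherwise the fastest one that fits under the workspace cap.
cudnnConvolutionBwdDataAlgo_t PickBackwardDataAlgo(
    cudnnHandle_t handle, cudnnFilterDescriptor_t w_desc,
    cudnnTensorDescriptor_t dy_desc, cudnnConvolutionDescriptor_t conv_desc,
    cudnnTensorDescriptor_t dx_desc, size_t workspace_limit,
    bool deterministic) {
  cudnnConvolutionBwdDataAlgo_t algo;
  if (deterministic) {
    algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;  // documented deterministic
  } else {
    CUDNN_ENFORCE_SKETCH(cudnnGetConvolutionBackwardDataAlgorithm(
        handle, w_desc, dy_desc, conv_desc, dx_desc,
        CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, workspace_limit,
        &algo));
  }
  return algo;
}
```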
@@ -298,7 +298,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
 
     if (filter_grad) {
       if (!FLAGS_cudnn_deterministic) {
-        PADDLE_ENFORCE(
+        CUDNN_ENFORCE(
             platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
                 handle, cudnn_input_desc, cudnn_output_grad_desc,
                 cudnn_conv_desc, cudnn_filter_desc,
@@ -308,7 +308,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
         filter_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
       }
 
-      PADDLE_ENFORCE(
+      CUDNN_ENFORCE(
           platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
               handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
               cudnn_filter_desc, filter_algo, &tmp_size));
@@ -326,7 +326,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
       // Because beta is zero, it is unnecessary to reset input_grad.
 
       for (int i = 0; i < groups; i++) {
-        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
+        CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
             handle, &alpha, cudnn_filter_desc,
             filter_data + i * group_offset_filter, cudnn_output_grad_desc,
             output_grad_data + i * group_offset_out, cudnn_conv_desc, data_algo,
@@ -339,7 +339,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
       T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
       // Because beta is zero, it is unnecessary to reset filter_grad.
       for (int i = 0; i < groups; i++) {
-        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
+        CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
             handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
             cudnn_output_grad_desc, output_grad_data + i * group_offset_out,
             cudnn_conv_desc, filter_algo, cudnn_workspace,
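
One shared point across both backward paths: each `*WorkspaceSize` query above writes into `tmp_size`, and the single `cudnn_workspace` allocation is sized to the largest requirement so the same scratch buffer serves both the backward-data and backward-filter calls. The pattern, sketched:

```cpp
#include <algorithm>
#include <cstddef>

// Sketch of the sizing pattern implied by the &tmp_size queries above:
// take the maximum over every algorithm's requirement so one scratch
// buffer can serve all of the per-group cuDNN calls.
size_t SharedWorkspaceBytes(size_t data_ws_bytes, size_t filter_ws_bytes) {
  return std::max(data_ws_bytes, filter_ws_bytes);
}
```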