Skip to content

Commit c5774e3

Browse files
committed
add FLAGS_use_deterministic_algo
1 parent 2f53cd0 commit c5774e3

File tree

2 files changed

+38
-19
lines changed

2 files changed

+38
-19
lines changed

paddle/fluid/operators/conv_cudnn_op.cu.cc

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ limitations under the License. */
2020
#include "paddle/fluid/platform/cudnn_helper.h"
2121
#include "paddle/fluid/platform/float16.h"
2222

23+
DEFINE_bool(cudnn_algo_use_autotune, true,
24+
"Whether allow using an autotuning algorithm for convolution "
25+
"operator. The autotuning algorithm may be non-deterministic. If "
26+
"false, the algorithm is deterministic.");
27+
2328
namespace paddle {
2429
namespace operators {
2530

@@ -267,17 +272,23 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
267272
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
268273
auto handle = dev_ctx.cudnn_handle();
269274
if (input_grad) {
270-
PADDLE_ENFORCE(
271-
platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
272-
handle, cudnn_filter_desc,
273-
// dyDesc: Handle to the previously initialized input differential
274-
// tensor descriptor.
275-
cudnn_output_grad_desc, cudnn_conv_desc,
276-
// dxDesc: Handle to the previously initialized output tensor
277-
// descriptor.
278-
cudnn_input_desc,
279-
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
280-
workspace_size_limit, &data_algo));
275+
if (FLAGS_cudnn_algo_use_autotune) {
276+
PADDLE_ENFORCE(
277+
platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
278+
handle, cudnn_filter_desc,
279+
// dyDesc: Handle to the previously initialized input
280+
// differential
281+
// tensor descriptor.
282+
cudnn_output_grad_desc, cudnn_conv_desc,
283+
// dxDesc: Handle to the previously initialized output tensor
284+
// descriptor.
285+
cudnn_input_desc,
286+
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
287+
workspace_size_limit, &data_algo));
288+
} else {
289+
data_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
290+
}
291+
281292
PADDLE_ENFORCE(
282293
platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
283294
handle, cudnn_filter_desc, cudnn_output_grad_desc,
@@ -286,12 +297,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
286297
}
287298

288299
if (filter_grad) {
289-
PADDLE_ENFORCE(
290-
platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
291-
handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
292-
cudnn_filter_desc,
293-
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
294-
workspace_size_limit, &filter_algo));
300+
if (FLAGS_cudnn_algo_use_autotune) {
301+
PADDLE_ENFORCE(
302+
platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
303+
handle, cudnn_input_desc, cudnn_output_grad_desc,
304+
cudnn_conv_desc, cudnn_filter_desc,
305+
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
306+
workspace_size_limit, &filter_algo));
307+
} else {
308+
filter_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
309+
}
295310

296311
PADDLE_ENFORCE(
297312
platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(

python/paddle/fluid/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,12 @@ def __bootstrap__():
107107
os.environ['OMP_NUM_THREADS'] = str(num_threads)
108108

109109
read_env_flags = [
110-
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
111-
'eager_delete_scope'
110+
'use_pinned_memory',
111+
'check_nan_inf',
112+
'benchmark',
113+
'warpctc_dir',
114+
'eager_delete_scope',
115+
'cudnn_algo_use_autotune',
112116
]
113117
if core.is_compiled_with_cuda():
114118
read_env_flags += ['fraction_of_gpu_memory_to_use']

0 commit comments

Comments (0)