Skip to content

Commit f61dfee

Browse files
Author: chengduo
Merge pull request #10263 from chengduoZH/add_FLAGS_use_deterministic_algo
Add FLAGS_cudnn_algo_use_autotune
2 parents 4434f8b + 07e46cc, commit f61dfee

File tree

3 files changed: +36 additions, -18 deletions

paddle/fluid/framework/details/scale_loss_grad_op_handle.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ void ScaleLossGradOpHandle::RunImpl() {
4646
->stream();
4747
memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
4848
platform::CPUPlace(), &coeff_, sizeof(float), stream);
49+
VLOG(1) << place_ << "RUN Scale loss grad op";
4950
});
5051
#endif
5152
}

paddle/fluid/operators/conv_cudnn_op.cu.cc

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ limitations under the License. */
2020
#include "paddle/fluid/platform/cudnn_helper.h"
2121
#include "paddle/fluid/platform/float16.h"
2222

23+
DEFINE_bool(cudnn_algo_use_autotune, true,
24+
"Whether allow using an autotuning algorithm for convolution "
25+
"operator. The autotuning algorithm may be non-deterministic. If "
26+
"false, the algorithm is deterministic.");
27+
2328
namespace paddle {
2429
namespace operators {
2530

@@ -267,17 +272,23 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
267272
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
268273
auto handle = dev_ctx.cudnn_handle();
269274
if (input_grad) {
270-
PADDLE_ENFORCE(
271-
platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
272-
handle, cudnn_filter_desc,
273-
// dyDesc: Handle to the previously initialized input differential
274-
// tensor descriptor.
275-
cudnn_output_grad_desc, cudnn_conv_desc,
276-
// dxDesc: Handle to the previously initialized output tensor
277-
// descriptor.
278-
cudnn_input_desc,
279-
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
280-
workspace_size_limit, &data_algo));
275+
if (FLAGS_cudnn_algo_use_autotune) {
276+
PADDLE_ENFORCE(
277+
platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
278+
handle, cudnn_filter_desc,
279+
// dyDesc: Handle to the previously initialized input
280+
// differential
281+
// tensor descriptor.
282+
cudnn_output_grad_desc, cudnn_conv_desc,
283+
// dxDesc: Handle to the previously initialized output tensor
284+
// descriptor.
285+
cudnn_input_desc,
286+
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
287+
workspace_size_limit, &data_algo));
288+
} else {
289+
data_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
290+
}
291+
281292
PADDLE_ENFORCE(
282293
platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
283294
handle, cudnn_filter_desc, cudnn_output_grad_desc,
@@ -286,12 +297,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
286297
}
287298

288299
if (filter_grad) {
289-
PADDLE_ENFORCE(
290-
platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
291-
handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
292-
cudnn_filter_desc,
293-
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
294-
workspace_size_limit, &filter_algo));
300+
if (FLAGS_cudnn_algo_use_autotune) {
301+
PADDLE_ENFORCE(
302+
platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
303+
handle, cudnn_input_desc, cudnn_output_grad_desc,
304+
cudnn_conv_desc, cudnn_filter_desc,
305+
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
306+
workspace_size_limit, &filter_algo));
307+
} else {
308+
filter_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
309+
}
295310

296311
PADDLE_ENFORCE(
297312
platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(

python/paddle/fluid/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ def __bootstrap__():
111111
'eager_delete_scope'
112112
]
113113
if core.is_compiled_with_cuda():
114-
read_env_flags += ['fraction_of_gpu_memory_to_use']
114+
read_env_flags += [
115+
'fraction_of_gpu_memory_to_use', 'cudnn_algo_use_autotune'
116+
]
115117
core.init_gflags([sys.argv[0]] +
116118
["--tryfromenv=" + ",".join(read_env_flags)])
117119
core.init_glog(sys.argv[0])

0 commit comments

Comments (0)