@@ -620,7 +620,23 @@ All parameter, weight, gradient are variables in Paddle.
  // -- python binds for parallel executor.
  py::class_<ParallelExecutor> pe(m, "ParallelExecutor");
- py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy");
+ py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy", R"DOC(
+   ExecutionStrategy allows the user to more precisely control how to run
+   the program in ParallelExecutor by setting its properties.
+
+   The available properties include:
+       use_cuda (bool): Whether to use CUDA or not. Default True.
+       num_threads (int): The number of threads used to run the operators
+           in ParallelExecutor. If it is not set, it will be set in
+           ParallelExecutor according to the device count. Default 0.
+       allow_op_delay (bool): Whether to delay running the communication
+           operators. Default False.
+       num_iteration_per_drop_scope (int): How many iterations to run
+           between two drops of the local scopes. Default 100.
+   )DOC");
+
  exec_strategy.def(py::init())
      .def_property(
          "num_threads",
@@ -658,7 +674,25 @@ All parameter, weight, gradient are variables in Paddle.
          : ExecutionStrategy::kDefault;
    });

- py::class_<BuildStrategy> build_strategy(pe, "BuildStrategy");
+ py::class_<BuildStrategy> build_strategy(pe, "BuildStrategy", R"DOC(
+   BuildStrategy allows the user to more precisely control how to build
+   the SSA Graph in ParallelExecutor by setting its properties.
+
+   The available properties include:
+       reduce_strategy (str): There are two reduce strategies, 'AllReduce'
+           and 'Reduce'. If you want all parameters to be optimized on all
+           devices, choose 'AllReduce'; if you choose 'Reduce', all
+           parameters will be evenly allocated to different devices for
+           optimization, and the optimized parameters will then be
+           broadcast to the other devices. Default 'AllReduce'.
+       gradient_scale_strategy (str): There are two ways of defining
+           loss@grad, 'CoeffNumDevice' and 'Customized'. By default,
+           ParallelExecutor sets loss@grad according to the number of
+           devices. If you want to customize loss@grad, choose
+           'Customized'. Default 'CoeffNumDevice'.
+       debug_graphviz_path (str): The path to which the SSA Graph is
+           written in graphviz format; useful for debugging. Default "".
+   )DOC");

  py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy")
      .value("Reduce", BuildStrategy::ReduceStrategy::kReduce)