Skip to content

Commit 55538c5

Browse files
authored
cherry-pick: update api format (#18413) (#18421)
1 parent 4988456 commit 55538c5

File tree

2 files changed

+73
-70
lines changed

2 files changed

+73
-70
lines changed

paddle/fluid/API.spec

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -874,14 +874,7 @@ paddle.fluid.optimizer.ExponentialMovingAverage.apply (ArgSpec(args=['self', 'ex
874874
paddle.fluid.optimizer.ExponentialMovingAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '8c8a1791608b02a1ede53d6dd3a4fcec'))
875875
paddle.fluid.optimizer.ExponentialMovingAverage.update (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ea10f08af6d7aac3b7974aa976e4085f'))
876876
paddle.fluid.optimizer.PipelineOptimizer.__init__ (ArgSpec(args=['self', 'optimizer', 'cut_list', 'place_list', 'concurrency_list', 'queue_size', 'sync_steps', 'start_cpu_core_id'], varargs=None, keywords=None, defaults=(None, None, None, 30, 1, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
877-
paddle.fluid.optimizer.PipelineOptimizer.create_vars (ArgSpec(args=['self', 'block', 'main_program'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
878-
paddle.fluid.optimizer.PipelineOptimizer.extract_section_ops (ArgSpec(args=['self', 'ops', 'cut_point_name'], varargs=None, keywords=None, defaults=None), ('document', '4a29be77da04b5c30dd7202f44c79b70'))
879-
paddle.fluid.optimizer.PipelineOptimizer.extract_section_opt_ops (ArgSpec(args=['self', 'ops', 'cut_point_name'], varargs=None, keywords=None, defaults=None), ('document', '99e0f641222c1ce4dd0d7194c3b2c653'))
880-
paddle.fluid.optimizer.PipelineOptimizer.find_input_output (ArgSpec(args=['self', 'ops', 'name', 'is_forward'], varargs=None, keywords=None, defaults=(True,)), ('document', '92d77fb262766b352746f09cca81db93'))
881-
paddle.fluid.optimizer.PipelineOptimizer.find_persistable_vars (ArgSpec(args=['self', 'ops', 'whole_parameters'], varargs=None, keywords=None, defaults=None), ('document', '877b7cc290f0647455e5e4409e825923'))
882-
paddle.fluid.optimizer.PipelineOptimizer.find_section_opt (ArgSpec(args=['self', 'ops', 'params'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
883877
paddle.fluid.optimizer.PipelineOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
884-
paddle.fluid.optimizer.PipelineOptimizer.split_program (ArgSpec(args=['self', 'main_program', 'cut_list'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
885878
paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '08a5dd9f6f376ff3d55e0b1d92115cbd'))
886879
paddle.fluid.backward.gradients (ArgSpec(args=['targets', 'inputs', 'target_gradients', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'e2097e1e0ed84ae44951437bfe269a1b'))
887880
paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))

python/paddle/fluid/optimizer.py

Lines changed: 73 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2650,57 +2650,67 @@ def restore(self, executor):
26502650
class PipelineOptimizer(object):
26512651
"""
26522652
Pipeline Optimizer
2653-
Train with pipeline mode. The program will be splited by cut_list.
2654-
If the len of cut_list is k, then the whole program (including
2655-
backward part) will be splited to 2*k-1 sections. So the length of place_list
2656-
and concurrency_list must be also 2*k-1.
2657-
Note: Though the asynchronous mode is applied in pipeline training to speed up,
2653+
2654+
Train with pipeline mode. The program will be split by cut_list.
2655+
2656+
If the len of cut_list is k, then the whole program (including \
2657+
backward part) will be split into 2*k-1 sections.
2658+
2659+
So the length of place_list and concurrency_list must also be 2*k-1.
2660+
2661+
Note: Though the asynchronous mode is applied in pipeline training to speed up, \
26582662
the final performance depends on the training progress of each pipeline heavily.
2659-
And we will try the synchronous mode in the future
2663+
2664+
And we will try the synchronous mode in the future.
2665+
26602666
Args:
2661-
optimizer (Optimizer): The based optimizer, such as SGD
2662-
cut_list (list of Variable list): The cut variable of the main_program
2663-
place_list (list of Place): The place where the section will run on
2664-
concurrency_list (list of int): The concurrency degree
2667+
optimizer (Optimizer): The base optimizer, such as SGD.
2668+
cut_list (list of Variable list): The cut variable of the main_program.
2669+
place_list (list of Place): The place where each section will run.
2670+
concurrency_list (list of int): The concurrency degree.
26652671
queue_size (int): Each section will consume scopes from its in-scope queue
26662672
and produce scopes to out-scope queue. And this parameter
2667-
specify the scope queue size. [Optional. Default: 30]
2668-
sync_steps (int): The synchronization steps between different cards. [Optional. Default: 1]
2669-
start_cpu_core_id (int): specify the first cpu core id. [Optional. Default:0]
2673+
specifies the scope queue size. [Optional. Default: 30].
2674+
sync_steps (int): The synchronization steps between different cards. [Optional. Default: 1].
2675+
start_cpu_core_id (int): Specifies the first CPU core id. [Optional. Default: 0].
2676+
26702677
Examples:
26712678
.. code-block:: python
2672-
x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=0)
2673-
y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=0)
2674-
emb_x = layers.embedding(input=x, param_attr=fluid.ParamAttr(name="embx"), size=[10,2], is_sparse=False)
2675-
emb_y = layers.embedding(input=y, param_attr=fluid.ParamAttr(name="emby",learning_rate=0.9), size=[10,2], is_sparse=False)
2676-
concat = layers.concat([emb_x, emb_y], axis=1)
2677-
fc = layers.fc(input=concat, name="fc", size=1, num_flatten_dims=1, bias_attr=False)
2678-
loss = layers.reduce_mean(fc)
2679-
optimizer = fluid.optimizer.SGD(learning_rate=0.5)
2680-
optimizer = fluid.optimizer.PipelineOptimizer(optimizer,
2681-
cut_list=[[emb_x, emb_y], [loss]],
2682-
place_list=[fluid.CPUPlace(), fluid.CUDAPlace(0), fluid.CPUPlace()],
2683-
concurrency_list=[1, 1, 4],
2684-
queue_size=2,
2685-
sync_steps=1,
2686-
)
2687-
optimizer.minimize(loss)
2688-
place = fluid.CPUPlace()
2689-
exe = fluid.Executor(place)
2690-
exe.run(fluid.default_startup_program())
2691-
filelist = [] # you should set your own filelist, e.g. filelist = ["dataA.txt"]
2692-
dataset = fluid.DatasetFactory().create_dataset("FileInstantDataset")
2693-
dataset.set_use_var([x,y])
2694-
dataset.set_batch_size(batch_size)
2695-
dataset.set_filelist(filelist)
2696-
exe.train_from_dataset(
2697-
fluid.default_main_program(),
2698-
dataset,
2699-
thread=2,
2700-
debug=False,
2701-
fetch_list=[],
2702-
fetch_info=[],
2703-
print_period=1)
2679+
2680+
import paddle.fluid.layers as layers
2681+
2682+
x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=0)
2683+
y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=0)
2684+
emb_x = layers.embedding(input=x, param_attr=fluid.ParamAttr(name="embx"), size=[10,2], is_sparse=False)
2685+
emb_y = layers.embedding(input=y, param_attr=fluid.ParamAttr(name="emby",learning_rate=0.9), size=[10,2], is_sparse=False)
2686+
concat = layers.concat([emb_x, emb_y], axis=1)
2687+
fc = layers.fc(input=concat, name="fc", size=1, num_flatten_dims=1, bias_attr=False)
2688+
loss = layers.reduce_mean(fc)
2689+
optimizer = fluid.optimizer.SGD(learning_rate=0.5)
2690+
optimizer = fluid.optimizer.PipelineOptimizer(optimizer,
2691+
cut_list=[[emb_x, emb_y], [loss]],
2692+
place_list=[fluid.CPUPlace(), fluid.CUDAPlace(0), fluid.CPUPlace()],
2693+
concurrency_list=[1, 1, 4],
2694+
queue_size=2,
2695+
sync_steps=1,
2696+
)
2697+
optimizer.minimize(loss)
2698+
place = fluid.CPUPlace()
2699+
exe = fluid.Executor(place)
2700+
exe.run(fluid.default_startup_program())
2701+
filelist = [] # you should set your own filelist, e.g. filelist = ["dataA.txt"]
2702+
dataset = fluid.DatasetFactory().create_dataset("FileInstantDataset")
2703+
dataset.set_use_var([x,y])
2704+
dataset.set_batch_size(batch_size)
2705+
dataset.set_filelist(filelist)
2706+
exe.train_from_dataset(
2707+
fluid.default_main_program(),
2708+
dataset,
2709+
thread=2,
2710+
debug=False,
2711+
fetch_list=[],
2712+
fetch_info=[],
2713+
print_period=1)
27042714
"""
27052715

27062716
def __init__(self,
@@ -2720,7 +2730,7 @@ def __init__(self,
27202730
self._sync_steps = sync_steps
27212731
self._start_cpu_core_id = start_cpu_core_id
27222732

2723-
def create_vars(self, block, main_program):
2733+
def _create_vars(self, block, main_program):
27242734
used_var_set = set()
27252735
for op_idx in range(block.desc.op_size()):
27262736
op_desc = block.desc.op(op_idx)
@@ -2732,7 +2742,7 @@ def create_vars(self, block, main_program):
27322742
source_var = main_program.block(0).var(str(var))
27332743
block._clone_variable(source_var, False)
27342744

2735-
def extract_section_opt_ops(self, ops, cut_point_name):
2745+
def _extract_section_opt_ops(self, ops, cut_point_name):
27362746
"""
27372747
Extract opt ops in the given section
27382748
"""
@@ -2748,7 +2758,7 @@ def extract_section_opt_ops(self, ops, cut_point_name):
27482758
op_path = [ops[i] for i in range(len(ops)) if relevant_op_flags[i]]
27492759
return op_path
27502760

2751-
def find_input_output(self, ops, name, is_forward=True):
2761+
def _find_input_output(self, ops, name, is_forward=True):
27522762
"""
27532763
Find the inputs or outputs of a section
27542764
"""
@@ -2763,7 +2773,7 @@ def find_input_output(self, ops, name, is_forward=True):
27632773
all_set.update(op.desc.input_arg_names())
27642774
return all_set - part_set
27652775

2766-
def find_persistable_vars(self, ops, whole_parameters):
2776+
def _find_persistable_vars(self, ops, whole_parameters):
27672777
"""
27682778
find the persistable input vars in current section
27692779
"""
@@ -2791,7 +2801,7 @@ def _is_lr_role_op(self, op):
27912801
return True
27922802
return False
27932803

2794-
def extract_section_ops(self, ops, cut_point_name):
2804+
def _extract_section_ops(self, ops, cut_point_name):
27952805
"""
27962806
Extract ops in the given section
27972807
"""
@@ -2811,11 +2821,11 @@ def extract_section_ops(self, ops, cut_point_name):
28112821
op_path = [ops[i] for i in range(len(ops)) if relevant_op_flags[i]]
28122822
return op_path
28132823

2814-
def find_section_opt(self, ops, params):
2815-
res = self.extract_section_opt_ops(ops, params)
2824+
def _find_section_opt(self, ops, params):
2825+
res = self._extract_section_opt_ops(ops, params)
28162826
return res
28172827

2818-
def split_program(self, main_program, cut_list):
2828+
def _split_program(self, main_program, cut_list):
28192829
programs = []
28202830
block = main_program.block(0)
28212831
whole_parameters = [e.name for e in block.all_parameters()]
@@ -2836,24 +2846,24 @@ def split_program(self, main_program, cut_list):
28362846
"input_set": set(),
28372847
"output_set": set()
28382848
}
2839-
cur_ops = self.extract_section_ops(ops, cut_vars)
2849+
cur_ops = self._extract_section_ops(ops, cut_vars)
28402850
if i == 0:
28412851
for op in ops:
28422852
if self._is_lr_role_op(op):
28432853
cur_ops.append(op)
28442854
#prevent inplace in/out
28452855
program["input_set"].update(
2846-
self.find_input_output(
2856+
self._find_input_output(
28472857
cur_ops, [], is_forward=True))
28482858
for e in cur_ops:
28492859
ops.remove(e)
28502860

28512861
if i < cut_len:
28522862
sec_params.append(
2853-
self.find_persistable_vars(cur_ops, whole_parameters))
2863+
self._find_persistable_vars(cur_ops, whole_parameters))
28542864
if i >= cut_len - 1:
2855-
opt_ops = self.find_section_opt(ops,
2856-
sec_params[2 * cut_len - 2 - i])
2865+
opt_ops = self._find_section_opt(
2866+
ops, sec_params[2 * cut_len - 2 - i])
28572867

28582868
for e in opt_ops:
28592869
ops.remove(e)
@@ -2864,11 +2874,11 @@ def split_program(self, main_program, cut_list):
28642874
ap_op = program["program"].block(0).desc.append_op()
28652875
ap_op.copy_from(op_desc)
28662876
program["input_set"].update(
2867-
self.find_input_output(
2877+
self._find_input_output(
28682878
cur_ops, cut_vars, is_forward=True))
28692879
program["input_set"].update(sec_params[min(i, 2 * cut_len - 2 - i)])
28702880
program["output_set"].update(
2871-
self.find_input_output(
2881+
self._find_input_output(
28722882
cur_ops, cut_vars, is_forward=False))
28732883
programs.append(program)
28742884
program = {
@@ -2883,7 +2893,7 @@ def split_program(self, main_program, cut_list):
28832893
program["input_set"].update(
28842894
[cut_var.name + "@GRAD" for cut_var in cut_list[0]])
28852895
program["input_set"].update(
2886-
self.find_input_output(
2896+
self._find_input_output(
28872897
ops, [], is_forward=True))
28882898
program["input_set"].update(sec_params[0])
28892899
programs.append(program)
@@ -2904,9 +2914,9 @@ def minimize(self,
29042914
self._optimizer.minimize(loss, startup_program, parameter_list,
29052915
no_grad_set)
29062916
program = loss.block.program
2907-
program_list = self.split_program(program, self._cut_list)
2917+
program_list = self._split_program(program, self._cut_list)
29082918
for p in program_list:
2909-
self.create_vars(p["program"].block(0), program)
2919+
self._create_vars(p["program"].block(0), program)
29102920
whole_parameters = [e.name for e in program.block(0).all_parameters()]
29112921
param_need_sync = []
29122922
for i, section_p in enumerate(program_list):

0 commit comments

Comments
 (0)