
Commit 24b00ac

Merge pull request #1313 from emailweixu/memory.set_input
Make it possible to postpone setting the layer name for a memory.
2 parents 53090e3 + 94e38bb

5 files changed: +210 additions, -27 deletions

python/paddle/trainer/config_parser.py

Lines changed: 29 additions & 11 deletions
@@ -2222,7 +2222,10 @@ def Link(

 # memory for recurrent layer group.
 # *name* and *size* are actual layer's name and size.
-# will return name of the memory,
+# If *name* is None, need to provide *memory_name* and need to use
+# SetMemoryInput() later to specify the layer which this memory remembers.
+#
+# return the name of the memory,
 # use this name if you assign the memory as other layer's input
 #
 # boot frame of memory is zeroed by default,
@@ -2234,23 +2237,27 @@ def Link(
 # can only be initailized by a *boot_layer* which is a sequence.
 #
 @config_func
-def Memory(
-        name,
-        size,
-        is_sequence=False,
-        boot_layer=None,
-        boot_bias=False,
-        boot_bias_active_type="",
-        boot_with_const_id=None, ):
-    agent_name = name + "+delay1"
+def Memory(name,
+           size,
+           is_sequence=False,
+           boot_layer=None,
+           boot_bias=False,
+           boot_bias_active_type="",
+           boot_with_const_id=None,
+           memory_name=None):
+    if not memory_name:
+        config_assert(name is not None, "name cannot be None")
+        memory_name = name + "+delay1"
+    agent_name = memory_name
     if is_sequence:
         agent_layer = SequenceAgentLayer(agent_name, size)
     else:
         agent_layer = AgentLayer(agent_name, size)
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
-    memory.layer_name = MakeLayerNameInSubmodel(name)
+    if name is not None:
+        memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
     memory.is_sequence = is_sequence
     options = sum((boot_layer is not None, bool(boot_bias),
@@ -2274,6 +2281,17 @@ def Memory(
     return agent_name


+@config_func
+def SetMemoryInput(memory_name, layer_name):
+    memory_name = MakeLayerNameInSubmodel(memory_name)
+    layer_name = MakeLayerNameInSubmodel(layer_name)
+    for mem in g_current_submodel.memories:
+        if mem.link_name == memory_name:
+            mem.layer_name = layer_name
+            return
+    logger.fatal("Nonexistent memory name: " + memory_name)
+
+
 # Generator for recurrent layer group, to use it:
 # 1. define a id layer as output of layer group
 # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
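
Taken together, the new memory_name argument and SetMemoryInput() let a config declare a memory before the layer it remembers has been defined, and bind the two afterwards. A minimal sketch of that flow at the config_parser level (the layer name "rnn_state" and the size are illustrative, and this assumes it runs inside a recurrent layer group, as the config_assert above requires):

    # Declare the memory first; the layer it remembers does not exist yet.
    mem = Memory(name=None, size=256, memory_name="rnn_state+delay1")
    # ... define a layer named "rnn_state" that takes mem as one of its inputs ...
    # Bind the memory to the layer whose previous-step output it should hold.
    SetMemoryInput(mem, "rnn_state")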

python/paddle/trainer_config_helpers/default_decorators.py

Lines changed: 3 additions & 3 deletions
@@ -97,13 +97,13 @@ def reset_hook():
 register_parse_config_hook(reset_hook)


-def wrap_name_default(name_prefix=None):
+def wrap_name_default(name_prefix=None, name_param="name"):
     """
     Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".

     .. code:: python

-        @default_name("some_name")
+        @wrap_name_default("some_name")
         def func(name=None):
             print name  # name will never be None. If name is not set,
                         # name will be "some_name_%d"
@@ -115,7 +115,7 @@ def func(name=None):
     """
     factory = DefaultNameFactory(name_prefix)
     _name_factories.append(factory)
-    return wrap_param_default(["name"], factory)
+    return wrap_param_default([name_param], factory)


 def wrap_param_attr_default(param_names=None, default_factory=None):
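
The new name_param hook is what lets the same machinery generate defaults for an argument other than "name". A small sketch of the behavior (make_memory is a made-up function; the generated names follow the factory's usual pattern, e.g. the "__memory_6__" seen in the test protostr below):

    @wrap_name_default("memory", "memory_name")
    def make_memory(name=None, size=0, memory_name=None):
        # If the caller omits memory_name, the decorator injects a generated
        # per-invocation default; name itself is left untouched.
        return memory_name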

python/paddle/trainer_config_helpers/layers.py

Lines changed: 58 additions & 12 deletions
@@ -288,6 +288,14 @@ def __str__(self):
         """
         assert False, "this method should not be invoked"

+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for memory layer
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+

 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
@@ -2759,8 +2767,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
         size=a.size)


+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,
@@ -2782,14 +2792,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` is true boot layer is sequence.

-
     The same name layer in recurrent group will set memory on each time
     step.

-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer that needs to be remembered, as follows:
+
+    .. code-block:: python
+
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(state)
+
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, user should call set_input() to specify the
+                 name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2811,13 +2839,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)

     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None

-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)

     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
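
At the helper level, the payoff is that a memory can be created inside a step function before the layer it remembers exists, with set_input() closing the loop afterwards. A sketch of such a step (seq_input is an assumed data layer and the sizes are illustrative, mirroring the test config reflected in the protostr below):

    def rnn_step(y):
        mem = memory(name=None, size=200)         # no layer name yet; memory_name is auto-generated
        out = fc_layer(input=[y, mem], size=200)  # the layer the memory should remember
        mem.set_input(out)                        # bind memory and layer after the fact
        return out

    rnn_out = recurrent_group(step=rnn_step, input=seq_input)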
@@ -3565,7 +3601,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4946,7 +4982,12 @@ def lambda_cost(input,

 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.

@@ -4961,22 +5002,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in the backward.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with each weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutput
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """

+    ipts, parents = __cost_input__(input, label, weight)
     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)


 @wrap_name_default()
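
The new weight argument threads through __cost_input__ above: each sample's cross-entropy cost is scaled by the value of a size-1 layer, and no gradient is propagated back into that layer. A sketch of how a config might use it (hidden is an assumed upstream layer; the layer names are illustrative):

    prob = fc_layer(input=hidden, size=10, act=SoftmaxActivation())
    lbl = data_layer(name="label", size=10)          # integer class labels
    wt = data_layer(name="sample_weight", size=1)    # per-sample cost scale
    cost = cross_entropy(input=prob, label=lbl, weight=wt, coeff=1.0)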

python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr

Lines changed: 107 additions & 0 deletions
@@ -331,6 +331,54 @@ layers {
   }
   trans_type: "non-seq"
 }
+layers {
+  name: "__recurrent_group_3__"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "seq_input@__recurrent_group_3__"
+  type: "scatter_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__memory_6__@__recurrent_group_3__"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__fc_layer_0__@__recurrent_group_3__"
+  type: "fc"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "seq_input@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  }
+  inputs {
+    input_layer_name: "__memory_6__@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  }
+  bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+}
+layers {
+  name: "__fc_layer_0__"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__last_seq_4__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__fc_layer_0__"
+  }
+  trans_type: "non-seq"
+}
 parameters {
   name: "___mixed_0__.w0"
   size: 40000
@@ -481,13 +529,44 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+  size: 200
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 200
+  initial_strategy: 0
+  initial_smart: false
+}
 input_layer_names: "seq_input"
 input_layer_names: "sub_seq_input"
 output_layer_names: "__last_seq_0__"
 output_layer_names: "__first_seq_0__"
 output_layer_names: "__last_seq_1__"
 output_layer_names: "__last_seq_2__"
 output_layer_names: "__last_seq_3__"
+output_layer_names: "__last_seq_4__"
 sub_models {
   name: "root"
   layer_names: "seq_input"
@@ -510,13 +589,17 @@ sub_models {
   layer_names: "__gru_group_0___recurrent_group"
   layer_names: "__gru_group_0__"
   layer_names: "__last_seq_3__"
+  layer_names: "__recurrent_group_3__"
+  layer_names: "__fc_layer_0__"
+  layer_names: "__last_seq_4__"
   input_layer_names: "seq_input"
   input_layer_names: "sub_seq_input"
   output_layer_names: "__last_seq_0__"
   output_layer_names: "__first_seq_0__"
   output_layer_names: "__last_seq_1__"
   output_layer_names: "__last_seq_2__"
   output_layer_names: "__last_seq_3__"
+  output_layer_names: "__last_seq_4__"
   is_recurrent_layer_group: false
 }
 sub_models {
@@ -647,4 +730,28 @@ sub_models {
   }
   target_inlinkid: -1
 }
+sub_models {
+  name: "__recurrent_group_3__"
+  layer_names: "seq_input@__recurrent_group_3__"
+  layer_names: "__memory_6__@__recurrent_group_3__"
+  layer_names: "__fc_layer_0__@__recurrent_group_3__"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__memory_6__@__recurrent_group_3__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "seq_input"
+    link_name: "seq_input@__recurrent_group_3__"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__fc_layer_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
