
Commit 7d551dd

Author: xuwei06
Make it possible to postpone setting the layer name for a memory.
The reason for adding this function is exemplified in the following hypothetical code:

    mem = memory(name=None, size=256)
    hidden = fc_layer(input=mem)
    state = hidden + x
    mem.set_input(state)

In this code it would be very awkward to require the user to provide the name at the memory() call, because the layer name of state is generated automatically and is not easy to set by hand.

Change-Id: I918bf1d3d5c26addd88a6f7021e98b3e0e9df494
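To make the intended workflow concrete, here is a hedged sketch of the same pattern inside a recurrent_group() step function (the step function, the 256-unit sizes, and the use of addto_layer to combine the hidden state with the input are illustrative assumptions, not part of this commit):

    # Hypothetical step function for recurrent_group(); sizes are illustrative.
    def step(x):
        # Create the memory without naming the layer it will remember.
        mem = memory(name=None, size=256)
        hidden = fc_layer(input=mem, size=256)
        # Combine the recurrent hidden state with the current input
        # (assumes x also has size 256).
        state = addto_layer(input=[hidden, x])
        # `state` now exists with an auto-generated name, so it can be bound
        # to the memory for the next time step.
        mem.set_input(state)
        return state

The point of set_input() is that the binding happens after the remembered layer's auto-generated name is known, so the user never has to name it by hand.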
1 parent a30f6aa commit 7d551dd

File tree

5 files changed (+216, -33 lines)


python/paddle/trainer/config_parser.py

Lines changed: 29 additions & 11 deletions
@@ -2219,7 +2219,10 @@ def Link(
 
 # memory for recurrent layer group.
 # *name* and *size* are actual layer's name and size.
-# will return name of the memory,
+# If *name* is None, need to provide *memory_name* and need to use
+# SetMemoryInput() later to specify the layer which this memory remembers.
+#
+# return the name of the memory,
 # use this name if you assign the memory as other layer's input
 #
 # boot frame of memory is zeroed by default,
@@ -2231,23 +2234,27 @@ def Link(
 # can only be initailized by a *boot_layer* which is a sequence.
 #
 @config_func
-def Memory(
-        name,
-        size,
-        is_sequence=False,
-        boot_layer=None,
-        boot_bias=False,
-        boot_bias_active_type="",
-        boot_with_const_id=None, ):
-    agent_name = name + "+delay1"
+def Memory(name,
+           size,
+           is_sequence=False,
+           boot_layer=None,
+           boot_bias=False,
+           boot_bias_active_type="",
+           boot_with_const_id=None,
+           memory_name=None):
+    if not memory_name:
+        config_assert(name is not None, "name needs cannot be None")
+        memory_name = name + "+delay1"
+    agent_name = memory_name
     if is_sequence:
         agent_layer = SequenceAgentLayer(agent_name, size)
     else:
         agent_layer = AgentLayer(agent_name, size)
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
-    memory.layer_name = MakeLayerNameInSubmodel(name)
+    if name is not None:
+        memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
     memory.is_sequence = is_sequence
     options = sum((boot_layer is not None, bool(boot_bias),
@@ -2271,6 +2278,17 @@ def Memory(
     return agent_name
 
 
+@config_func
+def SetMemoryInput(memory_name, layer_name):
+    memory_name = MakeLayerNameInSubmodel(memory_name)
+    layer_name = MakeLayerNameInSubmodel(layer_name)
+    for mem in g_current_submodel.memories:
+        if mem.link_name == memory_name:
+            mem.layer_name = layer_name
+            return
+    logger.fatal("Nonexistent memory name: " + memory_name)
+
+
 # Generator for recurrent layer group, to use it:
 # 1. define a id layer as output of layer group
 # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
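At the config_parser level, the new memory_name argument and SetMemoryInput() are intended to be used as a pair, roughly as in the hedged sketch below (the layer names, the size, and the surrounding recurrent layer group are assumptions for illustration):

    # Inside a recurrent layer group: create the memory agent first, giving it
    # an explicit memory_name but leaving the remembered layer unspecified.
    mem = Memory(name=None, size=256, memory_name="rnn_state+delay1")
    # ... define the layers that read `mem` and eventually produce the layer
    # to be remembered, here assumed to be called "rnn_state" ...
    # Once that layer exists, bind it to the memory.
    SetMemoryInput("rnn_state+delay1", "rnn_state")

SetMemoryInput() looks the memory up by its link name, so the same memory_name string that was passed to Memory() is used to address it later.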

python/paddle/trainer_config_helpers/default_decorators.py

Lines changed: 3 additions & 3 deletions
@@ -93,13 +93,13 @@ def reset_hook():
 register_parse_config_hook(reset_hook)
 
 
-def wrap_name_default(name_prefix=None):
+def wrap_name_default(name_prefix=None, name_param="name"):
     """
     Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".
 
     ..  code:: python
 
-        @default_name("some_name")
+        @wrap_name_default("some_name")
         def func(name=None):
             print name # name will never be None. If name is not set,
                        # name will be "some_name_%d"
@@ -111,7 +111,7 @@ def func(name=None):
     """
     factory = DefaultNameFactory(name_prefix)
     _name_factories.append(factory)
-    return wrap_param_default(["name"], factory)
+    return wrap_param_default([name_param], factory)
 
 
 def wrap_param_attr_default(param_names=None, default_factory=None):
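The new name_param argument lets the generated default be attached to a parameter other than name, which is what layers.py uses for memory_name. A simplified, self-contained sketch of the idea (not the actual wrap_param_default/DefaultNameFactory implementation) follows:

    import functools

    def wrap_name_default_sketch(name_prefix, name_param="name"):
        """Fill `name_param` with '<prefix>_<count>' whenever it is left as None."""
        counter = [0]

        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                if kwargs.get(name_param) is None:
                    kwargs[name_param] = "%s_%d" % (name_prefix, counter[0])
                    counter[0] += 1
                return func(*args, **kwargs)

            return wrapper

        return decorator

    @wrap_name_default_sketch("memory", name_param="memory_name")
    def memory_stub(name=None, memory_name=None):
        return name, memory_name

    print(memory_stub())            # (None, 'memory_0')
    print(memory_stub(name='abc'))  # ('abc', 'memory_1')

The real decorator simply delegates to wrap_param_default() with [name_param] instead of the hard-coded ["name"], which is the whole change in this file.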

python/paddle/trainer_config_helpers/layers.py

Lines changed: 64 additions & 18 deletions
@@ -280,6 +280,14 @@ def __str__(self):
         """
         assert False, "this method should not be invoked"
 
+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for memory layer
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+
 
 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
@@ -2570,8 +2578,10 @@ def __reduce_concat_type__(a, b):
         size=sz)
 
 
+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
           is_seq=False,
           boot_layer=None,
           boot_bias=None,
@@ -2593,14 +2603,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` is true boot layer is sequence.
 
-
     The same name layer in recurrent group will set memory on each time
     step.
 
-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer needs to be remembered as the following:
+
+    .. code-block:: python
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(mem)
+
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, user should call set_input() to specify the
+                 name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2622,13 +2650,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)
 
     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None
 
-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)
 
     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
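In helper terms, the two ways of wiring a memory are now equivalent. The hedged sketch below assumes it runs inside a recurrent_group() step function with a 256-unit state; following the commit message, it is the produced state layer that gets passed to set_input():

    # Form 1: name the memory after the layer it remembers.
    mem = memory(name='state', size=256)
    state = fc_layer(input=mem, size=256, name='state')

    # Form 2 (equivalent): defer the binding until the state layer exists.
    mem = memory(name=None, size=256)
    state = fc_layer(input=mem, size=256)
    mem.set_input(state)  # bind the auto-named state layer to the memory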
@@ -2754,8 +2790,8 @@ def gru_step_layer(input,
     :param name:
     :param gate_act:
     :param bias_attr:
-    :param param_attr: the parameter_attribute for transforming the output_mem
-            from previous step.
+    :param param_attr: the parameter_attribute for transforming the output_mem
+                       from previous step.
     :param layer_attr:
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2766,10 +2802,10 @@ def gru_step_layer(input,
     Layer(
         name=name,
         type=LayerType.GRU_STEP_LAYER,
-    # The parameter here is for transforming the output_mem. The input has
-    # already been transformed outside this module so it does not need
-    # parameter associated with it.
-    # The parameter here is instead grouped with input is due to
+        # The parameter here is for transforming the output_mem. The input has
+        # already been transformed outside this module so it does not need
+        # parameter associated with it.
+        # The parameter here is instead grouped with input is due to
         # backward model compatibility.
         inputs=[Input(input.name, **param_attr.attr), output_mem.name],
         bias=ParamAttr.to_bias(bias_attr),
@@ -3376,7 +3412,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4740,7 +4776,12 @@ def lambda_cost(input,
 
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.
 
@@ -4755,22 +4796,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in the backward.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with each weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutout
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """
 
+    ipts, parents = __cost_input__(input, label, weight)
     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
 
 
 @wrap_name_default()
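For the cross_entropy change, a hedged usage sketch is given below (the data layer names and sizes are illustrative assumptions). Because __cost_input__ now only asserts weight.size == 1, the per-sample weight can come from any size-1 layer, not just a data layer:

    # Assumed inputs: features, integer labels, and a per-sample weight.
    feats = data_layer(name='features', size=128)
    lbl = data_layer(name='label', size=10)
    w = data_layer(name='sample_weight', size=1)

    prob = fc_layer(input=feats, size=10, act=SoftmaxActivation())
    # Each sample's cross-entropy cost is multiplied by its weight; no
    # gradient is propagated to the weight layer.
    cost = cross_entropy(input=prob, label=lbl, weight=w)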
