@@ -280,6 +280,14 @@ def __str__(self):
         """
         assert False, "this method should not be invoked"
 
+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for a memory layer.
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+
 
 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
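
A minimal usage sketch of the new set_input() hook (not part of the diff): inside a recurrent_group step, the memory can be created without naming the remembered layer and bound to its source afterwards. The step function, layer names, and sizes below are illustrative and assume this patch is applied in the usual v1 trainer-config context.

    from paddle.trainer_config_helpers import *

    def step(y):
        # Create the memory without naming the remembered layer; name=None is
        # passed explicitly, and the binding happens via set_input() below.
        mem = memory(name=None, size=256)
        out = fc_layer(input=[y, mem], size=256, act=TanhActivation())
        # Bind the memory to the fc_layer output so it is carried to the next step.
        mem.set_input(out)
        return out

    emb = data_layer(name='word_vector', size=256)
    rnn = recurrent_group(name='rnn', step=step, input=emb)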
@@ -2570,8 +2578,10 @@ def __reduce_concat_type__(a, b):
         size=sz)
 
 
+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,
@@ -2593,14 +2603,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` is true boot layer is sequence.
 
-
     The same name layer in recurrent group will set memory on each time
     step.
 
-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer that needs to be remembered, as follows:
+
+    .. code-block:: python
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(state)
+
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, the user should call set_input() to specify
+                 the name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2622,13 +2650,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)
 
     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None
 
-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)
 
     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
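
For clarity, a small sketch (not from the patch; assumes it is applied inside a trainer config) of how the two naming paths of the rewritten memory() resolve:

    # Path 1: `name` is given. memory_name is reset to None above, and the Memory
    # config links to the layer literally named 'state' (defined elsewhere in the
    # same config); any memory_name argument is ignored.
    mem_a = memory(name='state', size=256)

    # Path 2: `name` is None. @wrap_name_default("memory", "memory_name") supplies
    # an automatic memory_name, Memory() is registered under it, and the remembered
    # layer is chosen later via mem_b.set_input(<some LayerOutput>).
    mem_b = memory(name=None, size=256)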
@@ -2754,8 +2790,8 @@ def gru_step_layer(input,
     :param name:
     :param gate_act:
     :param bias_attr:
-    :param param_attr: the parameter_attribute for transforming the output_mem
-                       from previous step.
+    :param param_attr: the parameter_attribute for transforming the output_mem
+                       from previous step.
     :param layer_attr:
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2766,10 +2802,10 @@ def gru_step_layer(input,
     Layer(
         name=name,
         type=LayerType.GRU_STEP_LAYER,
-        # The parameter here is for transforming the output_mem. The input has
-        # already been transformed outside this module so it does not need
-        # parameter associated with it.
-        # The parameter here is instead grouped with input is due to
+        # The parameter here is for transforming the output_mem. The input has
+        # already been transformed outside this module so it does not need
+        # parameter associated with it.
+        # The parameter here is instead grouped with input is due to
         # backward model compatibility.
         inputs=[Input(input.name, **param_attr.attr), output_mem.name],
         bias=ParamAttr.to_bias(bias_attr),
@@ -3376,7 +3412,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4740,7 +4776,12 @@ def lambda_cost(input,
 
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.
 
@@ -4755,22 +4796,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied by coeff.
+                  The coefficient affects the gradient in the backward pass.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied by its weight. The
+                   weight should be a layer with size=1. Note that the gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutput
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """
 
+    ipts, parents = __cost_input__(input, label, weight)
     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
 
 
 @wrap_name_default()
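
A short sketch of the weighted cost path enabled by this change (not part of the diff; layer names and sizes are illustrative, assuming the usual v1 trainer-config context): the weight is routed through __cost_input__, must be a size-1 layer, and scales each sample's cross-entropy cost without receiving a gradient.

    from paddle.trainer_config_helpers import *

    feats = data_layer(name='features', size=128)
    prob = fc_layer(input=feats, size=10, act=SoftmaxActivation())
    lbl = data_layer(name='label', size=10)
    # Per-sample weight: a size-1 layer, as required by the new assert in __cost_input__.
    w = data_layer(name='sample_weight', size=1)
    # Each sample's cost is scaled by its weight; no gradient is computed for w.
    cost = cross_entropy(input=prob, label=lbl, weight=w, coeff=1.0)
    outputs(cost)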