
Commit b0205fd

Author: chengduozh

    fix other layer
    test=develop

1 parent 458271c

File tree: 3 files changed, +96 -42 lines

paddle/fluid/API.spec (5 additions, 5 deletions)

@@ -23,7 +23,7 @@ paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wai
 paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
 paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
 paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.DistributeTranspilerConfig.__init__
+paddle.fluid.DistributeTranspilerConfig.__init__
 paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
 paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
 paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None
@@ -95,8 +95,8 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None))
+paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
@@ -312,7 +312,7 @@ paddle.fluid.transpiler.HashName.reset ArgSpec(args=['self'], varargs=None, keyw
 paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.DistributeTranspilerConfig.__init__
+paddle.fluid.transpiler.DistributeTranspilerConfig.__init__
 paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True))
 paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
 paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
@@ -380,4 +380,4 @@ paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
 paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
 paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
 paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
-paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
+paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
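Each API.spec row records a Python ArgSpec for one public symbol, so the nce and hsigmoid rows above simply pick up the new trailing 'name' argument plus one more None in the defaults tuple. A minimal sketch of how such a row can be regenerated with the standard library; print_api_spec is a hypothetical helper, and Paddle's actual spec tool may differ:

    # Hypothetical helper: rebuild one API.spec-style row from a live function.
    # inspect.getargspec matches the Python 2 era of this codebase.
    import inspect

    import paddle.fluid as fluid

    def print_api_spec(qualified_name, func):
        spec = inspect.getargspec(func)
        print('%s ArgSpec(args=%s, varargs=%s, keywords=%s, defaults=%s)' % (
            qualified_name, spec.args, spec.varargs, spec.keywords, spec.defaults))

    # After this commit, the defaults for nce gain one extra None for `name`.
    print_api_spec('paddle.fluid.layers.nce', fluid.layers.nce)
    print_api_spec('paddle.fluid.layers.hsigmoid', fluid.layers.hsigmoid)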

python/paddle/fluid/layers/nn.py (86 additions, 32 deletions)
@@ -359,6 +359,11 @@ def dynamic_lstm(input,
                                  W_{fh}, W_{oh}`}
                                - The shape is (D x 4D), where D is the hidden
                                  size.
+
+                               If it is set to None or one attribute of ParamAttr,
+                               dynamic_lstm will create ParamAttr as param_attr.
+                               If the Initializer of the param_attr is not set, the
+                               parameter is initialized with Xavier. Default: None.
         bias_attr (ParamAttr|None): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
@@ -371,6 +376,11 @@ def dynamic_lstm(input,
                               - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                 W_{fc}, W_{oc}`}.
                               - The shape is (1 x 7D).
+
+                              If it is set to None or one attribute of ParamAttr,
+                              dynamic_lstm will create ParamAttr as bias_attr.
+                              If the Initializer of the bias_attr is not set,
+                              the bias is initialized zero. Default: None.
         use_peepholes (bool): ${use_peepholes_comment}
         is_reverse (bool): ${is_reverse_comment}
         gate_activation (str): ${gate_activation_comment}
@@ -393,7 +403,7 @@ def dynamic_lstm(input,
             forward, _ = fluid.layers.dynamic_lstm(
                 input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
     """
-
+    assert bias_attr is not False, "bias_attr should not be False in dynamic_lstm."
     helper = LayerHelper('lstm', **locals())
     size = size // 4
     weight = helper.create_parameter(
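A short usage sketch of the behavior the new docstring describes: explicit ParamAttr objects for param_attr and bias_attr. The shapes, hidden_dim, and layer names here are illustrative assumptions, not part of the commit:

    import paddle.fluid as fluid

    hidden_dim = 512
    emb = fluid.layers.data(name='emb', shape=[128], dtype='float32', lod_level=1)
    proj = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False)
    # Explicit attributes; leaving them as None would give Xavier-initialized
    # weights and a zero-initialized bias, per the docstring above.
    forward, cell = fluid.layers.dynamic_lstm(
        input=proj,
        size=hidden_dim * 4,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Xavier()),
        bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(0.0)),
        use_peepholes=False)

Passing bias_attr=False here would now trip the assert added above instead of failing later inside the op.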
@@ -528,6 +538,11 @@ def dynamic_lstmp(input,
                                 size.
                               - Projection weight = {:math:`W_{rh}`}.
                               - The shape of projection weight is (D x P).
+
+                              If it is set to None or one attribute of ParamAttr,
+                              dynamic_lstmp will create ParamAttr as param_attr.
+                              If the Initializer of the param_attr is not set, the
+                              parameter is initialized with Xavier. Default: None.
         bias_attr(ParamAttr|None): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
@@ -540,6 +555,11 @@ def dynamic_lstmp(input,
                               - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                 W_{fc}, W_{oc}`}.
                               - The shape is (1 x 7D).
+
+                              If it is set to None or one attribute of ParamAttr,
+                              dynamic_lstmp will create ParamAttr as bias_attr.
+                              If the Initializer of the bias_attr is not set,
+                              the bias is initialized zero. Default: None.
         use_peepholes(bool): Whether to enable diagonal/peephole connections,
                              default `True`.
         is_reverse(bool): Whether to compute reversed LSTM, default `False`.
@@ -584,6 +604,7 @@ def dynamic_lstmp(input,
                  proj_activation="tanh")
     """

+    assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
     helper = LayerHelper('lstmp', **locals())
     size = size // 4
     weight = helper.create_parameter(
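dynamic_lstmp gets the same guard; a sketch of the assumed caller-side effect of bias_attr=False (the input shapes and hidden_dim are again assumptions):

    import paddle.fluid as fluid

    hidden_dim = 512
    emb = fluid.layers.data(name='emb', shape=[128], dtype='float32', lod_level=1)
    proj = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False)
    try:
        fluid.layers.dynamic_lstmp(input=proj, size=hidden_dim * 4,
                                   proj_size=hidden_dim, bias_attr=False)
    except AssertionError as e:
        print(e)  # bias_attr should not be False in dynamic_lstmp.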
@@ -1283,11 +1304,12 @@ def sequence_conv(input,
             If it is set to None or one attribute of ParamAttr, sequence_conv
             will create ParamAttr as bias_attr. If the Initializer of the bias_attr
             is not set, the bias is initialized zero. Default: None.
-        param_attr (ParamAttr): The parameter attribute for learnable parameters/weights
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of sequence_conv. If it is set to None or one attribute of ParamAttr, sequence_conv
             will create ParamAttr as param_attr. If the Initializer of the param_attr
             is not set, the parameter is initialized with Xavier. Default: None.
-        act (str): the activation type
+        act (str): Activation type. If it is set to None, no activation is appended.
+            Default: None.
         name (str|None): A name for this layer (optional). If set None, the layer
             will be named automatically. Default: None.
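As the clarified doc states, act defaults to None, in which case no activation is appended; a minimal sketch (the sequence shape is an assumption):

    import paddle.fluid as fluid

    seq = fluid.layers.data(name='seq', shape=[128], dtype='float32', lod_level=1)
    # act=None keeps the convolution output linear; pass e.g. act='relu'
    # to append an activation.
    conv_out = fluid.layers.sequence_conv(input=seq, num_filters=64,
                                          filter_size=3, act=None)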
@@ -1502,7 +1524,7 @@ def conv2d(input,
             the first half of the filters is only connected to the first half
             of the input channels, while the second half of the filters is only
             connected to the second half of the input channels. Default: groups=1.
-        param_attr (ParamAttr): The parameter attribute for learnable parameters/weights
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
             will create ParamAttr as param_attr. If the Initializer of the param_attr
             is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
@@ -1675,7 +1697,7 @@ def conv3d(input,
             the first half of the filters is only connected to the first half
             of the input channels, while the second half of the filters is only
             connected to the second half of the input channels. Default: groups=1
-        param_attr (ParamAttr): The parameter attribute for learnable parameters/weights
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of conv3d. If it is set to None or one attribute of ParamAttr, conv3d
             will create ParamAttr as param_attr. If it is set to None, the parameter
             is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
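With param_attr left as None, conv2d and conv3d fall back to the Normal(0.0, std) initialization the docstrings now spell out; a sketch with an assumed image shape:

    import paddle.fluid as fluid

    img = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')
    # param_attr=None: the filter weight is created by the layer itself and,
    # per the doc, initialized from Normal(0.0, std).
    feat = fluid.layers.conv2d(input=img, num_filters=16, filter_size=3,
                               param_attr=None)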
@@ -2137,8 +2159,14 @@ def batch_norm(input,
         is_test(bool, Default False): Used for training or testing.
         momentum(float, Default 0.9):
         epsilon(float, Default 1e-05):
-        param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
-        bias_attr(ParamAttr): The parameter attribute for Parameter `bias`.
+        param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
+            of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
+            will create ParamAttr as param_attr. If the Initializer of the param_attr
+            is not set, the parameter is initialized with Xavier. Default: None.
+        bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
+            If it is set to None or one attribute of ParamAttr, batch_norm
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized zero. Default: None.
         data_layout(string, default NCHW): NCHW|NHWC
         in_place(bool, Default False): Make the input and output of batch norm reuse memory.
         name(string, Default None): A name for this layer (optional). If set None, the layer
@@ -2158,6 +2186,7 @@ def batch_norm(input,
             hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
             hidden2 = fluid.layers.batch_norm(input=hidden1)
     """
+    assert bias_attr is not False, "bias_attr should not be False in batch_norm."
     helper = LayerHelper('batch_norm', **locals())
     dtype = helper.input_dtype()
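batch_norm still accepts a ParamAttr or None for both the scale and the bias, while bias_attr=False is now rejected by the assert above; a sketch with assumed attribute names:

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[784], dtype='float32')
    hidden1 = fluid.layers.fc(input=x, size=200)
    hidden2 = fluid.layers.batch_norm(
        input=hidden1,
        param_attr=fluid.ParamAttr(name='bn_scale'),   # the `scale` parameter
        bias_attr=fluid.ParamAttr(name='bn_offset'))   # the `bias` parameter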

@@ -2428,7 +2457,7 @@ def conv2d_transpose(input,
             first half of the input channels, while the second half of the
             filters is only connected to the second half of the input channels.
             Default: groups = 1.
-        param_attr (ParamAttr): The parameter attribute for learnable parameters/weights
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
             will create ParamAttr as param_attr. If the Initializer of the param_attr
             is not set, the parameter is initialized with Xavier. Default: None.
@@ -2457,7 +2486,7 @@ def conv2d_transpose(input,
             data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
             conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3)
     """
-
+    assert param_attr is not False, "param_attr should not be False in conv2d_transpose."
     input_channel = input.shape[1]

     op_type = 'conv2d_transpose'
@@ -2616,7 +2645,7 @@ def conv3d_transpose(input,
             first half of the input channels, while the second half of the
             filters is only connected to the second half of the input channels.
             Default: groups=1
-        param_attr (ParamAttr): The parameter attribute for learnable parameters/weights
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
             will create ParamAttr as param_attr. If the Initializer of the param_attr
             is not set, the parameter is initialized with Xavier. Default: None.
@@ -2645,6 +2674,7 @@ def conv3d_transpose(input,
             data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32')
             conv3d_transpose = fluid.layers.conv3d_transpose(input=data, num_filters=2, filter_size=3)
     """
+    assert param_attr is not False, "param_attr should not be False in conv3d_transpose."
    l_type = "conv3d_transpose"
    helper = LayerHelper(l_type, **locals())
    if not isinstance(input, Variable):
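Both transpose convolutions now fail fast on param_attr=False instead of trying to create a weight from a falsy attribute; a sketch of the assumed caller-side effect:

    import paddle.fluid as fluid

    data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
    try:
        fluid.layers.conv2d_transpose(input=data, num_filters=2,
                                      filter_size=3, param_attr=False)
    except AssertionError as e:
        print(e)  # param_attr should not be False in conv2d_transpose.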
@@ -4018,7 +4048,8 @@ def nce(input,
              sample_weight=None,
              param_attr=None,
              bias_attr=None,
-             num_neg_samples=None):
+             num_neg_samples=None,
+             name=None):
     """
     ${comment}

@@ -4029,9 +4060,18 @@ def nce(input,
         sample_weight (Variable|None): A Variable of shape [batch_size, 1]
             storing a weight for each sample. The default weight for each
            sample is 1.0.
-        param_attr (ParamAttr|None): attributes for parameter
-        bias_attr (ParamAttr|None): attributes for bias
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
+            of nce. If it is set to None or one attribute of ParamAttr, nce
+            will create ParamAttr as param_attr. If the Initializer of the param_attr
+            is not set, the parameter is initialized with Xavier. Default: None.
+        bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of nce.
+            If it is set to False, no bias will be added to the output units.
+            If it is set to None or one attribute of ParamAttr, nce
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized zero. Default: None.
         num_neg_samples (int): ${num_neg_samples_comment}
+        name (str|None): A name for this layer (optional). If set None, the layer
+            will be named automatically. Default: None.

     Returns:
         Variable: The output nce loss.
@@ -4064,19 +4104,28 @@ def nce(input,
     """
     helper = LayerHelper('nce', **locals())
     assert isinstance(input, Variable)
-    dim = input.shape[1]
     assert isinstance(label, Variable)
+
+    dim = input.shape[1]
     num_true_class = label.shape[1]
     w = helper.create_parameter(
         attr=helper.param_attr,
         shape=[num_total_classes, dim],
         is_bias=False,
         dtype=input.dtype)
-    b = helper.create_parameter(
-        attr=helper.bias_attr,
-        shape=[num_total_classes, 1],
-        is_bias=True,
-        dtype=input.dtype)
+    inputs = {
+        'Input': input,
+        'Label': label,
+        'Weight': w,
+        'SampleWeight': sample_weight if sample_weight is not None else []
+    }
+    if helper.bias_attr:
+        b = helper.create_parameter(
+            attr=helper.bias_attr,
+            shape=[num_total_classes, 1],
+            is_bias=True,
+            dtype=input.dtype)
+        inputs['Bias'] = b
     cost = helper.create_tmp_variable(dtype=input.dtype)
     sample_logits = helper.create_tmp_variable(dtype=input.dtype)
     sample_labels = helper.create_tmp_variable(dtype=label.dtype)
@@ -4093,13 +4142,7 @@ def nce(input,

     helper.append_op(
         type='nce',
-        inputs={
-            'Input': input,
-            'Label': label,
-            'Weight': w,
-            'Bias': b,
-            'SampleWeight': sample_weight if sample_weight is not None else []
-        },
+        inputs=inputs,
         outputs={
             'Cost': cost,
             'SampleLogits': sample_logits,
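Because the op's inputs dict is now built conditionally, bias_attr=False means no bias parameter is ever created and 'Bias' is simply absent from the nce op; a usage sketch with assumed shapes and class counts:

    import paddle.fluid as fluid

    feats = fluid.layers.data(name='feats', shape=[64], dtype='float32')
    word = fluid.layers.data(name='word', shape=[1], dtype='int64')
    loss = fluid.layers.nce(
        input=feats, label=word, num_total_classes=10000,
        bias_attr=False,     # skips create_parameter; no 'Bias' input
        num_neg_samples=5, name='nce_loss')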
@@ -4109,7 +4152,12 @@
     return cost / (num_neg_samples + 1)


-def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None):
+def hsigmoid(input,
+             label,
+             num_classes,
+             param_attr=None,
+             bias_attr=None,
+             name=None):
     """
     The hierarchical sigmoid operator is used to accelerate the training
     process of language model. This operator organizes the classes into a
@@ -4130,11 +4178,17 @@ def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None):
         label (Variable): The tensor variable contains labels of training data.
             It's a tensor with shape is :math:`[N \\times 1]`.
         num_classes: (int), The number of classes, must not be less than 2.
-        param_attr (ParamAttr|list of ParamAttr, default None): The parameter
-            attribute for learnable parameters/weights of this layer.
-        bias_attr (ParamAttr|list of ParamAttr, default None): The parameter
-            attribute for the bias of this layer. If it is set to False, no
-            bias will be applied.
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
+            of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid
+            will create ParamAttr as param_attr. If the Initializer of the param_attr
+            is not set, the parameter is initialized with Xavier. Default: None.
+        bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of hsigmoid.
+            If it is set to False, no bias will be added to the output units.
+            If it is set to None or one attribute of ParamAttr, hsigmoid
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized zero. Default: None.
+        name (str|None): A name for this layer (optional). If set None, the layer
+            will be named automatically. Default: None.

     Returns:
         Out: (Tensor) The cost of hierarchical sigmoid operator. The shape is [N, 1].
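hsigmoid now follows the same convention: bias_attr=False drops the bias entirely, and name labels the layer; a sketch with assumed shapes:

    import paddle.fluid as fluid

    feat = fluid.layers.data(name='feat', shape=[128], dtype='float32')
    lbl = fluid.layers.data(name='lbl', shape=[1], dtype='int64')
    cost = fluid.layers.hsigmoid(input=feat, label=lbl, num_classes=100,
                                 bias_attr=False, name='hsig')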
