
Commit 159dd83

Haichao-Zhang authored and emailweixu committed
split dotmul_projection and dotmul_operator (#87)
* split dotmul_projection and dotmul_operator
* bug fix in output size checking for mixed layer
1 parent 90b9cba commit 159dd83
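
After this split, the projection form and the operator form are built by separate helpers instead of being dispatched on the input type of dotmul_projection. A minimal usage sketch, assuming the standard trainer_config_helpers config-file import and placeholder layers a and b (the calls mirror the updated test config at the bottom of this diff):

from paddle.trainer_config_helpers import *

a = data_layer(name="a", size=128)
b = data_layer(name="b", size=128)

# Projection form: element-wise multiply one layer by a learned weight vector.
proj = dotmul_projection(input=a)

# Operator form: element-wise multiply two layers of the same size.
op = dotmul_operator(x=a, y=b)

# Either can feed a mixed_layer; with the new size=0 default the layer size is inferred.
m = mixed_layer(input=[op, proj])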

3 files changed: 55 additions, 43 deletions


python/paddle/trainer/config_parser.py

Lines changed: 14 additions & 5 deletions
@@ -2430,7 +2430,6 @@ def __init__(
         config_assert(inputs, 'inputs cannot be empty')
         super(MixedLayer, self).__init__(
             name, 'mixed', size, inputs=inputs, **xargs)
-
         operator_input_index = []
         for operator in self.operators:
             operator_conf = operator.operator_conf
@@ -2445,21 +2444,31 @@ def __init__(
                 input_layer = self.get_input_layer(input_index)
                 operator_conf.input_sizes.append(input_layer.size)
                 operator_input_index.append(input_index)
-            if self.config.size == 0:
+            if self.config.size == 0:
                 size = operator.calc_output_size(operator_conf.input_sizes)
                 if size != 0:
                     self.set_layer_size(size)
-
+            else:
+                size = operator.calc_output_size(operator_conf.input_sizes)
+                if size != 0:
+                    config_assert(size == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (size, self.config.size))
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             input = self.inputs[input_index]
             if input_index not in operator_input_index:
                 config_assert(isinstance(input, Projection), "input should be projection or operation")
-            if self.config.size == 0 and isinstance(input, Projection):
+            if self.config.size == 0 and isinstance(input, Projection):
                 size = input.calc_output_size(input_layer)
                 if size != 0:
                     self.set_layer_size(size)
-
+            elif isinstance(input, Projection):
+                sz = input.calc_output_size(input_layer)
+                if sz != 0:
+                    config_assert(sz == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (sz, self.config.size))
         config_assert(size != 0, "size is not set")
 
         for input_index in xrange(len(self.inputs)):
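
The second hunk is the output-size check mentioned in the commit message: when mixed_layer is given an explicit size, any projection or operator whose output size can be computed is now asserted to match it; when size stays at 0 the layer size is still inferred from the inputs. A small sketch of both cases, assuming y1 is a LayerOutput as in the updated test config:

# size left at the new default of 0: inferred from the projection's output size.
m1 = mixed_layer(input=[dotmul_projection(input=y1)])

# size given explicitly: checked against every input; a mismatch fails config
# parsing with "different inputs have different size: %s vs. %s".
m2 = mixed_layer(size=y1.size, input=[dotmul_projection(input=y1)])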

python/paddle/trainer_config_helpers/layers.py

Lines changed: 37 additions & 37 deletions
@@ -28,7 +28,7 @@
 import copy
 
 __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
-           "identity_projection", "dotmul_projection",
+           "identity_projection", "dotmul_projection", "dotmul_operator",
            "table_projection", "mixed_layer", "data_layer",
            "embedding_layer", "fc_layer", "grumemory",
            "pooling_layer", "lstmemory", "last_seq", "first_seq",
@@ -389,7 +389,7 @@ def identity_projection(input, offset=None):
 @wrap_param_attr_default()
 def dotmul_projection(input, param_attr=None, scale=1):
     """
-    1. DotMulProjection if input is a layer.
+    DotMulProjection with a layer as input.
     It performs element-wise multiplication with weight.
 
     .. math::
@@ -403,48 +403,45 @@ def dotmul_projection(input, param_attr=None, scale=1):
 
        proj = dotmul_projection(input=layer)
 
-    2. DotMulOperator if input is a list or tuple.
-    It takes two inputs, performs element-wise multiplication:
-
-    .. math::
-       out.row[i] += scale * (in1.row[i] .* in2.row[i])
-
-    where :math:`.*` means element-wise multiplication, and
-    scale is a config scalar, its default value is one.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       op = dotmul_projection(input=[layer1, layer2],
-                              scale=2.0)
-
     :param input: Input layer.
-    :type input: LayerOutput|list|tuple
+    :type input: LayerOutput
     :param param_attr: Parameter config, None if use default.
     :type param_attr: ParameterAttribute
     :param scale: config scalar, default value is one.
     :type scale: float
-    :return: A DotMulProjection or DotMulOperator Object.
-    :rtype: DotMulProjection or DotMulOperator
+    :return: A DotMulProjection Object.
+    :rtype: DotMulProjection
     """
-    if isinstance(input, LayerOutput):
-        proj = DotMulProjection(input_layer_name=input.name,
+    proj = DotMulProjection(input_layer_name=input.name,
                             size=input.size,
                             **param_attr.attr)
-        proj.origin = input
-        proj.origin.projection = "dot_mul"
-        return proj
-    else:
-        assert isinstance(input, list) or isinstance(input, tuple)
-        assert len(input) == 2
-        assert param_attr is None
-        op = DotMulOperator(input_layer_name=[x.name for x in input],
-                            scale=scale)
-        op.origin = input
-        op.origin.operator = "dot_mul"
-        return op
+    proj.origin = input
+    return proj
 
+def dotmul_operator(x, y, scale=1):
+    """
+    DotMulOperator takes two inputs and performs element-wise multiplication:
+    .. math::
+       out.row[i] += scale * (in1.row[i] .* in2.row[i])
+    where :math:`.*` means element-wise multiplication, and
+    scale is a config scalar, its default value is one.
+    The example usage is:
+    .. code-block:: python
+       op = dotmul_operator(x, y,
+                            scale=1)
+    :param input: Input layer
+    :type input: LayerOutput
+    :param scale: config scalar, default value is one.
+    :type scale: float
+    :return: A DotMulOperator Object.
+    :rtype: DotMulOperator
+    """
+    assert isinstance(x, LayerOutput)
+    assert isinstance(y, LayerOutput)
+    op = DotMulOperator(input_layer_names=[x.name, y.name],
+                        scale=scale)
+    op.origin = [x, y]
+    return op
 
 @wrap_bias_attr_default(['padding_attr'])
 def context_projection(input, context_len, context_start=None,
@@ -539,7 +536,10 @@ def __add__(self, other):
         if not self.finalized:
             assert isinstance(other, Projection) or isinstance(other, Operator)
             self.inputs.append(other)
-            self.parents.append(other.origin)
+            if isinstance(other, Projection):
+                self.parents.append(other.origin)
+            else:
+                self.parents.extend(other.origin)
             return self
         else:
             raise MixedLayerType.AddToSealedMixedLayerException()
@@ -565,7 +565,7 @@ def __exit__(self, *args, **kwargs):
 @wrap_act_default(act=LinearActivation())
 @wrap_bias_attr_default(has_bias=False)
 @layer_support(ERROR_CLIPPING, DROPOUT)
-def mixed_layer(size, input=None, name=None, act=None, bias_attr=False,
+def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
                 layer_attr=None):
     """
     Mixed Layer. A mixed layer will add all inputs together, then activate.
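
The __add__ and mixed_layer changes above also affect the incremental form of mixed_layer, where an operator contributes a list of parent layers while a projection contributes a single one. A sketch of that form, assuming a and b are LayerOutput objects of equal size:

# Hypothetical layers a and b; the context-manager form exercises MixedLayerType.__add__.
with mixed_layer(size=a.size) as m:
    m += dotmul_operator(x=a, y=b)    # operator: origin is [a, b], so parents is extended
    m += dotmul_projection(input=a)   # projection: origin is a single layer, so it is appended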

python/paddle/trainer_config_helpers/tests/layers_test_config.py

Lines changed: 4 additions & 1 deletion
@@ -38,8 +38,11 @@
 
 outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
 
+dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
+                            dotmul_projection(input=y1)])
+
 # for ctc
-tmp = fc_layer(input=x1,
+tmp = fc_layer(input=[x1, dotmul],
                size=num_classes + 1,
                act=SoftmaxActivation())
 ctc = ctc_layer(input=tmp,
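
For intuition only (not part of this commit), the element-wise arithmetic that dotmul_operator expresses, out.row[i] += scale * (in1.row[i] .* in2.row[i]), can be checked with plain numpy:

import numpy as np

x = np.array([[1.0, 2.0, 3.0]])   # one row of in1
y = np.array([[4.0, 5.0, 6.0]])   # one row of in2
scale = 2.0
out = scale * (x * y)             # element-wise product, then scale -> [[8., 20., 36.]]
print(out)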
