Skip to content

Commit 8d4c453

Browse files
Haonan authored and emailweixu committed
set mixedlayer output size according to input operator (#414)
* set mixedlayer output size according to input operator
* change from num_channel to num_channels for conv_operator (the old one is really misleading because all the others are num_channels)
* also changed the arg name in projections.py
1 parent 5ccf84a commit 8d4c453

File tree

3 files changed

+27
-22
lines changed

3 files changed

+27
-22
lines changed

python/paddle/trainer_config_helpers/layers.py

Lines changed: 23 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -590,14 +590,17 @@ def __enter__(self):
590590
def __exit__(self, *args, **kwargs):
591591
del args, kwargs # unused parameter to suppress warning
592592
assert len(self.inputs) != 0
593-
MixedLayer(
593+
ml = MixedLayer(
594594
name=self.name,
595595
size=self.size,
596596
active_type=self.activation.name,
597597
bias=ParamAttr.to_bias(self.bias_attr),
598598
inputs=self.inputs,
599599
**ExtraLayerAttribute.to_kwargs(self.layer_attr)
600600
)
601+
# update the size which might be computed inside MixedLayer
602+
# according to the operator's output size
603+
self.size = ml.config.size
601604

602605

603606
@wrap_name_default("mixed")
@@ -2045,7 +2048,7 @@ def __reduce_concat_type__(a, b):
20452048

20462049
if layer_type == LayerType.CONCAT_LAYER:
20472050
assert not bias_attr
2048-
2051+
20492052
Layer(
20502053
name=name, type=layer_type,
20512054
inputs=[x.name for x in input] if is_concat_layer else input,
@@ -2623,7 +2626,7 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
26232626
assert isinstance(input1, LayerOutput)
26242627
assert isinstance(input2, LayerOutput)
26252628
Layer(name=name,
2626-
type="out_prod",
2629+
type=LayerType.OUT_PROD_LAYER,
26272630
inputs=[input1.name, input2.name],
26282631
**ExtraLayerAttribute.to_kwargs(layer_attr))
26292632
return LayerOutput(name=name,
@@ -2790,7 +2793,7 @@ def __real_step__(*args):
27902793

27912794
def __cost_input__(input, label, weight=None):
27922795
"""
2793-
inputs and parents for cost layers.
2796+
inputs and parents for cost layers.
27942797
"""
27952798
ipts = [Input(input.name), Input(label.name)]
27962799
parents = [input, label]
@@ -2799,7 +2802,7 @@ def __cost_input__(input, label, weight=None):
27992802
ipts.append(Input(weight.name))
28002803
parents.append(weight)
28012804
return ipts, parents
2802-
2805+
28032806

28042807
@wrap_name_default()
28052808
@layer_support()
@@ -2884,7 +2887,7 @@ def __add_evaluator__(e):
28842887

28852888

28862889
def conv_operator(img, filter, filter_size, num_filters,
2887-
num_channel=None, stride=1, padding=0,
2890+
num_channels=None, stride=1, padding=0,
28882891
filter_size_y=None, stride_y=None, padding_y=None):
28892892
"""
28902893
Different from img_conv_layer, conv_op is an Operator, which can be used
@@ -2914,8 +2917,8 @@ def conv_operator(img, filter, filter_size, num_filters,
29142917
:type filter_size_y: int
29152918
:param num_filters: channel of output data.
29162919
:type num_filters: int
2917-
:param num_channel: channel of input data.
2918-
:type num_channel: int
2920+
:param num_channels: channel of input data.
2921+
:type num_channels: int
29192922
:param stride: The x dimension of the stride.
29202923
:type stride: int
29212924
:param stride_y: The y dimension of the stride.
@@ -2934,19 +2937,19 @@ def conv_operator(img, filter, filter_size, num_filters,
29342937
if padding_y is None:
29352938
padding_y = padding
29362939

2937-
if num_channel is None:
2938-
num_channel = img.num_filters
2940+
if num_channels is None:
2941+
num_channels = img.num_filters
29392942

29402943
assert isinstance(filter, LayerOutput)
29412944
if filter.size is not None:
2942-
filter.size = filter_size * filter_size_y * num_filters * num_channel
2945+
filter.size = filter_size * filter_size_y * num_filters * num_channels
29432946

29442947
op = ConvOperator(input_layer_names=[img.name, filter.name],
29452948
num_filters=num_filters,
29462949
conv_conf=Conv(filter_size=filter_size,
29472950
padding=padding,
29482951
stride=stride,
2949-
channels=num_channel,
2952+
channels=num_channels,
29502953
filter_size_y=filter_size_y,
29512954
padding_y=padding_y,
29522955
stride_y=stride_y,
@@ -2986,8 +2989,8 @@ def conv_projection(input, filter_size, num_filters,
29862989
:type filter_size_y: int
29872990
:param num_filters: channel of output data.
29882991
:type num_filters: int
2989-
:param num_channel: channel of input data.
2990-
:type num_channel: int
2992+
:param num_channels: channel of input data.
2993+
:type num_channels: int
29912994
:param stride: The x dimension of the stride.
29922995
:type stride: int
29932996
:param stride_y: The y dimension of the stride.
@@ -3478,15 +3481,15 @@ def maxout_layer(input,
34783481
- Input: output of a conv layer.
34793482
- Output: feature map size same as input. Channel is (input channel) / groups.
34803483
3481-
So groups should be larger than 1, and the num of channels should be able
3484+
So groups should be larger than 1, and the num of channels should be able
34823485
to devided by groups.
34833486
3484-
Please refer to Paper:
3487+
Please refer to Paper:
34853488
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
34863489
- Multi-digit Number Recognition from Street View \
34873490
Imagery using Deep Convolutional Neural Networks: \
34883491
https://arxiv.org/pdf/1312.6082v4.pdf
3489-
3492+
34903493
The simple usage is:
34913494
34923495
.. code-block:: python
@@ -3731,9 +3734,9 @@ def nce_layer(input, label, num_classes, weight=None,
37313734
:param weight: weight layer, can be None(default)
37323735
:type weight: LayerOutput
37333736
:param num_classes: number of classes.
3734-
:type num_classes: int
3737+
:type num_classes: int
37353738
:param num_neg_samples: number of negative samples. Default is 10.
3736-
:type num_neg_samples: int
3739+
:type num_neg_samples: int
37373740
:param neg_distribution: The distribution for generating the random negative labels.
37383741
A uniform distribution will be used if not provided.
37393742
If not None, its length must be equal to num_classes.
@@ -3754,7 +3757,7 @@ def nce_layer(input, label, num_classes, weight=None,
37543757
assert isinstance(neg_distribution, collections.Sequence)
37553758
assert len(neg_distribution) == num_classes
37563759
assert sum(neg_distribution) == 1
3757-
3760+
37583761
ipts_for_layer = []
37593762
parents = []
37603763
for each_input in input:

python/paddle/trainer_config_helpers/tests/configs/projections.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535

3636
with mixed_layer() as m7:
3737
m7 += conv_operator(img=img, filter=flt, num_filters=64,
38-
num_channel=1, filter_size=3)
38+
num_channels=1, filter_size=3)
3939

4040
end = mixed_layer(input=[full_matrix_projection(input=m5),
4141
trans_full_matrix_projection(input=m6),

python/paddle/trainer_config_helpers/tests/layers_test_config.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,11 @@
2929
filter=y1,
3030
filter_size=1,
3131
num_filters=5,
32-
num_channel=5,
32+
num_channels=5,
3333
stride=1)])
3434

35+
assert z1.size > 0
36+
3537
y2 = fc_layer(input=y, size=15)
3638

3739
cos1 = cos_sim(a=x1, b=y1)

0 commit comments

Comments
 (0)