@@ -592,14 +592,17 @@ def __enter__(self):
592
592
def __exit__ (self , * args , ** kwargs ):
593
593
del args , kwargs # unused parameter to suppress warning
594
594
assert len (self .inputs ) != 0
595
- MixedLayer (
595
+ ml = MixedLayer (
596
596
name = self .name ,
597
597
size = self .size ,
598
598
active_type = self .activation .name ,
599
599
bias = ParamAttr .to_bias (self .bias_attr ),
600
600
inputs = self .inputs ,
601
601
** ExtraLayerAttribute .to_kwargs (self .layer_attr )
602
602
)
603
+ # update the size which might be computed inside MixedLayer
604
+ # according to the operator's output size
605
+ self .size = ml .config .size
603
606
604
607
605
608
@wrap_name_default ("mixed" )
@@ -2104,7 +2107,7 @@ def __reduce_concat_type__(a, b):
2104
2107
2105
2108
if layer_type == LayerType .CONCAT_LAYER :
2106
2109
assert not bias_attr
2107
-
2110
+
2108
2111
Layer (
2109
2112
name = name , type = layer_type ,
2110
2113
inputs = [x .name for x in input ] if is_concat_layer else input ,
@@ -2682,7 +2685,7 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
2682
2685
assert isinstance (input1 , LayerOutput )
2683
2686
assert isinstance (input2 , LayerOutput )
2684
2687
Layer (name = name ,
2685
- type = "out_prod" ,
2688
+ type = LayerType . OUT_PROD_LAYER ,
2686
2689
inputs = [input1 .name , input2 .name ],
2687
2690
** ExtraLayerAttribute .to_kwargs (layer_attr ))
2688
2691
return LayerOutput (name = name ,
@@ -2849,7 +2852,7 @@ def __real_step__(*args):
2849
2852
2850
2853
def __cost_input__ (input , label , weight = None ):
2851
2854
"""
2852
- inputs and parents for cost layers.
2855
+ inputs and parents for cost layers.
2853
2856
"""
2854
2857
ipts = [Input (input .name ), Input (label .name )]
2855
2858
parents = [input , label ]
@@ -2858,7 +2861,7 @@ def __cost_input__(input, label, weight=None):
2858
2861
ipts .append (Input (weight .name ))
2859
2862
parents .append (weight )
2860
2863
return ipts , parents
2861
-
2864
+
2862
2865
2863
2866
@wrap_name_default ()
2864
2867
@layer_support ()
@@ -2943,7 +2946,7 @@ def __add_evaluator__(e):
2943
2946
2944
2947
2945
2948
def conv_operator (img , filter , filter_size , num_filters ,
2946
- num_channel = None , stride = 1 , padding = 0 ,
2949
+ num_channels = None , stride = 1 , padding = 0 ,
2947
2950
filter_size_y = None , stride_y = None , padding_y = None ):
2948
2951
"""
2949
2952
Different from img_conv_layer, conv_op is an Operator, which can be used
@@ -2973,8 +2976,8 @@ def conv_operator(img, filter, filter_size, num_filters,
2973
2976
:type filter_size_y: int
2974
2977
:param num_filters: channel of output data.
2975
2978
:type num_filters: int
2976
- :param num_channel : channel of input data.
2977
- :type num_channel : int
2979
+ :param num_channels : channel of input data.
2980
+ :type num_channels : int
2978
2981
:param stride: The x dimension of the stride.
2979
2982
:type stride: int
2980
2983
:param stride_y: The y dimension of the stride.
@@ -2993,19 +2996,19 @@ def conv_operator(img, filter, filter_size, num_filters,
2993
2996
if padding_y is None :
2994
2997
padding_y = padding
2995
2998
2996
- if num_channel is None :
2997
- num_channel = img .num_filters
2999
+ if num_channels is None :
3000
+ num_channels = img .num_filters
2998
3001
2999
3002
assert isinstance (filter , LayerOutput )
3000
3003
if filter .size is not None :
3001
- filter .size = filter_size * filter_size_y * num_filters * num_channel
3004
+ filter .size = filter_size * filter_size_y * num_filters * num_channels
3002
3005
3003
3006
op = ConvOperator (input_layer_names = [img .name , filter .name ],
3004
3007
num_filters = num_filters ,
3005
3008
conv_conf = Conv (filter_size = filter_size ,
3006
3009
padding = padding ,
3007
3010
stride = stride ,
3008
- channels = num_channel ,
3011
+ channels = num_channels ,
3009
3012
filter_size_y = filter_size_y ,
3010
3013
padding_y = padding_y ,
3011
3014
stride_y = stride_y ,
@@ -3045,8 +3048,8 @@ def conv_projection(input, filter_size, num_filters,
3045
3048
:type filter_size_y: int
3046
3049
:param num_filters: channel of output data.
3047
3050
:type num_filters: int
3048
- :param num_channel : channel of input data.
3049
- :type num_channel : int
3051
+ :param num_channels : channel of input data.
3052
+ :type num_channels : int
3050
3053
:param stride: The x dimension of the stride.
3051
3054
:type stride: int
3052
3055
:param stride_y: The y dimension of the stride.
@@ -3537,15 +3540,15 @@ def maxout_layer(input,
3537
3540
- Input: output of a conv layer.
3538
3541
- Output: feature map size same as input. Channel is (input channel) / groups.
3539
3542
3540
- So groups should be larger than 1, and the num of channels should be able
3543
+ So groups should be larger than 1, and the num of channels should be able
3541
3544
to be divided by groups.
3542
3545
3543
- Please refer to Paper:
3546
+ Please refer to Paper:
3544
3547
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
3545
3548
- Multi-digit Number Recognition from Street View \
3546
3549
Imagery using Deep Convolutional Neural Networks: \
3547
3550
https://arxiv.org/pdf/1312.6082v4.pdf
3548
-
3551
+
3549
3552
The simple usage is:
3550
3553
3551
3554
.. code-block:: python
@@ -3790,9 +3793,9 @@ def nce_layer(input, label, num_classes, weight=None,
3790
3793
:param weight: weight layer, can be None(default)
3791
3794
:type weight: LayerOutput
3792
3795
:param num_classes: number of classes.
3793
- :type num_classes: int
3796
+ :type num_classes: int
3794
3797
:param num_neg_samples: number of negative samples. Default is 10.
3795
- :type num_neg_samples: int
3798
+ :type num_neg_samples: int
3796
3799
:param neg_distribution: The distribution for generating the random negative labels.
3797
3800
A uniform distribution will be used if not provided.
3798
3801
If not None, its length must be equal to num_classes.
@@ -3813,7 +3816,7 @@ def nce_layer(input, label, num_classes, weight=None,
3813
3816
assert isinstance (neg_distribution , collections .Sequence )
3814
3817
assert len (neg_distribution ) == num_classes
3815
3818
assert sum (neg_distribution ) == 1
3816
-
3819
+
3817
3820
ipts_for_layer = []
3818
3821
parents = []
3819
3822
for each_input in input :
0 commit comments