@@ -590,14 +590,17 @@ def __enter__(self):
590
590
def __exit__(self, *args, **kwargs):
    """
    Finish the ``with`` block by emitting the MixedLayer configuration.

    The layer is built from this object's name, size, activation name,
    bias attribute, collected inputs and extra layer attributes. After the
    layer has been created, ``self.size`` is overwritten with the size
    stored in the created layer's config.

    :param args: ignored; accepted only to satisfy the context manager
                 protocol.
    :param kwargs: ignored; accepted only to satisfy the context manager
                   protocol.
    """
    del args, kwargs  # unused parameter to suppress warning
    # At least one input must have been added before the block is closed.
    assert len(self.inputs) > 0
    mixed_layer = MixedLayer(
        name=self.name,
        size=self.size,
        active_type=self.activation.name,
        bias=ParamAttr.to_bias(self.bias_attr),
        inputs=self.inputs,
        **ExtraLayerAttribute.to_kwargs(self.layer_attr)
    )
    # update the size which might be computed inside MixedLayer
    # according to the operator's output size
    self.size = mixed_layer.config.size
601
604
602
605
603
606
@wrap_name_default ("mixed" )
@@ -2045,7 +2048,7 @@ def __reduce_concat_type__(a, b):
2045
2048
2046
2049
if layer_type == LayerType .CONCAT_LAYER :
2047
2050
assert not bias_attr
2048
-
2051
+
2049
2052
Layer (
2050
2053
name = name , type = layer_type ,
2051
2054
inputs = [x .name for x in input ] if is_concat_layer else input ,
@@ -2623,7 +2626,7 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
2623
2626
assert isinstance (input1 , LayerOutput )
2624
2627
assert isinstance (input2 , LayerOutput )
2625
2628
Layer (name = name ,
2626
- type = "out_prod" ,
2629
+ type = LayerType . OUT_PROD_LAYER ,
2627
2630
inputs = [input1 .name , input2 .name ],
2628
2631
** ExtraLayerAttribute .to_kwargs (layer_attr ))
2629
2632
return LayerOutput (name = name ,
@@ -2790,7 +2793,7 @@ def __real_step__(*args):
2790
2793
2791
2794
def __cost_input__ (input , label , weight = None ):
2792
2795
"""
2793
- inputs and parents for cost layers.
2796
+ inputs and parents for cost layers.
2794
2797
"""
2795
2798
ipts = [Input (input .name ), Input (label .name )]
2796
2799
parents = [input , label ]
@@ -2799,7 +2802,7 @@ def __cost_input__(input, label, weight=None):
2799
2802
ipts .append (Input (weight .name ))
2800
2803
parents .append (weight )
2801
2804
return ipts , parents
2802
-
2805
+
2803
2806
2804
2807
@wrap_name_default ()
2805
2808
@layer_support ()
@@ -2884,7 +2887,7 @@ def __add_evaluator__(e):
2884
2887
2885
2888
2886
2889
def conv_operator (img , filter , filter_size , num_filters ,
2887
- num_channel = None , stride = 1 , padding = 0 ,
2890
+ num_channels = None , stride = 1 , padding = 0 ,
2888
2891
filter_size_y = None , stride_y = None , padding_y = None ):
2889
2892
"""
2890
2893
Different from img_conv_layer, conv_op is an Operator, which can be used
@@ -2914,8 +2917,8 @@ def conv_operator(img, filter, filter_size, num_filters,
2914
2917
:type filter_size_y: int
2915
2918
:param num_filters: channel of output data.
2916
2919
:type num_filters: int
2917
- :param num_channel : channel of input data.
2918
- :type num_channel : int
2920
+ :param num_channels : channel of input data.
2921
+ :type num_channels : int
2919
2922
:param stride: The x dimension of the stride.
2920
2923
:type stride: int
2921
2924
:param stride_y: The y dimension of the stride.
@@ -2934,19 +2937,19 @@ def conv_operator(img, filter, filter_size, num_filters,
2934
2937
if padding_y is None :
2935
2938
padding_y = padding
2936
2939
2937
- if num_channel is None :
2938
- num_channel = img .num_filters
2940
+ if num_channels is None :
2941
+ num_channels = img .num_filters
2939
2942
2940
2943
assert isinstance (filter , LayerOutput )
2941
2944
if filter .size is not None :
2942
- filter .size = filter_size * filter_size_y * num_filters * num_channel
2945
+ filter .size = filter_size * filter_size_y * num_filters * num_channels
2943
2946
2944
2947
op = ConvOperator (input_layer_names = [img .name , filter .name ],
2945
2948
num_filters = num_filters ,
2946
2949
conv_conf = Conv (filter_size = filter_size ,
2947
2950
padding = padding ,
2948
2951
stride = stride ,
2949
- channels = num_channel ,
2952
+ channels = num_channels ,
2950
2953
filter_size_y = filter_size_y ,
2951
2954
padding_y = padding_y ,
2952
2955
stride_y = stride_y ,
@@ -2986,8 +2989,8 @@ def conv_projection(input, filter_size, num_filters,
2986
2989
:type filter_size_y: int
2987
2990
:param num_filters: channel of output data.
2988
2991
:type num_filters: int
2989
- :param num_channel : channel of input data.
2990
- :type num_channel : int
2992
+ :param num_channels : channel of input data.
2993
+ :type num_channels : int
2991
2994
:param stride: The x dimension of the stride.
2992
2995
:type stride: int
2993
2996
:param stride_y: The y dimension of the stride.
@@ -3478,15 +3481,15 @@ def maxout_layer(input,
3478
3481
- Input: output of a conv layer.
3479
3482
- Output: feature map size same as input. Channel is (input channel) / groups.
3480
3483
3481
- So groups should be larger than 1, and the num of channels should be able
3484
+ So groups should be larger than 1, and the num of channels should be able
3482
3485
to be divided by groups.
3483
3486
3484
- Please refer to Paper:
3487
+ Please refer to Paper:
3485
3488
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
3486
3489
- Multi-digit Number Recognition from Street View \
3487
3490
Imagery using Deep Convolutional Neural Networks: \
3488
3491
https://arxiv.org/pdf/1312.6082v4.pdf
3489
-
3492
+
3490
3493
The simple usage is:
3491
3494
3492
3495
.. code-block:: python
@@ -3731,9 +3734,9 @@ def nce_layer(input, label, num_classes, weight=None,
3731
3734
:param weight: weight layer, can be None(default)
3732
3735
:type weight: LayerOutput
3733
3736
:param num_classes: number of classes.
3734
- :type num_classes: int
3737
+ :type num_classes: int
3735
3738
:param num_neg_samples: number of negative samples. Default is 10.
3736
- :type num_neg_samples: int
3739
+ :type num_neg_samples: int
3737
3740
:param neg_distribution: The distribution for generating the random negative labels.
3738
3741
A uniform distribution will be used if not provided.
3739
3742
If not None, its length must be equal to num_classes.
@@ -3754,7 +3757,7 @@ def nce_layer(input, label, num_classes, weight=None,
3754
3757
assert isinstance (neg_distribution , collections .Sequence )
3755
3758
assert len (neg_distribution ) == num_classes
3756
3759
assert sum (neg_distribution ) == 1
3757
-
3760
+
3758
3761
ipts_for_layer = []
3759
3762
parents = []
3760
3763
for each_input in input :
0 commit comments