     'chunk_eval',
     'sequence_conv',
     'conv2d',
+    'conv3d',
     'sequence_pool',
     'sequence_softmax',
     'softmax',
     'pool2d',
+    'pool3d',
     'batch_norm',
     'beam_search_decode',
     'conv2d_transpose',
+    'conv3d_transpose',
     'sequence_expand',
     'lstm_unit',
     'reduce_sum',
@@ -1385,13 +1388,12 @@ def conv3d(input,
 
     The convolution3D layer calculates the output based on the input, filter
     and strides, paddings, dilations, groups parameters. Input(Input) and
-    Output(Output) are in NCHW format. Where N is batch size, C is the number of
-    channels, H is the height of the feature, and W is the width of the feature.
-    The details of convolution layer, please refer UFLDL's `convolution,
-    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
-    If bias attribution and activation type are provided, bias is added to the
-    output of the convolution, and the corresponding activation function is
-    applied to the final result.
+    Output(Output) are in NCDHW format, where N is the batch size, C is the number
+    of channels, D is the depth of the feature, H is the height of the feature,
+    and W is the width of the feature. Convolution3D is similar to Convolution2D
+    but adds one dimension (depth). If a bias attribute and an activation type are
+    provided, the bias is added to the output of the convolution and the
+    corresponding activation function is applied to the final result.
 
     For each input :math:`X`, the equation is:
 
@@ -1401,8 +1403,8 @@ def conv3d(input,
 
     In the above equation:
 
-    * :math:`X`: Input value, a tensor with NCHW format.
-    * :math:`W`: Filter value, a tensor with MCHW format.
+    * :math:`X`: Input value, a tensor with NCDHW format.
+    * :math:`W`: Filter value, a tensor with MCDHW format.
     * :math:`\\ast`: Convolution operation.
     * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
     * :math:`\\sigma`: Activation function.
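For readers of the diff, a minimal usage sketch of the new conv3d layer may help. The names, shapes and parameter values below are illustrative assumptions, not part of this patch:

.. code-block:: python

    import paddle.fluid as fluid

    # A single-channel NCDHW volume; fluid.layers.data omits the batch dimension.
    data = fluid.layers.data(name='data', shape=[1, 12, 32, 32], dtype='float32')
    # 2 output channels, a cubic 3x3x3 filter, ReLU applied after the bias add.
    conv = fluid.layers.conv3d(input=data, num_filters=2, filter_size=3, act='relu')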
@@ -1433,16 +1435,16 @@ def conv3d(input,
         num_filters(int): The number of filter. It is as same as the output
             image channel.
         filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
-            it must contain two integers, (filter_size_D, filter_size_H, filter_size_W).
+            it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
             Otherwise, the filter will be a square.
         stride (int|tuple): The stride size. If stride is a tuple, it must
-            contain two integers, (stride_D, stride_H, stride_W). Otherwise, the
+            contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
             stride_D = stride_H = stride_W = stride. Default: stride = 1.
         padding (int|tuple): The padding size. If padding is a tuple, it must
-            contain two integers, (padding_D, padding_H, padding_W). Otherwise, the
+            contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
             padding_D = padding_H = padding_W = padding. Default: padding = 0.
         dilation (int|tuple): The dilation size. If dilation is a tuple, it must
-            contain two integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
+            contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
             dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
         groups (int): The groups number of the Conv3d Layer. According to grouped
             convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
@@ -1528,7 +1530,7 @@ def _get_default_param_initializer():
             'use_mkldnn': use_mkldnn
         })
 
-    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=3)
+    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
 
     return helper.append_activation(pre_act)
 
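The dim_start=1, dim_end=2 change sizes the bias over only the channel dimension of the NCDHW output, rather than over channels and depth as dim_end=3 would. A NumPy sketch of the intended broadcast, assuming append_bias_op adds one bias value per output channel (the NumPy analogue is illustrative only, not the helper's actual implementation):

.. code-block:: python

    import numpy as np

    # Assumed NCDHW conv3d output and a per-channel bias (C = 2).
    pre_bias = np.zeros((8, 2, 12, 32, 32), dtype=np.float32)
    bias = np.array([0.5, -0.5], dtype=np.float32)

    # Indexing the bias by the channel axis only mirrors dim_start=1, dim_end=2;
    # depth, height and width are broadcast over.
    pre_act = pre_bias + bias.reshape(1, -1, 1, 1, 1)
    print(pre_act.shape)  # (8, 2, 12, 32, 32)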
@@ -1720,12 +1722,84 @@ def pool2d(input,
     if not isinstance(use_cudnn, bool):
         raise ValueError("use_cudnn should be True or False")
 
-    helper = LayerHelper('pool2d', **locals())
+    l_type = 'pool2d'
+
+    helper = LayerHelper(l_type, **locals())
+    dtype = helper.input_dtype()
+    pool_out = helper.create_tmp_variable(dtype)
+
+    helper.append_op(
+        type=l_type,
+        inputs={"X": input},
+        outputs={"Out": pool_out},
+        attrs={
+            "pooling_type": pool_type,
+            "ksize": pool_size,
+            "global_pooling": global_pooling,
+            "strides": pool_stride,
+            "paddings": pool_padding,
+            "use_cudnn": use_cudnn,
+            "ceil_mode": ceil_mode,
+            "use_mkldnn": use_mkldnn
+        })
+
+    return pool_out
+
+
+def pool3d(input,
+           pool_size=-1,
+           pool_type="max",
+           pool_stride=1,
+           pool_padding=0,
+           global_pooling=False,
+           use_cudnn=True,
+           ceil_mode=False,
+           use_mkldnn=False,
+           name=None):
+    """
+    This function adds the operator for pooling in 3 dimensions, using the
+    pooling configurations given in the input parameters.
+
+    Args:
+        input (Variable): ${input_comment}
+        pool_size (int): ${ksize_comment}
+        pool_type (str): ${pooling_type_comment}
+        pool_stride (int): stride of the pooling layer.
+        pool_padding (int): padding size.
+        global_pooling (bool): ${global_pooling_comment}
+        use_cudnn (bool): ${use_cudnn_comment}
+        ceil_mode (bool): ${ceil_mode_comment}
+        use_mkldnn (bool): ${use_mkldnn_comment}
+        name (str): A name for this layer (optional). If set to None, the layer
+            will be named automatically.
+
+    Returns:
+        Variable: output of pool3d layer.
+    """
+    if pool_type not in ["max", "avg"]:
+        raise ValueError(
+            "Unknown pool_type: '%s'. It can only be 'max' or 'avg'." %
+            str(pool_type))
+
+    if global_pooling is False and pool_size == -1:
+        raise ValueError(
+            "When global_pooling is False, pool_size must be passed "
+            "and be a valid value. Received pool_size: " + str(pool_size))
+
+    pool_size = utils.convert_to_list(pool_size, 3, 'pool_size')
+    pool_padding = utils.convert_to_list(pool_padding, 3, 'pool_padding')
+    pool_stride = utils.convert_to_list(pool_stride, 3, 'pool_stride')
+
+    if not isinstance(use_cudnn, bool):
+        raise ValueError("use_cudnn should be True or False")
+
+    l_type = "pool3d"
+    helper = LayerHelper(l_type, **locals())
     dtype = helper.input_dtype()
     pool_out = helper.create_tmp_variable(dtype)
 
     helper.append_op(
-        type="pool2d",
+        type=l_type,
         inputs={"X": input},
         outputs={"Out": pool_out},
         attrs={
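A minimal usage sketch of the new pool3d layer (the data shape and parameter values are assumptions for illustration, not taken from the patch):

.. code-block:: python

    import paddle.fluid as fluid

    # NCDHW input; the batch dimension is implicit in fluid.layers.data.
    data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32')
    # 2x2x2 max pooling with stride 2 halves depth, height and width.
    pool = fluid.layers.pool3d(
        input=data, pool_size=2, pool_type='max', pool_stride=2)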
@@ -2146,6 +2220,173 @@ def conv2d_transpose(input,
     return out
 
 
+def conv3d_transpose(input,
+                     num_filters,
+                     output_size=None,
+                     filter_size=None,
+                     padding=0,
+                     stride=1,
+                     dilation=1,
+                     groups=None,
+                     param_attr=None,
+                     bias_attr=None,
+                     use_cudnn=True,
+                     act=None,
+                     name=None):
+    """
+    **Convolution3D transpose layer**
+
+    The convolution3D transpose layer calculates the output based on the input,
+    filter, and dilations, strides, paddings. Input(Input) and output(Output)
+    are in NCDHW format. Where N is batch size, C is the number of channels,
+    D is the depth of the feature, H is the height of the feature, and W
+    is the width of the feature. Parameters (dilations, strides, paddings) are
+    three elements, representing depth, height and width, respectively.
+    For details of the convolution transpose layer, please refer to the following
+    explanation and references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
+
+    For each input :math:`X`, the equation is:
+
+    .. math::
+
+        Out = W \\ast X
+
+    In the above equation:
+
+    * :math:`X`: Input value, a tensor with NCDHW format.
+    * :math:`W`: Filter value, a tensor with MCDHW format.
+    * :math:`\\ast`: Convolution transpose operation.
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
+      different.
+
+    Example:
+
+       - Input:
+
+         Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
+
+         Filter shape: $(C_{in}, C_{out}, D_f, H_f, W_f)$
+
+       - Output:
+
+         Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
+
+       Where
+
+       .. math::
+
+          D_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
+          H_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
+          W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
+
+    Args:
+        input(Variable): The input image with [N, C, D, H, W] format.
+        num_filters(int): The number of the filter. It is the same as the output
+            image channel.
+        output_size(int|tuple|None): The output image size. If output size is a
+            tuple, it must contain three integers, (image_D, image_H, image_W). This
+            parameter only works when filter_size is None.
+        filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
+            it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
+            Otherwise, the filter will be a square. None if output_size is used to
+            calculate filter_size.
+        padding(int|tuple): The padding size. If padding is a tuple, it must
+            contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
+            padding_D = padding_H = padding_W = padding. Default: padding = 0.
+        stride(int|tuple): The stride size. If stride is a tuple, it must
+            contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
+            stride_D = stride_H = stride_W = stride. Default: stride = 1.
+        dilation(int|tuple): The dilation size. If dilation is a tuple, it must
+            contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
+            dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
+        groups(int): The groups number of the Conv3d transpose layer. Inspired by
+            grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+            when group=2, the first half of the filters is only connected to the
+            first half of the input channels, while the second half of the
+            filters is only connected to the second half of the input channels.
+            Default: groups=1
+        param_attr(ParamAttr): The parameters to the Conv3d_transpose Layer.
+            Default: None
+        bias_attr(ParamAttr): Bias parameter for the Conv3d transpose layer.
+            Default: None
+        use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
+            library is installed. Default: True
+        act(str): Activation type. Default: None
+        name(str|None): A name for this layer (optional). If set to None, the layer
+            will be named automatically.
+
+    Returns:
+        Variable: The tensor variable storing the convolution transpose result.
+
+    Raises:
+        ValueError: If the shapes of input, filter_size, stride, padding and
+            groups mismatch.
+
+    Examples:
+        .. code-block:: python
+
+          data = fluid.layers.data(
+              name='data', shape=[3, 12, 32, 32], dtype='float32')
+          conv3d_transpose = fluid.layers.conv3d_transpose(
+              input=data, num_filters=2, filter_size=3)
+    """
+    l_type = "conv3d_transpose"
+    helper = LayerHelper(l_type, **locals())
+    if not isinstance(input, Variable):
+        raise TypeError("Input of conv3d_transpose must be Variable")
+    input_channel = input.shape[1]
+
+    padding = utils.convert_to_list(padding, 3, 'padding')
+    stride = utils.convert_to_list(stride, 3, 'stride')
+    dilation = utils.convert_to_list(dilation, 3, 'dilation')
+
+    if not isinstance(use_cudnn, bool):
+        raise ValueError("use_cudnn should be True or False")
+
+    if filter_size is None:
+        if output_size is None:
+            raise ValueError("output_size must be set when filter_size is None")
+        if isinstance(output_size, int):
+            output_size = [output_size, output_size, output_size]
+
+        d_in = input.shape[2]
+        h_in = input.shape[3]
+        w_in = input.shape[4]
+
+        filter_size_d = (output_size[0] - (d_in - 1) * stride[0] + 2 *
+                         padding[0] - 1) / dilation[0] + 1
+        filter_size_h = (output_size[1] - (h_in - 1) * stride[1] + 2 *
+                         padding[1] - 1) / dilation[1] + 1
+        filter_size_w = (output_size[2] - (w_in - 1) * stride[2] + 2 *
+                         padding[2] - 1) / dilation[2] + 1
+        filter_size = [filter_size_d, filter_size_h, filter_size_w]
+    else:
+        filter_size = utils.convert_to_list(filter_size, 3,
+                                            'conv3d_transpose.filter_size')
+
+    groups = 1 if groups is None else groups
+    filter_shape = [input_channel, num_filters / groups] + filter_size
+    img_filter = helper.create_parameter(
+        dtype=input.dtype, shape=filter_shape, attr=helper.param_attr)
+
+    pre_bias = helper.create_tmp_variable(dtype=input.dtype)
+    helper.append_op(
+        type=l_type,
+        inputs={'Input': [input],
+                'Filter': [img_filter]},
+        outputs={'Output': pre_bias},
+        attrs={
+            'strides': stride,
+            'paddings': padding,
+            'dilations': dilation,
+            'groups': groups,
+            'use_cudnn': use_cudnn
+        })
+
+    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
+    out = helper.append_activation(pre_act)
+    return out
+
+
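As a quick check of the output-size formula in the docstring above, the sketch below plugs in the sizes from the docstring example (3x3x3 filter with the default stride 1, padding 0, dilation 1); the resulting [14, 34, 34] spatial shape is derived here, not asserted by the patch:

.. code-block:: python

    # D_out = (D_in - 1) * stride - 2 * padding + dilation * (k - 1) + 1
    def out_size(in_size, stride, padding, dilation, k):
        return (in_size - 1) * stride - 2 * padding + dilation * (k - 1) + 1

    print([out_size(s, 1, 0, 1, 3) for s in (12, 32, 32)])  # [14, 34, 34]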
 def sequence_expand(x, y, ref_level=-1, name=None):
     """Sequence Expand Layer. This layer will expand the input variable **x**
     according to specified level lod of **y**. Please note that lod level of