@@ -1305,8 +1305,6 @@ def conv2d(input,
1305
1305
conv2d = fluid.layers.conv2d(
1306
1306
input=data, num_filters=2, filter_size=3, act="relu")
1307
1307
"""
1308
- if stride is None :
1309
- stride = [1 , 1 ]
1310
1308
1311
1309
num_channels = input .shape [1 ]
1312
1310
@@ -1369,6 +1367,172 @@ def _get_default_param_initializer():
1369
1367
return helper .append_activation (pre_act )
1370
1368
1371
1369
1370
def conv3d(input,
           num_filters,
           filter_size,
           stride=1,
           padding=0,
           dilation=1,
           groups=None,
           param_attr=None,
           bias_attr=None,
           use_cudnn=True,
           use_mkldnn=False,
           act=None,
           name=None):
    """
    **Convolution3D Layer**

    The convolution3D layer calculates the output based on the input, filter
    and strides, paddings, dilations, groups parameters. Input(Input) and
    Output(Output) are in NCDHW format. Where N is batch size, C is the number
    of channels, D is the depth of the feature, H is the height of the feature,
    and W is the width of the feature.
    For the details of the convolution layer, please refer to UFLDL's
    `convolution,
    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
    If bias attribution and activation type are provided, bias is added to the
    output of the convolution, and the corresponding activation function is
    applied to the final result.

    For each input :math:`X`, the equation is:

    .. math::

        Out = \\sigma (W \\ast X + b)

    In the above equation:

    * :math:`X`: Input value, a tensor with NCDHW format.
    * :math:`W`: Filter value, a tensor with MCDHW format.
    * :math:`\\ast`: Convolution operation.
    * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
    * :math:`\\sigma`: Activation function.
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
                   different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`

          Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`

        - Output:
          Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`

        Where

        .. math::

            D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
            H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
            W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1

    Args:
        input (Variable): The input image with [N, C, D, H, W] format.
        num_filters(int): The number of filters. It is the same as the output
            image channel.
        filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain three integers, (filter_size_D, filter_size_H,
            filter_size_W). Otherwise, the filter will be a cube.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
            stride_D = stride_H = stride_W = stride. Default: stride = 1.
        padding (int|tuple): The padding size. If padding is a tuple, it must
            contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
            padding_D = padding_H = padding_W = padding. Default: padding = 0.
        dilation (int|tuple): The dilation size. If dilation is a tuple, it must
            contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
            dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
        groups (int|None): The groups number of the Conv3d Layer. According to
            grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
            the first half of the filters is only connected to the first half
            of the input channels, while the second half of the filters is only
            connected to the second half of the input channels. Default: None,
            in which case every filter spans all input channels.
        param_attr (ParamAttr): The parameters to the Conv3d Layer. Default: None
        bias_attr (ParamAttr): Bias parameter for the Conv3d layer. Default: None
        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
            library is installed. Default: True
        use_mkldnn (bool): Use mkldnn kernels or not. Default: False
        act (str): Activation type. Default: None
        name (str|None): A name for this layer(optional). If set None, the layer
            will be named automatically.

    Returns:
        Variable: The tensor variable storing the convolution and \
                  non-linearity activation result.

    Raises:
        ValueError: If the number of input channels is not divisible by
                    groups, or if use_cudnn is not a bool.

    Examples:
        .. code-block:: python

          data = fluid.layers.data(
              name='data', shape=[3, 12, 32, 32], dtype='float32')
          conv3d = fluid.layers.conv3d(
              input=data, num_filters=2, filter_size=3, act="relu")
    """

    l_type = 'conv3d'

    # locals() forwards every argument (plus l_type) to LayerHelper, so avoid
    # introducing extra local names above this call.
    helper = LayerHelper(l_type, **locals())
    dtype = helper.input_dtype()

    # Channel axis is index 1 of the input shape.
    num_channels = input.shape[1]

    if groups is None:
        num_filter_channels = num_channels
    else:
        if num_channels % groups != 0:
            raise ValueError("num_channels must be divisible by groups.")
        # Floor division keeps the per-group channel count an integer on
        # Python 3 (true division would put a float into filter_shape).
        num_filter_channels = num_channels // groups

    # Normalize scalar-or-sequence arguments to 3-element lists (D, H, W).
    filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
    stride = utils.convert_to_list(stride, 3, 'stride')
    padding = utils.convert_to_list(padding, 3, 'padding')
    dilation = utils.convert_to_list(dilation, 3, 'dilation')

    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False")

    filter_shape = [num_filters, num_filter_channels] + filter_size

    def _get_default_param_initializer():
        # Fan-in is the filter volume times the input channels. Using the
        # product of all three dimensions (instead of filter_size[0]**3)
        # stays correct for non-cubic filters and is identical for cubic ones.
        filter_elem_num = (filter_size[0] * filter_size[1] * filter_size[2] *
                           num_channels)
        std = (2.0 / filter_elem_num)**0.5
        return Normal(0.0, std, 0)

    filter_param = helper.create_parameter(
        attr=helper.param_attr,
        shape=filter_shape,
        dtype=dtype,
        default_initializer=_get_default_param_initializer())

    pre_bias = helper.create_tmp_variable(dtype)

    helper.append_op(
        type=l_type,
        inputs={
            'Input': input,
            'Filter': filter_param,
        },
        outputs={"Output": pre_bias},
        attrs={
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
            'groups': groups,
            'use_cudnn': use_cudnn,
            'use_mkldnn': use_mkldnn
        })

    # NOTE(review): dim_end=3 presumably makes the bias span axes 1 and 2
    # (channel and depth) rather than the channel axis alone -- confirm
    # against append_bias_op and the conv2d layer before relying on it.
    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=3)

    return helper.append_activation(pre_act)
1534
+
1535
+
1372
1536
def sequence_pool (input , pool_type ):
1373
1537
"""
1374
1538
This function add the operator for sequence pooling.
0 commit comments