Skip to content

Commit 3ab3253

Browse files
committed
Add conv3d Python API
1 parent 431491a commit 3ab3253

File tree

1 file changed

+166
-2
lines changed
  • python/paddle/fluid/layers

1 file changed

+166
-2
lines changed

python/paddle/fluid/layers/nn.py

Lines changed: 166 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,8 +1305,6 @@ def conv2d(input,
13051305
conv2d = fluid.layers.conv2d(
13061306
input=data, num_filters=2, filter_size=3, act="relu")
13071307
"""
1308-
if stride is None:
1309-
stride = [1, 1]
13101308

13111309
num_channels = input.shape[1]
13121310

@@ -1369,6 +1367,172 @@ def _get_default_param_initializer():
13691367
return helper.append_activation(pre_act)
13701368

13711369

1370+
def conv3d(input,
           num_filters,
           filter_size,
           stride=1,
           padding=0,
           dilation=1,
           groups=None,
           param_attr=None,
           bias_attr=None,
           use_cudnn=True,
           use_mkldnn=False,
           act=None,
           name=None):
    """
    **Convolution3D Layer**

    The convolution3D layer calculates the output based on the input, filter
    and strides, paddings, dilations, groups parameters. Input(Input) and
    Output(Output) are in NCDHW format. Where N is batch size, C is the number
    of channels, D is the depth of the feature, H is the height of the feature,
    and W is the width of the feature.
    The details of convolution layer, please refer UFLDL's `convolution,
    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
    If bias attribution and activation type are provided, bias is added to the
    output of the convolution, and the corresponding activation function is
    applied to the final result.

    For each input :math:`X`, the equation is:

    .. math::

        Out = \\sigma (W \\ast X + b)

    In the above equation:

    * :math:`X`: Input value, a tensor with NCDHW format.
    * :math:`W`: Filter value, a tensor with MCDHW format.
    * :math:`\\ast`: Convolution operation.
    * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
    * :math:`\\sigma`: Activation function.
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
                   different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`

          Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`

        - Output:
          Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`

        Where

        .. math::

            D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
            H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
            W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1

    Args:
        input (Variable): The input image with [N, C, D, H, W] format.
        num_filters(int): The number of filter. It is as same as the output
            image channel.
        filter_size (int|tuple): The filter size. If filter_size is a tuple,
            it must contain three integers, (filter_size_D, filter_size_H,
            filter_size_W). Otherwise, the filter will be a cube.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain three integers, (stride_D, stride_H, stride_W). Otherwise,
            the stride_D = stride_H = stride_W = stride. Default: stride = 1.
        padding (int|tuple): The padding size. If padding is a tuple, it must
            contain three integers, (padding_D, padding_H, padding_W).
            Otherwise, the padding_D = padding_H = padding_W = padding.
            Default: padding = 0.
        dilation (int|tuple): The dilation size. If dilation is a tuple, it
            must contain three integers, (dilation_D, dilation_H, dilation_W).
            Otherwise, the dilation_D = dilation_H = dilation_W = dilation.
            Default: dilation = 1.
        groups (int|None): The groups number of the Conv3d Layer. According to
            grouped convolution in Alex Krizhevsky's Deep CNN paper: when
            group=2, the first half of the filters is only connected to the
            first half of the input channels, while the second half of the
            filters is only connected to the second half of the input
            channels. If None, every filter is given all input channels.
            Default: None
        param_attr (ParamAttr): The parameters to the Conv3d Layer. Default: None
        bias_attr (ParamAttr): Bias parameter for the Conv3d layer. Default: None
        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
            library is installed. Default: True
        use_mkldnn (bool): Use mkldnn kernels or not. Default: False
        act (str): Activation type. Default: None
        name (str|None): A name for this layer(optional). If set None, the layer
            will be named automatically.

    Returns:
        Variable: The tensor variable storing the convolution and
                  non-linearity activation result.

    Raises:
        ValueError: If the number of input channels is not divisible by
                    groups, or if use_cudnn is not a bool. Size arguments of
                    the wrong length are presumably rejected by
                    utils.convert_to_list as well (not verified here).

    Examples:
        .. code-block:: python

          data = fluid.layers.data(
              name='data', shape=[3, 12, 32, 32], dtype='float32')
          conv3d = fluid.layers.conv3d(
              input=data, num_filters=2, filter_size=3, act="relu")
    """

    l_type = 'conv3d'

    # locals() is forwarded to LayerHelper as keyword arguments, so keep any
    # additional local variables below this call.
    helper = LayerHelper(l_type, **locals())
    dtype = helper.input_dtype()

    num_channels = input.shape[1]

    if groups is None:
        num_filter_channels = num_channels
    else:
        if num_channels % groups != 0:
            raise ValueError("num_channels must be divisible by groups.")
        # Floor division keeps the filter shape integral on Python 3 as well.
        num_filter_channels = num_channels // groups

    filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
    stride = utils.convert_to_list(stride, 3, 'stride')
    padding = utils.convert_to_list(padding, 3, 'padding')
    dilation = utils.convert_to_list(dilation, 3, 'dilation')

    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False")

    filter_shape = [num_filters, num_filter_channels] + filter_size

    def _get_default_param_initializer():
        # Scale by the real filter volume (D_f * H_f * W_f) so non-cubic
        # filters get the right fan-in, not just filter_size[0] cubed.
        filter_elem_num = (filter_size[0] * filter_size[1] * filter_size[2] *
                           num_channels)
        std = (2.0 / filter_elem_num)**0.5
        return Normal(0.0, std, 0)

    filter_param = helper.create_parameter(
        attr=helper.param_attr,
        shape=filter_shape,
        dtype=dtype,
        default_initializer=_get_default_param_initializer())

    pre_bias = helper.create_tmp_variable(dtype)

    helper.append_op(
        type=l_type,
        inputs={
            'Input': input,
            'Filter': filter_param,
        },
        outputs={"Output": pre_bias},
        attrs={
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
            'groups': groups,
            'use_cudnn': use_cudnn,
            'use_mkldnn': use_mkldnn
        })

    # The bias is per output channel: only dimension 1 (C_out) of the
    # N, C, D, H, W output is selected.
    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)

    return helper.append_activation(pre_act)
1534+
1535+
13721536
def sequence_pool(input, pool_type):
13731537
"""
13741538
This function add the operator for sequence pooling.

0 commit comments

Comments
 (0)