Skip to content

Commit 9b1a17a

Browse files
chengduo authored and abhinavarora committed
Refine conv2d_transpose layer doc (#6920)
* refine conv2d_transpose layer doc * fix conv2d_transpose doc * fix doc
1 parent cd25adb commit 9b1a17a

File tree

3 files changed

+121
-73
lines changed

3 files changed

+121
-73
lines changed

paddle/operators/conv_transpose_op.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,8 @@ The input(X) size and output(Out) size may be different.
160160
Output shape: $(N, C_{out}, H_{out}, W_{out})$
161161
Where
162162
$$
163-
H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + H_f \\
164-
W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + W_f
163+
H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\
164+
W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
165165
$$
166166
)DOC");
167167
}
@@ -249,9 +249,9 @@ The input(X) size and output(Out) size may be different.
249249
Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
250250
Where
251251
$$
252-
D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + D_f \\
253-
H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + H_f \\
254-
W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + W_f
252+
D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\
253+
H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\
254+
W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
255255
$$
256256
)DOC");
257257
}

paddle/operators/conv_transpose_op.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,9 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
141141
if (data_dim == 2U) {
142142
// col2im: col_matrix -> dy
143143
// from (c * k_h * k_w, h * w) to (c, o_h, o_w)
144-
col2im(dev_ctx, col, std::vector<int>{dilations[0], dilations[1]},
145-
strides, std::vector<int>{paddings[0], paddings[1], paddings[0],
146-
paddings[1]},
144+
col2im(dev_ctx, col, dilations, strides,
145+
std::vector<int>{paddings[0], paddings[1], paddings[0],
146+
paddings[1]},
147147
&output_batch);
148148
} else if (data_dim == 3U) {
149149
// col2vol: col_matrix -> dy
@@ -247,8 +247,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
247247
if (data_dim == 2U) {
248248
// im2col: dy -> col matrix
249249
// from (c, o_h, o_w) to (c * k_h * k_w, h * w)
250-
im2col(dev_ctx, output_grad_batch,
251-
std::vector<int>{dilations[0], dilations[1]}, strides,
250+
im2col(dev_ctx, output_grad_batch, dilations, strides,
252251
std::vector<int>{paddings[0], paddings[1], paddings[0],
253252
paddings[1]},
254253
&col);

python/paddle/v2/fluid/layers/nn.py

Lines changed: 112 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -790,60 +790,63 @@ def conv2d(input,
790790
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
791791
If bias attribution and activation type are provided, bias is added to the output of the convolution,
792792
and the corresponding activation function is applied to the final result.
793-
For each input :math:`X`, the equation is:
794793
794+
For each input :math:`X`, the equation is:
795795
796796
.. math::
797797
798798
Out = \\sigma (W \\ast X + b)
799799
800800
In the above equation:
801801
802-
* :math:`X`: Input value, a tensor with NCHW format.
803-
* :math:`W`: Filter value, a tensor with MCHW format.
804-
* :math:`\\ast`: Convolution operation.
805-
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
806-
* :math:`\\sigma`: Activation function.
807-
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
802+
* :math:`X`: Input value, a tensor with NCHW format.
803+
* :math:`W`: Filter value, a tensor with MCHW format.
804+
* :math:`\\ast`: Convolution operation.
805+
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
806+
* :math:`\\sigma`: Activation function.
807+
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
808808
809809
Example:
810810
811-
Input:
812-
Input shape: $(N, C_{in}, H_{in}, W_{in})$
811+
- Input:
812+
813+
Input shape: $(N, C_{in}, H_{in}, W_{in})$
813814
814-
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
815+
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
816+
817+
- Output:
818+
Output shape: $(N, C_{out}, H_{out}, W_{out})$
815819
816-
Output:
817-
Output shape: $(N, C_{out}, H_{out}, W_{out})$
818820
Where
819-
.. math::
821+
822+
.. math::
820823
821824
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
822825
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
823826
824827
Args:
825-
input(Variable): The input image with [N, C, H, W] format.
826-
num_filters(int): The number of filter. It is as same as the output
827-
image channel.
828-
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
829-
it must contain two integers, (filter_size_H, filter_size_W).
830-
Otherwise, the filter will be a square.
831-
stride(int|tuple): The stride size. If stride is a tuple, it must
832-
contain two integers, (stride_H, stride_W). Otherwise, the
833-
stride_H = stride_W = stride. Default: stride = 1.
834-
padding(int|tuple): The padding size. If padding is a tuple, it must
835-
contain two integers, (padding_H, padding_W). Otherwise, the
836-
padding_H = padding_W = padding. Default: padding = 0.
837-
groups(int): The groups number of the Conv2d Layer. According to grouped
838-
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
839-
the first half of the filters is only connected to the first half
840-
of the input channels, while the second half of the filters is only
841-
connected to the second half of the input channels. Default: groups=1
842-
param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None
843-
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
844-
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
845-
library is installed. Default: True
846-
act(str): Activation type. Default: None
828+
input(Variable): The input image with [N, C, H, W] format.
829+
num_filters(int): The number of filters. It is the same as the output
830+
image channel.
831+
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
832+
it must contain two integers, (filter_size_H, filter_size_W).
833+
Otherwise, the filter will be a square.
834+
stride(int|tuple): The stride size. If stride is a tuple, it must
835+
contain two integers, (stride_H, stride_W). Otherwise, the
836+
stride_H = stride_W = stride. Default: stride = 1.
837+
padding(int|tuple): The padding size. If padding is a tuple, it must
838+
contain two integers, (padding_H, padding_W). Otherwise, the
839+
padding_H = padding_W = padding. Default: padding = 0.
840+
groups(int): The groups number of the Conv2d Layer. According to grouped
841+
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
842+
the first half of the filters is only connected to the first half
843+
of the input channels, while the second half of the filters is only
844+
connected to the second half of the input channels. Default: groups=1
845+
param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None
846+
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
847+
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
848+
library is installed. Default: True
849+
act(str): Activation type. Default: None
847850
848851
Returns:
849852
Variable: The tensor variable storing the convolution and \
@@ -858,7 +861,6 @@ def conv2d(input,
858861
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
859862
conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
860863
"""
861-
862864
if stride is None:
863865
stride = [1, 1]
864866
helper = LayerHelper('conv2d', **locals())
@@ -1212,38 +1214,85 @@ def conv2d_transpose(input,
12121214
use_cudnn=True,
12131215
name=None):
12141216
"""
1215-
The transpose of conv2d layer.
1217+
**Convolution2D transpose layer**
1218+
1219+
The convolution2D transpose layer calculates the output based on the input,
1220+
filter, and dilations, strides, paddings. Input(Input) and output(Output)
1221+
are in NCHW format. Where N is batch size, C is the number of channels,
1222+
H is the height of the feature, and W is the width of the feature.
1223+
Parameters (dilations, strides, paddings) each contain two elements. These two elements
1224+
represent height and width, respectively. For details of the convolution transpose
1225+
layer, please refer to the following explanation and references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
1226+
1227+
For each input :math:`X`, the equation is:
1228+
1229+
.. math::
1230+
1231+
Out = W \\ast X
1232+
1233+
In the above equation:
1234+
1235+
* :math:`X`: Input value, a tensor with NCHW format.
1236+
* :math:`W`: Filter value, a tensor with MCHW format.
1237+
* :math:`\\ast` : Convolution transpose operation.
1238+
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
1239+
1240+
Example:
12161241
1217-
This layer is also known as deconvolution layer.
1242+
- Input:
1243+
1244+
Input shape: $(N, C_{in}, H_{in}, W_{in})$
1245+
1246+
Filter shape: $(C_{in}, C_{out}, H_f, W_f)$
1247+
1248+
- Output:
1249+
1250+
Output shape: $(N, C_{out}, H_{out}, W_{out})$
1251+
1252+
Where
1253+
1254+
.. math::
1255+
1256+
H_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
1257+
W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
12181258
12191259
Args:
1220-
input(Variable): The input image with [N, C, H, W] format.
1221-
num_filters(int): The number of filter. It is as same as the output
1222-
image channel.
1223-
output_size(int|tuple|None): The output image size. If output size is a
1224-
tuple, it must contain two integers, (image_H, image_W). This
1225-
parameter only works when filter_size is None.
1226-
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
1227-
it must contain two integers, (filter_size_H, filter_size_W).
1228-
Otherwise, the filter will be a square. None if use output size to
1229-
calculate filter_size
1230-
padding(int|tuple): The padding size. If padding is a tuple, it must
1231-
contain two integers, (padding_H, padding_W). Otherwise, the
1232-
padding_H = padding_W = padding.
1233-
stride(int|tuple): The stride size. If stride is a tuple, it must
1234-
contain two integers, (stride_H, stride_W). Otherwise, the
1235-
stride_H = stride_W = stride.
1236-
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
1237-
contain two integers, (dilation_H, dilation_W). Otherwise, the
1238-
dilation_H = dilation_W = dilation.
1239-
param_attr: Parameter Attribute.
1240-
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
1241-
library is installed. Default: True
1242-
name(str|None): A name for this layer(optional). If set None, the layer
1243-
will be named automatically.
1260+
input(Variable): The input image with [N, C, H, W] format.
1261+
num_filters(int): The number of filters. It is the same as the output
1262+
image channel.
1263+
output_size(int|tuple|None): The output image size. If output size is a
1264+
tuple, it must contain two integers, (image_H, image_W). This
1265+
parameter only works when filter_size is None.
1266+
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
1267+
it must contain two integers, (filter_size_H, filter_size_W).
1268+
Otherwise, the filter will be a square. Use None if output_size is used to
1269+
calculate filter_size.
1270+
padding(int|tuple): The padding size. If padding is a tuple, it must
1271+
contain two integers, (padding_H, padding_W). Otherwise, the
1272+
padding_H = padding_W = padding. Default: padding = 0.
1273+
stride(int|tuple): The stride size. If stride is a tuple, it must
1274+
contain two integers, (stride_H, stride_W). Otherwise, the
1275+
stride_H = stride_W = stride. Default: stride = 1.
1276+
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
1277+
contain two integers, (dilation_H, dilation_W). Otherwise, the
1278+
dilation_H = dilation_W = dilation. Default: dilation = 1.
1279+
param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. Default: None
1280+
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
1281+
library is installed. Default: True
1282+
name(str|None): A name for this layer(optional). If set None, the layer
1283+
will be named automatically.
12441284
12451285
Returns:
1246-
Variable: Output image.
1286+
Variable: The tensor variable storing the convolution transpose result.
1287+
1288+
Raises:
1289+
ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch.
1290+
1291+
Examples:
1292+
.. code-block:: python
1293+
1294+
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
1295+
conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3)
12471296
"""
12481297
helper = LayerHelper("conv2d_transpose", **locals())
12491298
if not isinstance(input, Variable):

0 commit comments

Comments
 (0)