Skip to content

Commit 9536c4e

Browse files
Merge pull request #7595 from wanghaoshuang/block_expand_py
Add python API for im2sequence op
2 parents d76fcb6 + d498d93 commit 9536c4e

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed

doc/api/v2/fluid/layers.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,11 @@ swish
505505
.. autofunction:: paddle.v2.fluid.layers.swish
506506
:noindex:
507507

508+
im2sequence
509+
-----------
510+
.. autofunction:: paddle.v2.fluid.layers.im2sequence
511+
:noindex:
512+
508513
edit_distance
509514
---------------
510515
.. autofunction:: paddle.v2.fluid.layers.edit_distance_error

python/paddle/v2/fluid/layers/nn.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
'warpctc',
6060
'sequence_reshape',
6161
'transpose',
62+
'im2sequence',
6263
'nce',
6364
]
6465

@@ -2391,3 +2392,128 @@ def transpose(x, perm, name=None):
23912392
outputs={'Out': [out]},
23922393
attrs={'axis': perm})
23932394
return out
2395+
2396+
2397+
def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
    r"""
    Extracts image patches from the input tensor to form a tensor of shape
    {input.batch_size * output_height * output_width, filter_size_H *
    filter_size_W * input.channels}, which is similar to im2col.
    This op uses a filter / kernel to scan images and converts these images
    to sequences. After expanding, the number of time steps is
    output_height * output_width for an image, in which output_height and
    output_width are calculated by the equation below:

    .. math::

        output\_size = 1 +
            (2 * padding + img\_size - block\_size + stride - 1) / stride

    And the dimension of each time step is
    filter_size_H * filter_size_W * input.channels.

    Args:
        input (Variable): The input should be a tensor in NCHW format.

        filter_size(int|tuple|list): The filter size. If filter_size is a
            tuple or list, it must contain two integers,
            (filter_size_H, filter_size_W). Otherwise, the filter will be a
            square. Default: filter_size = 1.

        stride(int|tuple|list): The stride size. If stride is a tuple or
            list, it must contain two integers, (stride_H, stride_W).
            Otherwise, stride_H = stride_W = stride. Default: stride = 1.

        padding(int|tuple|list): The padding size. If padding is a tuple or
            list, it can contain two integers like (padding_H, padding_W)
            which means padding_up = padding_down = padding_H and
            padding_left = padding_right = padding_W. Or it can use
            (padding_up, padding_left, padding_down, padding_right) to
            indicate paddings of four directions. Otherwise, a scalar padding
            means padding_up = padding_down = padding_left = padding_right
            = padding. Default: padding = 0.

        name (str|None): The name of this layer. It is optional.

    Returns:
        output: The output is a LoDTensor with shape
        {input.batch_size * output_height * output_width,
        filter_size_H * filter_size_W * input.channels}.
        If we regard output as a matrix, each row of this matrix is
        a step of a sequence.

    Examples:

    As an example:

        .. code-block:: text

            Given:

            x = [[[[ 6.  2.  1.]
                   [ 8.  3.  5.]
                   [ 0.  2.  6.]]

                  [[ 2.  4.  4.]
                   [ 6.  3.  0.]
                   [ 6.  4.  7.]]]

                 [[[ 6.  7.  1.]
                   [ 5.  7.  9.]
                   [ 2.  4.  8.]]

                  [[ 1.  2.  1.]
                   [ 1.  3.  5.]
                   [ 9.  0.  8.]]]]

            x.dims = {2, 2, 3, 3}

            And:

            filter = [2, 2]
            stride = [1, 1]
            padding = [0, 0]

            Then:

            output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
                           [ 2.  1.  3.  5.  4.  4.  3.  0.]
                           [ 8.  3.  0.  2.  6.  3.  6.  4.]
                           [ 3.  5.  2.  6.  3.  0.  4.  7.]
                           [ 6.  7.  5.  7.  1.  2.  1.  3.]
                           [ 7.  1.  7.  9.  2.  1.  3.  5.]
                           [ 5.  7.  2.  4.  1.  3.  9.  0.]
                           [ 7.  9.  4.  8.  3.  5.  0.  8.]]

            output.dims = {8, 8}

            output.lod = [[0, 4, 8]]

    The simple usage is:

    .. code-block:: python

        output = fluid.layers.im2sequence(
            input=layer, stride=[1, 1], filter_size=[2, 2])

    """
    # NOTE: only the parameter names are rebound below (no new locals are
    # introduced) because ``locals()`` is forwarded to LayerHelper.

    # Normalize every size argument to a fresh list so that tuples are
    # accepted and a list owned by the caller is never mutated.
    if isinstance(filter_size, int):
        filter_size = [filter_size, filter_size]
    else:
        filter_size = list(filter_size)

    if isinstance(stride, int):
        stride = [stride, stride]
    else:
        stride = list(stride)

    if isinstance(padding, int):
        padding = [padding, padding]
    else:
        padding = list(padding)
    if len(padding) == 2:
        # (padding_H, padding_W) -> (up, left, down, right)
        padding = padding + padding

    helper = LayerHelper('im2sequence', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='im2sequence',
        inputs={'X': input},
        outputs={'Out': out},
        attrs={
            'kernels': filter_size,
            'strides': stride,
            'paddings': padding,
        })
    return out

python/paddle/v2/fluid/tests/test_layers.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,16 @@ def test_sequence_reshape(self):
226226
self.assertIsNotNone(out)
227227
print(str(program))
228228

229+
def test_im2sequence(self):
    """Build an im2sequence layer in a fresh Program and check it returns an output."""
    print("test_im2sequence")
    prog = Program()
    with program_guard(prog):
        # Image-shaped data layer with shape [3, 128, 128].
        image = layers.data(name='x', shape=[3, 128, 128], dtype='float32')
        patches = layers.im2sequence(
            input=image, filter_size=[2, 2], stride=[1, 1])
        self.assertIsNotNone(patches)
    # Dump the constructed program for manual inspection.
    print(str(prog))
238+
229239
@decorators.prog_scope()
230240
def test_nce(self):
231241
window_size = 5

0 commit comments

Comments
 (0)