|
59 | 59 | 'warpctc',
|
60 | 60 | 'sequence_reshape',
|
61 | 61 | 'transpose',
|
| 62 | + 'im2sequence', |
62 | 63 | 'nce',
|
63 | 64 | ]
|
64 | 65 |
|
@@ -2391,3 +2392,128 @@ def transpose(x, perm, name=None):
|
2391 | 2392 | outputs={'Out': [out]},
|
2392 | 2393 | attrs={'axis': perm})
|
2393 | 2394 | return out
|
| 2395 | + |
| 2396 | + |
def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
    r"""
    Extracts image patches from the input tensor to form a tensor of shape
    {input.batch_size * output_height * output_width, filter_size_H *
    filter_size_W * input.channels} which is similar with im2col.
    This op use filter / kernel to scan images and convert these images to
    sequences. After expanding, the number of time step are
    output_height * output_width for an image, in which output_height and
    output_width are calculated by below equation:

    .. math::

        output\_size = 1 + \
            (2 * padding + img\_size - block\_size + stride - 1) / stride

    And the dimension of each time step is block_y * block_x * input.channels.

    Args:
        input (Variable): The input should be a tensor in NCHW format.

        filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square.

        stride(int|tuple): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise, the
            stride_H = stride_W = stride. Default: stride = 1.

        padding(int|tuple): The padding size. If padding is a tuple, it can
            contain two integers like (padding_H, padding_W) which means
            padding_up = padding_down = padding_H and
            padding_left = padding_right = padding_W. Or it can use
            (padding_up, padding_left, padding_down, padding_right) to indicate
            paddings of four direction. Otherwise, a scalar padding means
            padding_up = padding_down = padding_left = padding_right = padding
            Default: padding = 0.

        name (str): The name of this layer. It is optional.

    Returns:
        output: The output is a LoDTensor with shape
        {input.batch_size * output_height * output_width,
        filter_size_H * filter_size_W * input.channels}.
        If we regard output as a matrix, each row of this matrix is
        a step of a sequence.

    Examples:

        As an example:

            .. code-block:: text

                Given:

                x = [[[[ 6.  2.  1.]
                       [ 8.  3.  5.]
                       [ 0.  2.  6.]]

                      [[ 2.  4.  4.]
                       [ 6.  3.  0.]
                       [ 6.  4.  7.]]]

                     [[[ 6.  7.  1.]
                       [ 5.  7.  9.]
                       [ 2.  4.  8.]]

                      [[ 1.  2.  1.]
                       [ 1.  3.  5.]
                       [ 9.  0.  8.]]]]

                x.dims = {2, 2, 3, 3}

                And:

                filter = [2, 2]
                stride = [1, 1]
                padding = [0, 0]

                Then:

                output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
                               [ 2.  1.  3.  5.  4.  4.  3.  0.]
                               [ 8.  3.  0.  2.  6.  3.  6.  4.]
                               [ 3.  5.  2.  6.  3.  0.  4.  7.]
                               [ 6.  7.  5.  7.  1.  2.  1.  3.]
                               [ 7.  1.  7.  9.  2.  1.  3.  5.]
                               [ 5.  7.  2.  4.  1.  3.  9.  0.]
                               [ 7.  9.  4.  8.  3.  5.  0.  8.]]

                output.dims = {8, 9}

                output.lod = [[0, 4, 8]]

        The simple usage is:

            .. code-block:: python

                output = fluid.layers.im2sequence(input=layer, stride=[1, 1], filter_size=[2, 2])

    """
    # Normalize each argument to the list form the im2sequence op expects.
    if isinstance(filter_size, int):
        filter_size = [filter_size, filter_size]
    if isinstance(stride, int):
        stride = [stride, stride]
    if isinstance(padding, int):
        padding = [padding, padding]
    # Copy to a fresh list so that (a) a tuple argument is accepted
    # (tuples have no append) and (b) a caller-supplied list is never
    # mutated in place.
    padding = list(padding)
    if len(padding) == 2:
        # (padding_H, padding_W) -> (up, left, down, right)
        padding.extend([padding[0], padding[1]])

    helper = LayerHelper('im2sequence', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='im2sequence',
        inputs={'X': input},
        outputs={'Out': out},
        attrs={
            'kernels': filter_size,
            'strides': stride,
            'paddings': padding,
        })
    return out
0 commit comments