diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index 1e48a5e3a20ee..c384e8b638382 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -93,17 +93,21 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
     tensor of rank `n + k` with a tiled and packed layout (maybe with padding)
     and optionally transposes the tiled source tensor dimensions.
 
-    `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
-    being tiled, where `0 < k <= n`. The order of the dimensions matters:
-    - The tiled dimensions (of size `inner_tiles`) are added to the end of the result
-      tensor in the order in which they appear in `inner_dims_pos`.
-    - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
-      `inner_tiles[i]`.
-
     `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
     correspond to the least significant ("inner") result tensor dimension sizes,
     in the same order. Tile sizes can be static or dynamic.
 
+    `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
+    being tiled, where `0 <= k <= n`.
+    - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
+      `inner_tiles[i]` where `0 <= i < k`. All the values in `inner_dims_pos` are
+      within `[0, n)`.
+    - The tiled dimensions (of size `inner_tiles`) are added to the end of the
+      result tensor in the order in which they appear, i.e.
+      `shape(result)[rank(result) - k + i] = inner_tiles[i]` for `0 <= i < k`.
+    - The following relationship for the tiled dimensions holds:
+      `shape(result)[inner_dims_pos[i]] = shape(source)[inner_dims_pos[i]] / inner_tiles[i]` (where the division rounds up when a `padding_value` is specified).
+
     Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
     `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
    by 16 and the 1st source dimension is tiled by 32. Other source dimensions
@@ -116,7 +120,19 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
     %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
         into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
     //                                            \  /   \  /
-    //                                        outer dims inner dims
+    //                                Outer Dims: 16x8  Inner Dims: 8x32
+
+    // CHW to CHWhw
+    %0 = linalg.pack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2]
+        into %dest : tensor<3x20x24xf32> -> tensor<3x10x6 x 4x2 xf32>
+    //                                            \   /    \ /
+    //                               Outer Dims: 3x10x6  Inner Dims: 4x2
+
+    // HCW to HCWhw
+    %0 = linalg.pack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2]
+        into %dest : tensor<18x3x32xf32> -> tensor<9x3x8 x 4x2 xf32>
+    //                                            \  /    \ /
+    //                                Outer Dims: 9x3x8  Inner Dims: 4x2
     ```
 
     `outer_dims_perm` (optional) specifies a permutation for the outer
@@ -246,13 +262,6 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
     The "unpack" operation converts a source tensor of rank `n` with a tiled and
     packed layout to a result tensor of rank `n - k`.
 
-    `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
-    which the last `k` source tensor dimensions are combined, where
-    `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
-    The order of the dimensions in `inner_dims_pos` matters: dimension
-    `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
-    `outer_dims_perm` is not specified).
-
     `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
     correspond to the least significant ("inner") source tensor dimension sizes.
     The behavior of this op is undefined if:
@@ -262,21 +271,50 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
       `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
       evenly.
 
+    `inner_dims_pos` (mandatory) specifies `k` result tensor (i.e. unpacked
+    tensor) dimensions that were tiled with `inner_tiles` to create the packed
+    source tensor. The source tensor (i.e. packed tensor) dimensions can be
+    unpacked given `inner_dims_pos` as follows.
+    - For `0 <= i < k` the following relationship holds:
+      `shape(result)[inner_dims_pos[i]] <= shape(source)[n-k+i] * shape(source)[inner_dims_pos[i]]`.
+    - For `0 <= j < n-k` and `j` not in `inner_dims_pos` the following relationship holds:
+      `shape(result)[j] = shape(source)[j]`.
+
     `outer_dims_perm` (optional) specifies a permutation for the outer
     dimensions. If specified, it must have `n - k` elements. If specified, this
     permutation is applied before combining any dimensions.
 
-    Example:
+    Note: the unpack operation may drop any padding introduced by the pack
+    operation, hence the following holds:
+    `NumElementsOf(source) >= NumElementsOf(result)`.
+
+    Examples:
 
     ```mlir
     // NCnc to NC:
     %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
-        into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
+        into %dest : tensor<16x8 x 8x32 xf32> -> tensor<128x256xf32>
+    //                      \  /   \  /
+    //          Outer Dims: 16x8  Inner Dims: 8x32
 
     // CK to KCck:
     %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
-        inner_tiles = [8, 32] into %dest
-        : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
+        inner_tiles = [8, 32]
+        into %dest : tensor<8x16 x 8x32 xf32> -> tensor<128x256xf32>
+    //                      \  /   \  /
+    //          Outer Dims: 8x16  Inner Dims: 8x32
+
+    // CHW to CHWhw:
+    %0 = linalg.unpack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2]
+        into %dest : tensor<3x10x6 x 4x2 xf32> -> tensor<3x20x24xf32>
+    //                       \   /    \ /
+    //          Outer Dims: 3x10x6  Inner Dims: 4x2
+
+    // HCW to HCWhw:
+    %0 = linalg.unpack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2]
+        into %dest : tensor<9x3x8 x 4x2 xf32> -> tensor<18x3x32xf32>
+    //                       \  /    \ /
+    //           Outer Dims: 9x3x8  Inner Dims: 4x2
     ```
   }];
 
   let arguments = (ins AnyRankedTensor:$source,
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index cbc863699ba9e..17f25a800d17e 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -1824,6 +1824,16 @@ func.func @unpack_invalid_outer_dims_perm(%source: tensor<128x256xf32>, %dest: t
 
 // -----
 
+// The outer dims in the output tensor are unexpectedly transposed.
+// This could be fixed by adding `outer_dims_perm = [1, 0]` (the default assumes no transpose).
+func.func @pack_invalid_result_shape(%input: tensor<256x128xf32>, %output: tensor<4x16x32x16xf32>) -> tensor<4x16x32x16xf32> {
+  // expected-error@+1 {{the shape of output is not large enough to hold the packed data. Expected at least 'tensor<16x4x32x16xf32>', got 'tensor<4x16x32x16xf32>'}}
+  %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [32, 16] into %output : tensor<256x128xf32> -> tensor<4x16x32x16xf32>
+  return %0 : tensor<4x16x32x16xf32>
+}
+
+// -----
+
 func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
   // expected-error@+1 {{the shape of output is not large enough to hold the packed data. Expected at least 'tensor<8x8x16x32xf32>', got 'tensor<8x8x32x16xf32>'}}
   %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
index 470bc1c78640c..412f40d501154 100644
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -2771,6 +2771,101 @@ func.func @pad_and_pack_partially_dynamic(%source: tensor<?x?xf32>, %dest: tenso
 
 // -----
 
+func.func @pack_transposed_inner_dims_with_padding(%source: tensor<1x5x7xf32>, %dest: tensor<1x3x2x4x2xf32>, %pad: f32) -> tensor<1x3x2x4x2xf32> {
+  %0 = linalg.pack %source padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [4, 2] into %dest : tensor<1x5x7xf32> -> tensor<1x3x2x4x2xf32>
+  return %0 : tensor<1x3x2x4x2xf32>
+}
+
+// CHECK-LABEL: func.func @pack_transposed_inner_dims_with_padding(
+// CHECK-SAME:    %[[SOURCE:.*]]: tensor<1x5x7xf32>,
+// CHECK-SAME:    %[[DEST:.*]]: tensor<1x3x2x4x2xf32>,
+// CHECK-SAME:    %[[PAD:.*]]: f32)
+// CHECK:         %{{.*}} = linalg.pack
+// CHECK-SAME:      inner_dims_pos = [2, 1]
+// CHECK-SAME:      inner_tiles = [4, 2]
+// CHECK-SAME:      into %[[DEST]] : tensor<1x5x7xf32> -> tensor<1x3x2x4x2xf32>
+
+// -----
+
+// The function suffix "with_padding" refers to the padding introduced by the
+// pack operation. Here, however, the padding is dropped, producing a tensor
+// with fewer elements than the source.
+func.func @unpack_descending_inner_dims_with_padding(%source: tensor<1x3x2x4x2xf32>, %dest: tensor<1x5x7xf32>) -> tensor<1x5x7xf32> {
+  %0 = linalg.unpack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2] into %dest : tensor<1x3x2x4x2xf32> -> tensor<1x5x7xf32>
+  return %0 : tensor<1x5x7xf32>
+}
+
+// CHECK-LABEL: func.func @unpack_descending_inner_dims_with_padding(
+// CHECK-SAME:    %[[SOURCE:.*]]: tensor<1x3x2x4x2xf32>,
+// CHECK-SAME:    %[[DEST:.*]]: tensor<1x5x7xf32>)
+// CHECK:         %{{.*}} = linalg.unpack
+// CHECK-SAME:      inner_dims_pos = [2, 1]
+// CHECK-SAME:      inner_tiles = [4, 2]
+// CHECK-SAME:      into %[[DEST]] : tensor<1x3x2x4x2xf32> -> tensor<1x5x7xf32>
+
+// -----
+
+func.func @pack_non_adjacent_inner_dims(%source: tensor<20x1x12xf32>, %dest: tensor<10x1x3x4x2xf32>) -> tensor<10x1x3x4x2xf32> {
+  %0 = linalg.pack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2] into %dest : tensor<20x1x12xf32> -> tensor<10x1x3x4x2xf32>
+  return %0 : tensor<10x1x3x4x2xf32>
+}
+
+// CHECK-LABEL: func.func @pack_non_adjacent_inner_dims(
+// CHECK-SAME:    %[[SOURCE:.*]]: tensor<20x1x12xf32>,
+// CHECK-SAME:    %[[DEST:.*]]: tensor<10x1x3x4x2xf32>)
+// CHECK:         %{{.*}} = linalg.pack
+// CHECK-SAME:      inner_dims_pos = [2, 0]
+// CHECK-SAME:      inner_tiles = [4, 2]
+// CHECK-SAME:      into %[[DEST]] : tensor<20x1x12xf32> -> tensor<10x1x3x4x2xf32>
+
+// -----
+
+func.func @unpack_non_adjacent_inner_dims(%source: tensor<10x1x3x4x2xf32>, %dest: tensor<20x1x12xf32>) -> tensor<20x1x12xf32> {
+  %0 = linalg.unpack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2] into %dest : tensor<10x1x3x4x2xf32> -> tensor<20x1x12xf32>
+  return %0 : tensor<20x1x12xf32>
+}
+
+// CHECK-LABEL: func.func @unpack_non_adjacent_inner_dims(
+// CHECK-SAME:    %[[SOURCE:.*]]: tensor<10x1x3x4x2xf32>,
+// CHECK-SAME:    %[[DEST:.*]]: tensor<20x1x12xf32>)
+// CHECK:         %{{.*}} = linalg.unpack
+// CHECK-SAME:      inner_dims_pos = [2, 0]
+// CHECK-SAME:      inner_tiles = [4, 2]
+// CHECK-SAME:      into %[[DEST]] : tensor<10x1x3x4x2xf32> -> tensor<20x1x12xf32>
+
+// -----
+
+func.func @pack_implementing_transpose(%source: tensor<3x5x7xf32>, %dest: tensor<3x7x5xf32>) -> tensor<3x7x5xf32> {
+  %0 = linalg.pack %source outer_dims_perm = [0, 2, 1] inner_dims_pos = [] inner_tiles = [] into %dest : tensor<3x5x7xf32> -> tensor<3x7x5xf32>
+  return %0 : tensor<3x7x5xf32>
+}
+
+// CHECK-LABEL: func.func @pack_implementing_transpose(
+// CHECK-SAME:    %[[SOURCE:.*]]: tensor<3x5x7xf32>,
+// CHECK-SAME:    %[[DEST:.*]]: tensor<3x7x5xf32>)
+// CHECK:         %{{.*}} = linalg.pack
+// CHECK-SAME:      outer_dims_perm = [0, 2, 1]
+// CHECK-SAME:      inner_dims_pos = []
+// CHECK-SAME:      inner_tiles = []
+// CHECK-SAME:      into %[[DEST]] : tensor<3x5x7xf32> -> tensor<3x7x5xf32>
+
+// -----
+
+func.func @unpack_implementing_transpose(%source: tensor<3x7x5xf32>, %dest: tensor<3x5x7xf32>) -> tensor<3x5x7xf32> {
+  %0 = linalg.unpack %source outer_dims_perm = [0, 2, 1] inner_dims_pos = [] inner_tiles = [] into %dest : tensor<3x7x5xf32> -> tensor<3x5x7xf32>
+  return %0 : tensor<3x5x7xf32>
+}
+
+// CHECK-LABEL: func.func @unpack_implementing_transpose(
+// CHECK-SAME:    %[[SOURCE:.*]]: tensor<3x7x5xf32>,
+// CHECK-SAME:    %[[DEST:.*]]: tensor<3x5x7xf32>)
+// CHECK:         %{{.*}} = linalg.unpack
+// CHECK-SAME:      outer_dims_perm = [0, 2, 1]
+// CHECK-SAME:      inner_dims_pos = []
+// CHECK-SAME:      inner_tiles = []
+// CHECK-SAME:      into %[[DEST]] : tensor<3x7x5xf32> -> tensor<3x5x7xf32>
+
+// -----
+
 func.func @unpack_fully_dynamic(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>, %tile_n : index, %tile_m : index) -> tensor<?x?xf32> {
   %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
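
To make the updated pack shape rules concrete, here is a worked sketch (not part of the diff; the function name `@pack_shape_math` is illustrative, and the shapes are borrowed from the `pack_transposed_inner_dims_with_padding` test above):

```mlir
// n = 3, k = 2, inner_dims_pos = [2, 1], inner_tiles = [4, 2]:
//   - trailing tile dims: shape(result)[rank(result) - k + i] = inner_tiles[i],
//     so the result ends in ...x4x2.
//   - tiled dims (rounded up because a padding_value is given):
//     shape(result)[2] = ceil(7 / 4) = 2, shape(result)[1] = ceil(5 / 2) = 3.
//   - untouched dim: shape(result)[0] = shape(source)[0] = 1.
func.func @pack_shape_math(%source: tensor<1x5x7xf32>, %dest: tensor<1x3x2x4x2xf32>, %pad: f32) -> tensor<1x3x2x4x2xf32> {
  %0 = linalg.pack %source padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [4, 2]
      into %dest : tensor<1x5x7xf32> -> tensor<1x3x2x4x2xf32>
  return %0 : tensor<1x3x2x4x2xf32>
}
```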
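And the inverse direction, checking the unpack inequality and the element-count note from the docs (again a sketch; `@unpack_shape_math` is an illustrative name):

```mlir
// n = 5, k = 2, inner_dims_pos = [2, 1], inner_tiles = [4, 2]:
//   - shape(result)[2] = 7 <= shape(source)[3] * shape(source)[2] = 4 * 2 = 8.
//   - shape(result)[1] = 5 <= shape(source)[4] * shape(source)[1] = 2 * 3 = 6.
//   - shape(result)[0] = shape(source)[0] = 1 (dim 0 is not in inner_dims_pos).
// The padding is dropped: NumElementsOf(source) = 48 >= NumElementsOf(result) = 35.
func.func @unpack_shape_math(%source: tensor<1x3x2x4x2xf32>, %dest: tensor<1x5x7xf32>) -> tensor<1x5x7xf32> {
  %0 = linalg.unpack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2]
      into %dest : tensor<1x3x2x4x2xf32> -> tensor<1x5x7xf32>
  return %0 : tensor<1x5x7xf32>
}
```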