@@ -1753,42 +1753,71 @@ def Tensor_PackOp : Tensor_RelayoutOp<"pack", [
17531753 AttrSizedOperandSegments]> {
17541754 let summary = "tensor pack operation";
17551755 let description = [{
1756- The pack operation converts an input tensor to a higher-dimensional tensor
1757- with a tiled and packed layout. The mandatory `inner_dims_pos` attribute
1758- specifies a permutation for the original dimensions, while `inner_tiles` is the
1759- tiling factor for each dimension. The optional attribute `outer_dims_perm`
1760- specifies the order for the tiled data dimension, while the attribute
1761- `padding_value` specifies a padding value at the boundary on non-perfectly
1762- divisible dimensions. Padding is optional:
1763- - If absent, it is UB if the tile does not perfectly divide the dimension.
1764- - If present, it will pad along high dimensions (high-padding) to make the
1765- tile complete.
1766-
1767- Example NC_to_NCnc:
1756+ The "pack" operation converts a source tensor of rank `n` into a result
1757+ tensor of rank `n + k` with a tiled and packed layout (possibly with padding)
1758+ and optionally transposes the tiled source tensor dimensions.
1759+
1760+ `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
1761+ being tiled, where `0 < k <= n`. The order of the dimensions matters:
1762+ - The tiled dimensions (of size `inner_tiles`) are added to the end of the
1763+ result tensor in the order in which they appear in `inner_dims_pos`.
1764+ - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
1765+ `inner_tiles[i]`.
1769+
1770+ `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
1771+ correspond to the least significant ("inner") result tensor dimension sizes,
1772+ in the same order. Tile sizes can be static or dynamic.
1773+
1774+ Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
1775+ `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
1776+ by 16 and the 1st source dimension is tiled by 32. Other source dimensions
1777+ (if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is
1778+ tiled by 16 and the 0th dimension is tiled by 32.
17681779
1780+ Example:
17691781 ```mlir
1770- %0 = tensor.pack %source inner_dims_pos = [0, 1]
1771- inner_tiles = [8, 32] into %dest : tensor<128x256xf32> -> tensor<16x8x8x32xf32>
1782+ // NC to NCnc
1783+ %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
1784+ into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
1785+ // \ / \ /
1786+ // outer dims inner dims
17721787 ```
1773- Example CK to KCck
17741788
1789+ `outer_dims_perm` (optional) specifies a permutation for the outer
1790+ dimensions. If specified, it must have `n` elements.
1791+
1792+ Example:
17751793 ```mlir
1794+ // CK to KCck
17761795 %0 = tensor.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
1777- inner_tiles = [8, 32] into %dest : tensor<128x256xf32> -> tensor<8x16x8x32xf32>
1796+ inner_tiles = [8, 32] into %dest
1797+ : tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32>
1798+ // \ /
1799+ // compare with "NC to NCnc": outer dims are transposed
17781800 ```
17791801
1780- In all cases, dimension at position 0 in the input tensor (128) is tiled
1781- with a factor of 8, while dimension at position 1 (256) is tiled with a factor
1782- of 32. In the second example, the outer data dimensions are interchanged
1783- according to `outer_dims_perm`.
1784-
1785- Example NC_to_NCnc with padding:
1802+ `padding_value` specifies a padding value at the boundary on non-perfectly
1803+ divisible dimensions. Padding is optional:
1804+ - If absent, it is UB if the tile does not perfectly divide the dimension.
1805+ - If present, it will pad along high dimensions (high-padding) to make the
1806+ tile complete.
17861807
1808+ Example:
17871809 ```mlir
1788- %0 = tensor.pack %arg padding_value(%pad : f32) inner_dims_pos = [0, 1]
1789- inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
1810+ %0 = tensor.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0]
1811+ inner_dims_pos = [1] inner_tiles = [2] into %arg1
1812+ : tensor<200x127x256xf32> -> tensor<256x64x200x2xf32>
1813+ // \
1814+ // padded and tiled dim
1815+ //
1816+ // Source dimension 1 is tiled. The tile size 2 does not divide 127 evenly,
1817+ // so 1 padded element is added at the end (64 tiles of size 2 = 128).
1818+ //
1819+ // Note: Only tiled dimensions can be padded.
17901820 ```
1791-
17921821 }];
17931822 let arguments = (ins AnyRankedTensor:$source,
17941823 AnyRankedTensor:$dest,
@@ -1880,25 +1909,40 @@ def Tensor_PackOp : Tensor_RelayoutOp<"pack", [
18801909def Tensor_UnPackOp : Tensor_RelayoutOp<"unpack"> {
18811910 let summary = "tensor unpack operation";
18821911 let description = [{
1883- The unpack operation converts a tensor with a tiled and packed layout to a
1884- lower-dimensional tensor. Similar to `pack`, the mandatory attributes
1885- `inner_dims_pos` specifies a permutation for the inner data dimensions, while
1886- `inner_tiles` is the tiling factor. The attribute `outer_dims_perm` has the
1887- exact behavior as the one described in `pack`. In `unpack`, it is UB if the
1888- tile does not perfectly divide the dimension.
1912+ The "unpack" operation converts a source tensor of rank `n` with a tiled and
1913+ packed layout to a result tensor of rank `n - k`.
1914+
1915+ `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
1916+ which the last `k` source tensor dimensions are combined, where
1917+ `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
1918+ The order of the dimensions in `inner_dims_pos` matters: dimension
1919+ `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
1920+ `outer_dims_perm` is not specified).
1921+
1922+ `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
1923+ correspond to the least significant ("inner") source tensor dimension sizes.
1924+ The behavior of this op is undefined if:
1925+ - `inner_tiles` do not exactly match the corresponding source tensor
1926+ dimension sizes.
1927+ - Or, `inner_tiles[i]` does not divide the size of dimension
1928+ `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
1929+ evenly.
1930+
1931+ `outer_dims_perm` (optional) specifies a permutation for the outer
1932+ dimensions. If specified, it must have `n - k` elements. If specified, this
1933+ permutation is applied before combining any dimensions.
18891934
1890- Example NCnc_to_NC:
1891-
1892- ```mlir
1893- %0 = tensor.unpack %source inner_dims_pos = [0, 1]
1894- inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
1895- ```
1896-
1897- Example CK to KCck:
1935+ Example:
18981936
18991937 ```mlir
1900- %0 = tensor.unapck %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
1901- inner_tiles = [8, 32] into %dest : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
1938+ // NCnc to NC:
1939+ %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
1940+ into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
1941+
1942+ // KCck to CK:
1943+ %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
1944+ inner_tiles = [8, 32] into %dest
1945+ : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
19021946 ```
19031947 }];
19041948 let arguments = (ins AnyRankedTensor:$source,
0 commit comments