@@ -93,19 +93,20 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
9393 tensor of rank `n + k` with a tiled and packed layout (maybe with padding)
9494 and optionally transposes the tiled source tensor dimensions.
9595
96- `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
97- being tiled, where `0 < k <= n`.
98- - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
99- `inner_tiles[i]` where `0 <= i < k`.
100- - the resulting tiled source dimension maps to an outer dimension of the
101- packed tensor in the order the non-tiled dimension appeared in the source
102- tensor, i.e. `shape(result)[inner_dims_pos[i]]` is equal to
103- `shape(source)[inner_dims_pos[i]] / inner_tiles[i]`.
104-
10596 `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
10697 correspond to the least significant ("inner") result tensor dimension sizes,
10798 in the same order. Tile sizes can be static or dynamic.
10899
100+ `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
101+ being tiled, where `0 <= k <= n`.
102+ - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
103+ `inner_tiles[i]` where `0 <= i < k`.
104+ - The tiled dimensions (of size `inner_tiles`) are added to the end of the
105+ result tensor in the order in which they appear, i.e.
106+ `shape(result)[rank(source) + i] = inner_tiles[i]` for `0 <= i < k`.
107+ - The following relationship for the tiled dimensions holds:
108+ `shape(result)[inner_dims_pos[i]] = shape(source)[inner_dims_pos[i]] / inner_tiles[i]`.
109+
109110 Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
110111 `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
111112 by 16 and the 1st source dimension is tiled by 32. Other source dimensions
@@ -118,17 +119,19 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
118119 %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
119120 into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
120121 // \ / \ /
121- // outer dims inner dims
122+ // Outer Dims: 16x8 Inner Dims: 8x32
123+
122124 // CHW to CHWwh
123125 %0 = linalg.pack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2]
124- into %dest : tensor<1x8x16xf32> -> tensor<1x2x4 x 4x2 xf32>
125- // \ / \ /
126- // outer dims inner dims
126+ into %dest : tensor<3x20x24xf32> -> tensor<3x10x6 x 4x2 xf32>
127+ // \ / \ /
128+ // Outer Dims: 3x10x6 Inner Dims: 4x2
129+
127130 // HCW to HCWwh
128131 %0 = linalg.pack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2]
129- into %dest : tensor<20x1x12xf32 > -> tensor<10x1x3 x 4x2xf32 >
130- // \ / \ /
131- // Outer Dims: 10x1x3 Inner Dims: 4x2
132+ into %dest : tensor<18x3x32xf32 > -> tensor<9x3x8 x 4x2 xf32 >
133+ // \ / \ /
134+ // Outer Dims: 9x3x8 Inner Dims: 4x2
132135 ```
133136
134137 `outer_dims_perm` (optional) specifies a permutation for the outer
@@ -258,15 +261,6 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
258261 The "unpack" operation converts a source tensor of rank `n` with a tiled and
259262 packed layout to a result tensor of rank `n - k`.
260263
261- `inner_dims_pos` (mandatory) specifies `k` result tensor dimensions that
262- were tiled with the `inner_tiles` to create the packed source tensor. The
263- source tensor dimensions can be combined given `inner_dims_pos` as follows:
264- the inner tile `shape(source)[n-k+i]` is combined with
265- `shape(source)[inner_dims_pos[i]]` where `0 <= i < k` and stored at
266- `shape(result)[inner_dims_pos[i]]`. The remaining dimensions are
267- `shape(result)[j] = shape(source)[j]` where `0 <= j < n-k` and `j` is not in
268- the set of `inner_dims_pos` indices.
269-
270264 `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
271265 correspond to the least significant ("inner") source tensor dimension sizes.
272266 The behavior of this op is undefined if:
@@ -276,36 +270,50 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
276270 `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
277271 evenly.
278272
273+ `inner_dims_pos` (mandatory) specifies `k` result tensor (i.e. unpacked
274+ tensor) dimensions that were tiled with the `inner_tiles` to create the
275+ packed source tensor. The source tensor (i.e. packed tensor) dimensions can
276+ be unpacked given `inner_dims_pos` as follows:
277+ - For `0 <= i < k` the following relationship holds:
278+ `shape(result)[inner_dims_pos[i]] <= shape(source)[n-k+i] * shape(source)[inner_dims_pos[i]]`.
279+ - For `0 <= j < n-k` and `j` not in `inner_dims_pos` the following relationship holds:
280+ `shape(result)[j] = shape(source)[j]`.
281+
279282 `outer_dims_perm` (optional) specifies a permutation for the outer
280283 dimensions. If specified, it must have `n - k` elements. If specified, this
281284 permutation is applied before combining any dimensions.
282285
283- Note, the amount of elements in the source (packed tensor) and the result
284- (unpacked) can be unequal, i.e. `SizeOf(source) >= SizeOf(result)`. As
285- the unpack operation may drop any padding introduced by the pack operation .
286+ Note, the unpack operation may drop any padding introduced by the pack
287+ operation and hence the following holds:
288+ `NumElementsOf(source) >= NumElementsOf(result)`.
286289
287290 Examples:
288291
289292 ```mlir
290293 // NCnc to NC:
291294 %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
292- into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
295+ into %dest : tensor<16x8 x 8x32 xf32> -> tensor<128x256xf32>
296+ // \ / \ /
297+ // Outer Dims: 16x8 Inner Dims: 8x32
293298
294299 // KCck to CK:
295300 %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
296- inner_tiles = [8, 32] into %dest
297- : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
301+ inner_tiles = [8, 32]
302+ into %dest : tensor<8x16 x 8x32 xf32> -> tensor<128x256xf32>
303+ // \ / \ /
304+ // Outer Dims: 8x16 Inner Dims: 8x32
298305
299306 // CHWwh to CHW:
300307 %0 = linalg.unpack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2]
301- into %dest : tensor<1x3x2x4x2xf32> -> tensor<1x5x7xf32>
302- // / \
303- // Outer Dims: 1x3x2 Inner Dims: 4x2
308+ into %dest : tensor<3x10x6 x 4x2 xf32> -> tensor<3x20x24xf32>
309+ // \ / \ /
310+ // Outer Dims: 3x10x6 Inner Dims: 4x2
311+
304312 // HCWwh to HCW:
305313 %0 = linalg.unpack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2]
306- into %dest : tensor<10x1x3 x 4x2xf32 > -> tensor<20x1x12xf32 >
307- // / \
308- // Outer Dims: 10x1x3 Inner Dims: 4x2
314+ into %dest : tensor<9x3x8 x 4x2 xf32 > -> tensor<18x3x32xf32 >
315+ // \ / \ /
316+ // Outer Dims: 9x3x8 Inner Dims: 4x2
309317 ```
310318 }];
311319 let arguments = (ins AnyRankedTensor:$source,
0 commit comments