Fix comments

banach-space · banach-space · commit 8ba75d4900e2 · 2025-10-13T08:25:49.000Z
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1650,11 +1650,12 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> {
 /// Rewrites a linalg::PackOp into a sequence of:
 ///   * tensor::PadOp + linalg::TransposeOp + tensor::EmptyOp +
 ///     tensor::InsertSliceOp ops.
+/// (InsertSliceOp is rank-expanding).
 ///
-/// Requires that all the tiled outer dims of the input linalg::PackOp are 1.
-/// Note that this constraint means to effectively one tile is packed.
+/// Requires that all the tiled-outer-dims of the input linalg::PackOp are 1.
+/// Note that this constraint means that effectively exactly one tile is packed.
 ///
-/// In addition, assumes that the un-tiled outer dims are not permuted.
+/// In addition, assumes that the un-tiled-outer-dims are not permuted.
 ///
 /// Before:
 /// ```
@@ -1690,11 +1691,13 @@ struct DecomposeOuterUnitDimsPackOpPattern
                                 PatternRewriter &rewriter) const override;
 };
 
-/// Rewrites a linalg::UnPackOp into a sequence of rank-reduced
+/// Rewrites a linalg::UnPackOp into a sequence of:
 ///   * tensor::ExtractSliceOp + linalg::TransposeOp + tensor::InsertSliceOp
+/// (ExtractSliceOp is rank-reducing).
 ///
-/// Requires that all the tiled outer dims of the input linalg::PackOp are 1.
-/// Note that this constraint means to effectively one tile is unpacked.
+/// Requires that all the tiled-outer-dims of the input linalg::UnPackOp are 1.
+/// Note that this constraint means that effectively exactly one tile is
+/// unpacked.
 ///
 /// Before:
 /// ```
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -1143,15 +1143,18 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
   ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos();
   auto outerDimsPerm = packOp.getOuterDimsPerm();
 
-  // Verify that there are no non-unit un-tiled outer dims that are permuted.
-  // Supporting such cases will require refining the logic to generate the
-  // Transpose Op.
+  // Verify that none of the outer dims that are both non-unit and un-tiled
+  // have been permuted (see the per-dim checks below). Supporting such a
+  // permutation would require refining the logic that generates the Transpose
+  // Op.
   if (!llvm::all_of(outerDimsPerm, [&innerDimsPos, &packOp](int64_t dim) {
         static int prev = 0;
-        // Tiled dims are not relevant here.
+        // Skip tiled dims - these can be permuted.
         if (llvm::is_contained(innerDimsPos, dim))
           return true;
-        // Was this dim permuted? Note, permuting unit dims is fine.
+
+        // Check whether this dim has been permuted. Permuting unit dims is fine
+        // as that's effectively a no-op.
         if (dim < prev && (packOp.getType().getShape()[prev] != 1 ||
                            packOp.getType().getShape()[dim] != 1))
           return false;
@@ -1182,8 +1185,7 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
   // Assumptions made:
   //  - All tiled outer dims are 1 - the corresponding transposition order
   //    doesn't matter, but requires all dim indices to be present.
-  //  - Un-tiled outer dims remain un-permuted. (TODO: Fail when this does not
-  //    hold)
+  //  - Un-tiled outer dims remain un-permuted.
 
   // 2.1 Get the permutation for linalg.transpose:
   //   [ untiled-dims, inner-dims-pos ]
@@ -1240,16 +1242,15 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
   auto transposedOp = linalg::TransposeOp::create(rewriter, loc, input, empty,
                                                   srcPermForTranspose);
 
-  // 3. Insert the inner tile to the destination:
+  // 3. Insert the inner tile into the destination tensor:
   //  %inserted_tile = tensor.insert_slice(%transposed_tile)
 
   // Compute the sizes attribute:
   //    [ outer-dims, tile-sizes ]
   // Note that the output from the transpose Op excludes the tiled outer dims.
-  // Given the assumptions (all tiled outer dims == 1), we can safely use a
-  // rank-expanding tensor.insert_slice. Rather than manually computing where to
-  // insert new unit dims (resulting from the expansion), use the Pack op
-  // attributes.
+  // However, given the assumption that:
+  //  * all tiled outer dims == 1,
+  // we can just use a rank-expanding tensor.insert_slice.
   SmallVector<OpFoldResult> writeSizes;
   for (auto size : packOp.getAllOuterDims()) {
     writeSizes.push_back(rewriter.getIndexAttr(size));
@@ -1261,10 +1262,11 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
     writeSizes.push_back(tileSizeOfr);
   }
 
+  // TODO: Add a constructor for tensor.insert_slice that requires neither
+  // strides nor offsets.
   SmallVector<OpFoldResult> writeStrides(destRank, oneIdxAttr);
   SmallVector<OpFoldResult> writeOffsets(destRank, zeroIdxAttr);
 
-  // TODO: A constructor that doesn't require strides nor offsets.
   auto insert = tensor::InsertSliceOp::create(
       rewriter, loc, transposedOp.getResult()[0], packOp.getDest(),
       writeOffsets, writeSizes, writeStrides);
diff --git a/mlir/test/Dialect/Linalg/decompose-pack.mlir b/mlir/test/Dialect/Linalg/decompose-pack.mlir
@@ -340,16 +340,17 @@ func.func @pack_with_non_adjacent_and_non_permuted_inner_dims(%arg0: tensor<8x1x
 // CHECK:         return %[[INSERT]]
 
 // -----
-/// Note "126", which is a non-unit tile-outer-dim. This is not supported.
+
+/// Note "126", which is a non-unit tiled-outer-dim. This is not supported.
 
 func.func @negative_non_unit_tiled_outer_dim(%dest: tensor<1x126x1x1x8xf32>, %src: tensor<1x1x1x1001xf32>, %pad: f32) -> tensor<1x126x1x1x8xf32> {
   %pack = linalg.pack %src
     padding_value(%pad : f32)
     outer_dims_perm = [0, 3, 2, 1]
     inner_dims_pos = [3]
     inner_tiles = [8]
-    into %dest : tensor<1x1x1x1001xf32>
-    -> tensor<1x126x1x1x8xf32>
+    into %dest
+    : tensor<1x1x1x1001xf32> -> tensor<1x126x1x1x8xf32>
 
   return %pack : tensor<1x126x1x1x8xf32>
 }