
Commit dca3747

[Dispatch Creation] Drop unit dims from tensor.extract ops (#22503)
Adds a pattern for `tensor.extract` that folds away unit dimensions. Without this pattern, reshapes are left in the program and may re-introduce the unit dims when propagated. The added xfail is already tracked by issue #20011.

Signed-off-by: Ian Wood <[email protected]>
1 parent 7667525 commit dca3747
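
In short: when a `tensor.extract` reads from a tensor with unit dimensions, the pattern collapses those dimensions away and drops the corresponding indices (which must be 0 to be in bounds). A minimal before/after sketch of the rewrite, using a hypothetical function and shape chosen for illustration:

util.func @example(%src: tensor<1x4x1x8xf32>, %i0: index, %i1: index, %i2: index, %i3: index) -> f32 {
  // Before the pattern runs: the extract indexes all four dims.
  %v = tensor.extract %src[%i0, %i1, %i2, %i3] : tensor<1x4x1x8xf32>
  util.return %v : f32
}

// After: each unit dim is grouped with the following non-unit dim and
// collapsed away; the indices into the unit dims are dropped.
util.func @example(%src: tensor<1x4x1x8xf32>, %i0: index, %i1: index, %i2: index, %i3: index) -> f32 {
  %collapsed = tensor.collapse_shape %src [[0, 1], [2, 3]]
      : tensor<1x4x1x8xf32> into tensor<4x8xf32>
  %v = tensor.extract %collapsed[%i1, %i3] : tensor<4x8xf32>
  util.return %v : f32
}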

4 files changed: +171 −6 lines changed


compiler/src/iree/compiler/DispatchCreation/FoldUnitExtentDims.cpp

Lines changed: 55 additions & 6 deletions
@@ -26,6 +26,7 @@
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

@@ -204,11 +205,58 @@ struct DropUnitDimsFromCollapseOfExpand
   }
 };

-} // namespace
+// Fold unit dims from `tensor.extract` ops.
+struct FoldUnitDimsFromExtractOp : OpRewritePattern<tensor::ExtractOp> {
+  using Base::Base;
+  LogicalResult matchAndRewrite(tensor::ExtractOp extractOp,
+                                PatternRewriter &rewriter) const override {
+    RankedTensorType srcType = extractOp.getTensor().getType();
+    if (srcType.getShape().empty() ||
+        llvm::none_of(srcType.getShape(),
+                      [](int64_t size) { return size == 1; })) {
+      return failure();
+    }
+    SmallVector<Value> oldIndices = extractOp.getIndices();
+
+    SmallVector<int64_t> newShape;
+    SmallVector<Value> newIndices;
+    SmallVector<ReassociationIndices> reassoc;
+    ReassociationIndices currReassoc;
+
+    // Build reassociation groups where each non-unit dimension forms one
+    // output dimension, and unit dimensions are grouped with adjacent
+    // non-unit dims.
+    for (auto [idx, size] : llvm::enumerate(srcType.getShape())) {
+      currReassoc.push_back(idx);
+
+      if (size != 1) {
+        // Non-unit dimension: this forms one output dimension.
+        // Finish the current group and start a new one.
+        reassoc.push_back(std::move(currReassoc));
+        currReassoc.clear();
+        newShape.push_back(size);
+        newIndices.push_back(oldIndices[idx]);
+      }
+    }

-//===----------------------------------------------------------------------===//
-// Pass helpers
-//===----------------------------------------------------------------------===//
+    // If we have trailing unit dims, merge them with the last group.
+    if (!currReassoc.empty() && !reassoc.empty()) {
+      reassoc.back().append(currReassoc.begin(), currReassoc.end());
+    }
+
+    rewriter.setInsertionPointAfterValue(extractOp.getTensor());
+    auto collapseOp = tensor::CollapseShapeOp::create(
+        rewriter, extractOp.getLoc(), extractOp.getTensor(), reassoc);
+
+    rewriter.setInsertionPointAfter(extractOp);
+    auto newExtract = tensor::ExtractOp::create(
+        rewriter, extractOp.getLoc(), extractOp.getResult().getType(),
+        collapseOp.getResult(), newIndices);
+    rewriter.replaceOp(extractOp, newExtract);
+    return success();
+  }
+};
+
+} // namespace

 static void
 populatefoldUnitDimsPatterns(RewritePatternSet &foldUnitDimsPatterns) {
@@ -230,8 +278,9 @@ populatefoldUnitDimsPatterns(RewritePatternSet &foldUnitDimsPatterns) {
   IREE::LinalgExt::populateFoldUnitExtentDimsPatterns(foldUnitDimsPatterns,
                                                       options);
   linalg::populateMoveInitOperandsToInputPattern(foldUnitDimsPatterns);
-  foldUnitDimsPatterns.insert<DropUnitDimsFromCollapseOfExpand>(
-      foldUnitDimsPatterns.getContext());
+  foldUnitDimsPatterns
+      .insert<DropUnitDimsFromCollapseOfExpand, FoldUnitDimsFromExtractOp>(
+          foldUnitDimsPatterns.getContext());
 }

 static LogicalResult
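
To make the grouping concrete: for a source of type tensor<1x4x1x8x1xf32> (the shape used by the @fold_unit_dims_from_extract_multiple test below), the loop attaches each leading unit dim to the next non-unit dim, and the post-loop fixup appends the trailing unit dim to the last group, so the reassociation is [[0, 1], [2, 3, 4]]. A sketch of the resulting IR, with illustrative value names:

// Unit dims 0 and 2 are grouped with the non-unit dims 1 and 3 that follow
// them; the trailing unit dim 4 is merged into the last group.
%collapsed = tensor.collapse_shape %src [[0, 1], [2, 3, 4]]
    : tensor<1x4x1x8x1xf32> into tensor<4x8xf32>
%v = tensor.extract %collapsed[%idx1, %idx3] : tensor<4x8xf32>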

compiler/src/iree/compiler/DispatchCreation/test/fold_unit_dims.mlir

Lines changed: 114 additions & 0 deletions
@@ -344,3 +344,117 @@ util.func @collapse_of_expand_preserved_trailing_unit_dims(%arg0: tensor<1x23040
 // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[EXPAND]]
 // CHECK-SAME: tensor<1x4x5760x1xbf16> into tensor<4x5760x1xbf16>
 // CHECK: util.return %[[COLLAPSE]] : tensor<4x5760x1xbf16>
+
+// -----
+
+util.func @fold_unit_dims_from_extract_leading(%arg0: tensor<1x4x8xf32>, %idx0: index, %idx1: index, %idx2: index) -> f32 {
+  %extracted = tensor.extract %arg0[%idx0, %idx1, %idx2] : tensor<1x4x8xf32>
+  util.return %extracted : f32
+}
+// CHECK-LABEL: util.func public @fold_unit_dims_from_extract_leading
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<1x4x8xf32>
+// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX2:[a-zA-Z0-9]+]]: index
+// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1], [2]{{\]}}
+// CHECK-SAME: tensor<1x4x8xf32> into tensor<4x8xf32>
+// CHECK: %[[EXTRACT:.+]] = tensor.extract %[[COLLAPSED]][%[[IDX1]], %[[IDX2]]]
+// CHECK: util.return %[[EXTRACT]] : f32
+
+// -----
+
+util.func @fold_unit_dims_from_extract_trailing(%arg0: tensor<4x8x1xf32>, %idx0: index, %idx1: index, %idx2: index) -> f32 {
+  %extracted = tensor.extract %arg0[%idx0, %idx1, %idx2] : tensor<4x8x1xf32>
+  util.return %extracted : f32
+}
+// CHECK-LABEL: util.func public @fold_unit_dims_from_extract_trailing
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<4x8x1xf32>
+// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX2:[a-zA-Z0-9]+]]: index
+// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0], [1, 2]{{\]}}
+// CHECK-SAME: tensor<4x8x1xf32> into tensor<4x8xf32>
+// CHECK: %[[EXTRACT:.+]] = tensor.extract %[[COLLAPSED]][%[[IDX0]], %[[IDX1]]]
+// CHECK: util.return %[[EXTRACT]] : f32
+
+// -----
+
+util.func @fold_unit_dims_from_extract_middle(%arg0: tensor<4x1x8xf32>, %idx0: index, %idx1: index, %idx2: index) -> f32 {
+  %extracted = tensor.extract %arg0[%idx0, %idx1, %idx2] : tensor<4x1x8xf32>
+  util.return %extracted : f32
+}
+// CHECK-LABEL: util.func public @fold_unit_dims_from_extract_middle
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<4x1x8xf32>
+// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX2:[a-zA-Z0-9]+]]: index
+// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0], [1, 2]{{\]}}
+// CHECK-SAME: tensor<4x1x8xf32> into tensor<4x8xf32>
+// CHECK: %[[EXTRACT:.+]] = tensor.extract %[[COLLAPSED]][%[[IDX0]], %[[IDX2]]]
+// CHECK: util.return %[[EXTRACT]] : f32
+
+// -----
+
+util.func @fold_unit_dims_from_extract_multiple(%arg0: tensor<1x4x1x8x1xf32>, %idx0: index, %idx1: index, %idx2: index, %idx3: index, %idx4: index) -> f32 {
+  %extracted = tensor.extract %arg0[%idx0, %idx1, %idx2, %idx3, %idx4] : tensor<1x4x1x8x1xf32>
+  util.return %extracted : f32
+}
+// CHECK-LABEL: util.func public @fold_unit_dims_from_extract_multiple
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<1x4x1x8x1xf32>
+// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX2:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX3:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX4:[a-zA-Z0-9]+]]: index
+// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3, 4]{{\]}}
+// CHECK-SAME: tensor<1x4x1x8x1xf32> into tensor<4x8xf32>
+// CHECK: %[[EXTRACT:.+]] = tensor.extract %[[COLLAPSED]]
+// CHECK: util.return %[[EXTRACT]] : f32
+
+// -----
+
+// Test folding consecutive unit dims from tensor.extract.
+util.func @fold_unit_dims_from_extract_consecutive(%arg0: tensor<1x1x1x8xf32>, %idx0: index, %idx1: index, %idx2: index, %idx3: index) -> f32 {
+  %extracted = tensor.extract %arg0[%idx0, %idx1, %idx2, %idx3] : tensor<1x1x1x8xf32>
+  util.return %extracted : f32
+}
+// CHECK-LABEL: util.func public @fold_unit_dims_from_extract_consecutive
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<1x1x1x8xf32>
+// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX2:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX3:[a-zA-Z0-9]+]]: index
+// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3]{{\]}}
+// CHECK-SAME: tensor<1x1x1x8xf32> into tensor<8xf32>
+// CHECK: %[[EXTRACT:.+]] = tensor.extract %[[COLLAPSED]][%[[IDX3]]]
+// CHECK: util.return %[[EXTRACT]] : f32
+
+// -----
+
+// Test folding unit dims with dynamic dimensions.
+util.func @fold_unit_dims_from_extract_dynamic(%arg0: tensor<1x?x1xf32>, %idx0: index, %idx1: index, %idx2: index) -> f32 {
+  %extracted = tensor.extract %arg0[%idx0, %idx1, %idx2] : tensor<1x?x1xf32>
+  util.return %extracted : f32
+}
+// CHECK-LABEL: util.func public @fold_unit_dims_from_extract_dynamic
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<1x?x1xf32>
+// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]: index
+// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]: index
+// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2]{{\]}}
+// CHECK-SAME: tensor<1x?x1xf32> into tensor<?xf32>
+// CHECK: %[[EXTRACT:.+]] = tensor.extract %[[COLLAPSED]][%[[IDX1]]]
+// CHECK: util.return %[[EXTRACT]] : f32
+
+// -----
+
+util.func @fold_unit_dims_from_extract_all_unit(%arg0: tensor<1x1x1xf32>, %idx0: index, %idx1: index, %idx2: index) -> f32 {
+  %extracted = tensor.extract %arg0[%idx0, %idx1, %idx2] : tensor<1x1x1xf32>
+  util.return %extracted : f32
+}
+// CHECK-LABEL: util.func public @fold_unit_dims_from_extract_all_unit
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<1x1x1xf32>
+// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] []
+// CHECK-SAME: tensor<1x1x1xf32> into tensor<f32>
+// CHECK: %[[EXTRACT:.+]] = tensor.extract %[[COLLAPSED]]
+// CHECK-SAME: tensor<f32>
+// CHECK: util.return %[[EXTRACT]] : f32

tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O0.json

Lines changed: 1 addition & 0 deletions
@@ -346,6 +346,7 @@
 "onnx/node/generated/test_reduce_sum_empty_set_non_reduced_axis_zero",
 "onnx/node/generated/test_resize_downsample_scales_cubic_align_corners",
 "onnx/node/generated/test_resize_downsample_scales_linear_align_corners",
+"onnx/node/generated/test_reversesequence_time",
 "onnx/node/generated/test_scan_sum",
 "onnx/node/generated/test_sce_mean_weight",
 "onnx/node/generated/test_sce_mean_weight_ii",

tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O2.json

Lines changed: 1 addition & 0 deletions
@@ -352,6 +352,7 @@
 "onnx/node/generated/test_reduce_sum_empty_set_non_reduced_axis_zero",
 "onnx/node/generated/test_resize_downsample_scales_cubic_align_corners",
 "onnx/node/generated/test_resize_downsample_scales_linear_align_corners",
+"onnx/node/generated/test_reversesequence_time",
 "onnx/node/generated/test_scan_sum",
 "onnx/node/generated/test_sce_mean_weight",
 "onnx/node/generated/test_sce_mean_weight_ii",
