
Commit ca4c5c5

IanWood1 authored and weidel-p committed
[Codegen] Handle multiple dyn dims in tensor load pattern (iree-org#22328)

Fix a compile error when `FoldExpandShapeIntoInterfaceTensorLoad` tries to fold a `tensor.expand_shape` with multiple dynamic dims into an `iree_tensor_ext.dispatch.tensor.load` op. When the expanded output shape cannot be inferred, the pattern now falls back to the output-shape SSA values of the expand shape.

Fixes iree-org#22324

Signed-off-by: Ian Wood <[email protected]>
Signed-off-by: Philipp <[email protected]>
1 parent 8abb0cd commit ca4c5c5
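
For orientation, here is a minimal before/after sketch of the IR this change targets. It is not part of the commit: the value names %n, %sz0, and %sz1 are invented, and the subspan operands are elided. Before the fold, the load yields a 1-D tensor that is expanded with two dynamic dims in a single reassociation group, so the expanded shape cannot be computed from %n alone:

    %s = hal.interface.binding.subspan ...
        : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?xf32>>{%n}
    %t = iree_tensor_ext.dispatch.tensor.load %s, offsets = [0], sizes = [%n], strides = [1]
        : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?xf32>>{%n} -> tensor<?xf32>
    %e = tensor.expand_shape %t [[0, 1]] output_shape [%sz0, %sz1]
        : tensor<?xf32> into tensor<?x?xf32>

After the fold, the pattern reuses the output_shape operands to describe the expanded subspan and load directly, mirroring the CHECK lines of the new test below:

    %s = hal.interface.binding.subspan ...
        : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32>>{%sz0, %sz1}
    %e = iree_tensor_ext.dispatch.tensor.load %s, offsets = [0, 0], sizes = [%sz0, %sz1], strides = [1, 1]
        : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32>>{%sz0, %sz1} -> tensor<?x?xf32>

This only works when %sz0 and %sz1 are available before the subspan op, which is what the new moveValueDefinitions check in the patch enforces.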

2 files changed: +60, -1

compiler/src/iree/compiler/Codegen/Common/ReshapePatterns.cpp

Lines changed: 12 additions & 1 deletion
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/RegionUtils.h"
 
 #define DEBUG_TYPE "iree-codegen-reshape-patterns"
 
@@ -207,12 +208,22 @@ struct FoldExpandShapeIntoInterfaceTensorLoad
     auto currStaticDims = loadOp.getType().getShape();
     auto currOfrDynamicDims =
         mlir::getMixedValues(currStaticDims, currDynamicDims, rewriter);
+
+    // Try to infer the expanded shape. This only works if each reassociation
+    // has <=1 dyn dim.
     std::optional<SmallVector<OpFoldResult>> expandedDims =
         mlir::inferExpandShapeOutputShape(
             rewriter, subspanOp.getLoc(), reshapeOp.getType(),
             reshapeOp.getReassociationIndices(), currOfrDynamicDims);
     if (!expandedDims) {
-      return reshapeOp.emitOpError("failure in expanded shape");
+      // If inference fails, try to use the reshape's SSA values.
+      if (failed(mlir::moveValueDefinitions(
+              rewriter, reshapeOp.getOutputShape(), subspanOp))) {
+        return rewriter.notifyMatchFailure(reshapeOp,
+                                           "could not infer output shape or "
+                                           "move SSA values before subspan op");
+      }
+      expandedDims = reshapeOp.getMixedOutputShape();
     }
 
     auto tensorAccess =
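
The comment in the hunk above is the crux: mlir::inferExpandShapeOutputShape recovers a dynamic output extent by dividing the group's input extent by the product of the remaining static extents, so it fails as soon as one reassociation group contains two or more dynamic dims. A hedged illustration with invented names (%t of dynamic size %n, plus %d, %d0, %d1), not taken from the patch:

    // Inferable: the single dynamic extent in the group must be %n floordiv 32.
    %a = tensor.expand_shape %t [[0, 1, 2]] output_shape [2, %d, 16]
        : tensor<?xf32> into tensor<2x?x16xf32>
    // Not inferable: %n = %d0 * %d1 has no unique factorization, so the pattern
    // now falls back to the output_shape operands %d0 and %d1 instead.
    %b = tensor.expand_shape %t [[0, 1]] output_shape [%d0, %d1]
        : tensor<?xf32> into tensor<?x?xf32>

The fallback is only legal if the output_shape values can be made available before the subspan op; mlir::moveValueDefinitions attempts that hoist, and the pattern now reports a match failure instead of a hard error when it cannot.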

compiler/src/iree/compiler/Codegen/Common/test/fold_reshape_into_interface_tensor.mlir

Lines changed: 48 additions & 0 deletions
@@ -47,6 +47,54 @@ func.func @fold_expand_into_loads_dynamic() -> tensor<2x?x16x32xf32> {
 // CHECK-SAME:   offsets = [0, 0, 0, 0], sizes = [2, %[[SHAPE]], 16, 32], strides = [1, 1, 1, 1]
 // CHECK-SAME:   !iree_tensor_ext.dispatch.tensor<readonly:tensor<2x?x16x32xf32>>{%[[SHAPE]]}
 
+// -----
+
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+  #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">], flags = Indirect>
+func.func @fold_expand_into_loads_fully_dynamic() -> tensor<?x?xf32> {
+  %c0 = arith.constant 0 : index
+  %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
+  %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
+  %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
+  %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
+      flags("ReadOnly|Indirect") : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?xf32>>{%0}
+  %4 = iree_tensor_ext.dispatch.tensor.load %3, offsets = [0], sizes = [%0], strides = [1]
+      : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?xf32>>{%0} -> tensor<?xf32>
+  %5 = tensor.expand_shape %4 [[0, 1]] output_shape [%1, %2] : tensor<?xf32> into tensor<?x?xf32>
+  return %5 : tensor<?x?xf32>
+}
+// CHECK-LABEL: func @fold_expand_into_loads_fully_dynamic()
+// CHECK-DAG:   %[[CONST0:.+]] = hal.interface.constant.load {{.*}} ordinal(1)
+// CHECK-DAG:   %[[CONST1:.+]] = hal.interface.constant.load {{.*}} ordinal(2)
+// CHECK:       %[[SUBSPAN:.+]] = hal.interface.binding.subspan
+// CHECK-SAME:    !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32>>{%[[CONST0]], %[[CONST1]]}
+// CHECK:       %[[LOAD:.+]] = iree_tensor_ext.dispatch.tensor.load %[[SUBSPAN]]
+// CHECK-SAME:    offsets = [0, 0], sizes = [%[[CONST0]], %[[CONST1]]]
+// CHECK-SAME:    !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32>>{%[[CONST0]], %[[CONST1]]}
+
+// -----
+
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+  #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">], flags = Indirect>
+func.func @no_fold_expand_into_loads_fully_dynamic() -> tensor<?x?xindex> {
+  %c0 = arith.constant 0 : index
+  %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
+  %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
+  %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
+      flags("ReadOnly|Indirect") : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?xindex>>{%0}
+  %3 = iree_tensor_ext.dispatch.tensor.load %2, offsets = [0], sizes = [%0], strides = [1]
+      : !iree_tensor_ext.dispatch.tensor<readonly:tensor<?xindex>>{%0} -> tensor<?xindex>
+  %4 = tensor.extract %3[%c0] : tensor<?xindex>
+  %5 = tensor.expand_shape %3 [[0, 1]] output_shape [%1, %4] : tensor<?xindex> into tensor<?x?xindex>
+  return %5 : tensor<?x?xindex>
+}
+// This case cannot be folded because the expanded sizes depend on the tensor
+// itself, so the size cannot be known before the load.
+
+// CHECK-LABEL: func @no_fold_expand_into_loads_fully_dynamic()
+// CHECK:       tensor.expand_shape
+
+
 // -----
 
 #pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [