
Commit f549e4f

New things.

1 parent bc742d1 commit f549e4f

2 files changed: +259 −1 lines changed

mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp

Lines changed: 211 additions & 1 deletion
@@ -21,6 +21,7 @@
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/IR/Attributes.h"
+#include "mlir/IR/DialectResourceBlobManager.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/OpDefinition.h"
@@ -95,6 +96,99 @@ static bool checkLayout(Value val) {
          isa<StridedLayoutAttr>(type.getLayout());
 }
 
+/// Produce an OpFoldResult representing the product of the values or constants
+/// referenced by `indices`. `staticShape` provides the statically known sizes
+/// for the source memref, while `values` contains the mixed (value/attribute)
+/// representation produced by `memref.extract_strided_metadata`.
+static OpFoldResult getProductOfValues(ArrayRef<int64_t> indices,
+                                       OpBuilder &builder, Location loc,
+                                       ArrayRef<int64_t> staticShape,
+                                       ArrayRef<OpFoldResult> values) {
+  AffineExpr product = builder.getAffineConstantExpr(1);
+  SmallVector<OpFoldResult> inputs;
+  unsigned numSymbols = 0;
+  for (int64_t idx : indices) {
+    product = product * builder.getAffineSymbolExpr(numSymbols++);
+    if (ShapedType::isDynamic(staticShape[idx]))
+      inputs.push_back(values[idx]);
+    else
+      inputs.push_back(builder.getIndexAttr(staticShape[idx]));
+  }
+  return affine::makeComposedFoldedAffineApply(builder, loc, product, inputs);
+}
+
+/// Return the collapsed size (as OpFoldResult) for the reassociation group
+/// `groupId` of `collapseShapeOp`.
+static SmallVector<OpFoldResult>
+getCollapsedSize(memref::CollapseShapeOp collapseShapeOp, OpBuilder &builder,
+                 ArrayRef<OpFoldResult> origSizes, unsigned groupId) {
+  SmallVector<OpFoldResult> collapsedSize;
+
+  MemRefType resultType = collapseShapeOp.getResultType();
+  int64_t dimSize = resultType.getDimSize(groupId);
+  if (!ShapedType::isDynamic(dimSize)) {
+    collapsedSize.push_back(builder.getIndexAttr(dimSize));
+    return collapsedSize;
+  }
+
+  auto sourceType = collapseShapeOp.getSrcType();
+  ArrayRef<int64_t> staticShape = sourceType.getShape();
+  ArrayRef<int64_t> reassocGroup =
+      collapseShapeOp.getReassociationIndices()[groupId];
+
+  collapsedSize.push_back(getProductOfValues(
+      reassocGroup, builder, collapseShapeOp.getLoc(), staticShape, origSizes));
+  return collapsedSize;
+}
+
+/// Return the collapsed stride (as OpFoldResult) for the reassociation group
+/// `groupId` of `collapseShapeOp`.
+static SmallVector<OpFoldResult> getCollapsedStride(
+    memref::CollapseShapeOp collapseShapeOp, OpBuilder &builder,
+    ArrayRef<OpFoldResult> origSizes, ArrayRef<OpFoldResult> origStrides,
+    unsigned groupId) {
+  ArrayRef<int64_t> reassocGroup =
+      collapseShapeOp.getReassociationIndices()[groupId];
+  assert(!reassocGroup.empty() &&
+         "reassociation group must contain at least one dimension");
+
+  auto sourceType = collapseShapeOp.getSrcType();
+  auto [strides, offset] = sourceType.getStridesAndOffset();
+  (void)offset;
+  ArrayRef<int64_t> srcShape = sourceType.getShape();
+
+  OpFoldResult lastValidStride = nullptr;
+  for (int64_t dim : reassocGroup) {
+    if (srcShape[dim] == 1)
+      continue;
+    int64_t currentStride = strides[dim];
+    if (ShapedType::isDynamic(currentStride))
+      lastValidStride = origStrides[dim];
+    else
+      lastValidStride = builder.getIndexAttr(currentStride);
+  }
+
+  if (!lastValidStride) {
+    MemRefType collapsedType = collapseShapeOp.getResultType();
+    auto [collapsedStrides, collapsedOffset] =
+        collapsedType.getStridesAndOffset();
+    (void)collapsedOffset;
+    int64_t finalStride = collapsedStrides[groupId];
+    if (ShapedType::isDynamic(finalStride)) {
+      for (int64_t dim : reassocGroup) {
+        assert(srcShape[dim] == 1 && "expected size-one dimensions");
+        if (ShapedType::isDynamic(strides[dim]))
+          return {origStrides[dim]};
+      }
+      llvm_unreachable("expected to find a dynamic stride");
+    }
+    return {builder.getIndexAttr(finalStride)};
+  }
+
+  return {lastValidStride};
+}
+
 namespace {
 static Value getTargetMemref(Operation *op) {
   return llvm::TypeSwitch<Operation *, Value>(op)
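
Note: taken together, these helpers linearize each reassociation group. The collapsed size is the product of the member sizes (static sizes are passed as index attributes and folded away, dynamic ones stay as symbols of the composed affine apply), and the collapsed stride is taken from the innermost non-unit member dimension. A minimal sketch of the IR this yields for group [0, 1] of a 2x?x4 source, with value names invented for illustration:

  // Recover sizes/strides of the source, then fold 2 * %sizes#1 into a
  // single affine.apply; makeComposedFoldedAffineApply composes the static
  // factor 2 into the map as a constant.
  %base, %offset, %sizes:3, %strides:3 = memref.extract_strided_metadata %src
      : memref<2x?x4xf32, strided<[?, ?, ?], offset: ?>>
      -> memref<f32>, index, index, index, index, index, index, index
  %group_size = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%sizes#1]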
@@ -256,6 +350,82 @@ struct MemRefRewritePattern : public OpRewritePattern<T> {
   }
 };
 
+/// Flattens memref global ops with more than one dimension into one dimension.
+struct FlattenGlobal final : public OpRewritePattern<memref::GlobalOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  static Attribute flattenAttribute(Attribute value, ShapedType newType) {
+    if (!value)
+      return value;
+    if (auto splatAttr = llvm::dyn_cast<SplatElementsAttr>(value)) {
+      return splatAttr.reshape(newType);
+    } else if (auto denseAttr = llvm::dyn_cast<DenseElementsAttr>(value)) {
+      return denseAttr.reshape(newType);
+    } else if (auto denseResourceAttr =
+                   llvm::dyn_cast<DenseResourceElementsAttr>(value)) {
+      return DenseResourceElementsAttr::get(newType,
+                                            denseResourceAttr.getRawHandle());
+    }
+    return {};
+  }
+
+  LogicalResult
+  matchAndRewrite(memref::GlobalOp globalOp,
+                  PatternRewriter &rewriter) const override {
+    auto oldType = llvm::dyn_cast<MemRefType>(globalOp.getType());
+    if (!oldType || !oldType.getLayout().isIdentity() || oldType.getRank() <= 1)
+      return failure();
+
+    auto tensorType = RankedTensorType::get({oldType.getNumElements()},
+                                            oldType.getElementType());
+    auto memRefType =
+        MemRefType::get({oldType.getNumElements()}, oldType.getElementType(),
+                        AffineMap(), oldType.getMemorySpace());
+    auto newInitialValue =
+        flattenAttribute(globalOp.getInitialValueAttr(), tensorType);
+    rewriter.replaceOpWithNewOp<memref::GlobalOp>(
+        globalOp, globalOp.getSymName(), globalOp.getSymVisibilityAttr(),
+        memRefType, newInitialValue, globalOp.getConstant(),
+        /*alignment=*/IntegerAttr());
+    return success();
+  }
+};
+
+struct FlattenCollapseShape final
+    : public OpRewritePattern<memref::CollapseShapeOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::CollapseShapeOp op,
+                                PatternRewriter &rewriter) const override {
+    Location loc = op.getLoc();
+    memref::ExtractStridedMetadataOp metadata =
+        memref::ExtractStridedMetadataOp::create(rewriter, loc, op.getSrc());
+
+    SmallVector<OpFoldResult> origSizes = metadata.getConstifiedMixedSizes();
+    SmallVector<OpFoldResult> origStrides =
+        metadata.getConstifiedMixedStrides();
+    OpFoldResult offset = metadata.getConstifiedMixedOffset();
+
+    SmallVector<OpFoldResult> collapsedSizes;
+    SmallVector<OpFoldResult> collapsedStrides;
+    unsigned numGroups = op.getReassociationIndices().size();
+    collapsedSizes.reserve(numGroups);
+    collapsedStrides.reserve(numGroups);
+    for (unsigned i = 0; i < numGroups; ++i) {
+      SmallVector<OpFoldResult> groupSizes =
+          getCollapsedSize(op, rewriter, origSizes, i);
+      SmallVector<OpFoldResult> groupStrides =
+          getCollapsedStride(op, rewriter, origSizes, origStrides, i);
+      collapsedSizes.append(groupSizes.begin(), groupSizes.end());
+      collapsedStrides.append(groupStrides.begin(), groupStrides.end());
+    }
+
+    rewriter.replaceOpWithNewOp<memref::ReinterpretCastOp>(
+        op, op.getType(), op.getSrc(), offset, collapsedSizes,
+        collapsedStrides);
+    return success();
+  }
+};
+
 struct FlattenMemrefsPass
     : public mlir::memref::impl::FlattenMemrefsPassBase<FlattenMemrefsPass> {
   using Base::Base;
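
Note: the DenseResourceElementsAttr branch of flattenAttribute matters because resource-backed initializers are not reshaped through DenseElementsAttr::reshape; the pattern instead rebuilds the attribute around the same raw resource handle with the flattened type. A hypothetical example (symbol and resource names invented; the tests below only cover the dense<...> case):

  // Before: resource-backed multi-dimensional global.
  memref.global "private" constant @weights : memref<2x2xf32> = dense_resource<weights_data>
  // After FlattenGlobal (sketch): same handle, rank-1 type.
  // memref.global "private" constant @weights : memref<4xf32> = dense_resource<weights_data>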
@@ -288,12 +458,52 @@ void memref::populateFlattenVectorOpsOnMemrefPatterns(
       patterns.getContext());
 }
 
+/// Special pattern for GetGlobalOp to avoid infinite loops.
+struct FlattenGetGlobal : public OpRewritePattern<memref::GetGlobalOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::GetGlobalOp op,
+                                PatternRewriter &rewriter) const override {
+    // Check if this get_global references a multi-dimensional global.
+    auto module = op->getParentOfType<ModuleOp>();
+    auto globalOp = module.lookupSymbol<memref::GlobalOp>(op.getName());
+    if (!globalOp) {
+      return failure();
+    }
+
+    auto globalType = globalOp.getType();
+    auto resultType = op.getType();
+
+    // Only apply if the global has been flattened but the get_global hasn't.
+    if (globalType.getRank() == 1 && resultType.getRank() > 1) {
+      auto newGetGlobal = memref::GetGlobalOp::create(
+          rewriter, op.getLoc(), globalType, op.getName());
+
+      // Cast the flattened result back to the original shape.
+      memref::ExtractStridedMetadataOp stridedMetadata =
+          memref::ExtractStridedMetadataOp::create(rewriter, op.getLoc(),
+                                                   op.getResult());
+      auto castResult = memref::ReinterpretCastOp::create(
+          rewriter, op.getLoc(), resultType, newGetGlobal,
+          /*offset=*/rewriter.getIndexAttr(0),
+          stridedMetadata.getConstifiedMixedSizes(),
+          stridedMetadata.getConstifiedMixedStrides());
+      rewriter.replaceOp(op, castResult);
+      return success();
+    }
+
+    return failure();
+  }
+};
+
 void memref::populateFlattenMemrefOpsPatterns(RewritePatternSet &patterns) {
   patterns.insert<MemRefRewritePattern<memref::LoadOp>,
                   MemRefRewritePattern<memref::StoreOp>,
                   MemRefRewritePattern<memref::AllocOp>,
                   MemRefRewritePattern<memref::AllocaOp>,
-                  MemRefRewritePattern<memref::DeallocOp>>(
+                  MemRefRewritePattern<memref::DeallocOp>,
+                  FlattenCollapseShape, FlattenGetGlobal, FlattenGlobal>(
       patterns.getContext());
 }
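
Note: FlattenGetGlobal complements FlattenGlobal. Once the global symbol has been rewritten to rank 1, any memref.get_global still producing the old multi-dimensional type (transiently inconsistent IR until this pattern fires) is redirected to the flat buffer and reinterpret_cast back to its original shape, which the other flattening patterns can then consume. A sketch of that intermediate rewrite, with invented names, assuming @g was 3x4 before flattening:

  // Stale user of an already-flattened global @g : memref<12xf32>.
  %0 = memref.get_global @g : memref<3x4xf32>
  // Rewritten to (sketch):
  %1 = memref.get_global @g : memref<12xf32>
  %2 = memref.reinterpret_cast %1 to offset: [0], sizes: [3, 4], strides: [4, 1]
      : memref<12xf32> to memref<3x4xf32>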

mlir/test/Dialect/MemRef/flatten_memref.mlir

Lines changed: 48 additions & 0 deletions
@@ -194,6 +194,35 @@ func.func @mask_load_vector_from_memref_dynamic(%input: memref<3x7xi2>, %row: in
 
 // -----
 
+func.func @collapse_shape_static(%arg0: memref<2x3x4xf32>) -> memref<6x4xf32> {
+  %0 = memref.collapse_shape %arg0 [[0, 1], [2]]
+      : memref<2x3x4xf32> into memref<6x4xf32>
+  return %0 : memref<6x4xf32>
+}
+// CHECK-LABEL: func @collapse_shape_static
+// CHECK: %[[REINT:.*]] = memref.reinterpret_cast %arg0 to offset: [0], sizes: [6, 4], strides: [4, 1]
+// CHECK: return %[[REINT]]
+
+// -----
+
+func.func @collapse_shape_dynamic(
+    %arg0: memref<2x?x4xf32, strided<[?, ?, ?], offset: ?>>) ->
+    memref<?x4xf32, strided<[?, ?], offset: ?>> {
+  %0 = memref.collapse_shape %arg0 [[0, 1], [2]]
+      : memref<2x?x4xf32, strided<[?, ?, ?], offset: ?>>
+      into memref<?x4xf32, strided<[?, ?], offset: ?>>
+  return %0 : memref<?x4xf32, strided<[?, ?], offset: ?>>
+}
+// CHECK: #map = affine_map<()[s0] -> (s0 * 2)>
+// CHECK: #map1 = affine_map<()[s0, s1] -> (s0 * 8 + s1)>
+// CHECK-LABEL: func @collapse_shape_dynamic
+// CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:3, %[[STRIDES:.*]]:3 = memref.extract_strided_metadata %arg0
+// CHECK: %[[SIZE:.*]] = affine.apply #map()[%[[SIZES]]#1]
+// CHECK: %[[REINT:.*]] = memref.reinterpret_cast %arg0 to offset: [%[[OFFSET]]], sizes: [%[[SIZE]], 4], strides: [%[[STRIDES]]#1, %[[STRIDES]]#2]
+// CHECK: return %[[REINT]]
+
+// -----
+
 func.func @transfer_read_memref(%input: memref<4x8xi2>, %value: vector<8xi2>, %row: index, %col: index) -> vector<8xi2> {
   %c0 = arith.constant 0 : i2
   %0 = vector.transfer_read %input[%col, %row], %c0 {in_bounds = [true]} : memref<4x8xi2>, vector<8xi2>
@@ -336,3 +365,22 @@ func.func @dealloc_strided_memref(%input: memref<4x8xf32, strided<[8, 1], offset
 // CHECK-SAME: (%[[ARG0:.*]]: memref<4x8xf32, strided<[8, 1], offset: 100>>)
 // CHECK-NEXT: %[[REINT:.*]] = memref.reinterpret_cast %[[ARG0]] to offset: [100], sizes: [32], strides: [1] : memref<4x8xf32, strided<[8, 1], offset: 100>> to memref<32xf32, strided<[1], offset: 100>>
 // CHECK-NEXT: memref.dealloc %[[REINT]] : memref<32xf32, strided<[1], offset: 100>>
+
+// -----
+
+memref.global "private" constant @constant_3x3x1x1xf32 : memref<3x3x1x1xf32> = dense<[[[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]], [[[-2.000000e+00]], [[0.000000e+00]], [[2.000000e+00]]], [[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]]]>
+func.func @load_global_with_offset(%i0: index, %i1: index, %i2: index, %i3: index) -> f32 {
+  %global = memref.get_global @constant_3x3x1x1xf32 : memref<3x3x1x1xf32>
+  %val = memref.load %global[%i0, %i1, %i2, %i3] : memref<3x3x1x1xf32>
+  return %val : f32
+}
+
+// CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1, s2, s3] -> (s0 * 3 + s1 + s2 + s3)>
+// CHECK: memref.global "private" constant @constant_3x3x1x1xf32 : memref<9xf32> = dense<[-1.000000e+00, 0.000000e+00, 1.000000e+00, -2.000000e+00, 0.000000e+00, 2.000000e+00, -1.000000e+00, 0.000000e+00, 1.000000e+00]>
+// CHECK-LABEL: func.func @load_global_with_offset
+// CHECK-SAME: (%[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index, %[[I3:.+]]: index)
+// CHECK: %[[GLOBAL:.+]] = memref.get_global @constant_3x3x1x1xf32 : memref<9xf32>
+// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP]]()[%[[I0]], %[[I1]], %[[I2]], %[[I3]]]
+// CHECK: %[[REINTERPRET:.+]] = memref.reinterpret_cast %[[GLOBAL]] to offset: [0], sizes: [9], strides: [1] : memref<9xf32> to memref<9xf32, strided<[1]>>
+// CHECK: %[[LOAD:.+]] = memref.load %[[REINTERPRET]][%[[INDEX]]] : memref<9xf32, strided<[1]>>
+// CHECK: return %[[LOAD]]
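
Note on the map checked above: for the 3x3x1x1 shape the row-major strides are [3, 1, 1, 1], so the linearized index is i0*3 + i1 + i2 + i3, which is exactly the #[[$MAP]] applied before the flattened load; the two trailing size-one dimensions contribute indices that are always zero for in-bounds accesses.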
