 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
+#include "mlir/Dialect/SCF/Transforms/Patterns.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Transforms/DialectConversion.h"
 #include "llvm/Support/FormatVariadic.h"
 #include <cstdint>
 #include <numeric>
@@ -127,3 +130,182 @@ std::string xegpu::getLayoutName(OpResult res) { |
   const StringRef prefix = "layout_result_";
   return llvm::formatv("{0}{1}", prefix, res.getResultNumber()).str();
 }
+
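+/// Lifts VectorType values to RankedTensorType so that an xegpu::LayoutAttr
+/// can be attached as the tensor encoding and carried across SCF region
+/// boundaries (scf.for, scf.while), then converts the tensors back to one or
+/// more VectorTypes according to that layout. The work proceeds in three
+/// stages: (1) an SCF structural type conversion from VectorType to
+/// RankedTensorType, (2) layout propagation through the inserted
+/// unrealized_conversion_cast ops, and (3) a conversion from RankedTensorType
+/// (and TensorDescType) back to distributed VectorTypes.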
+void xegpu::doSCFStructuralTypeConversionWithTensorType(Operation *op) {
+  MLIRContext *context = op->getContext();
+
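+  // Materialization callback shared by both conversion stages: bridge any
+  // type mismatch by inserting a builtin.unrealized_conversion_cast and
+  // returning its single result.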
+  auto materializeCast = [&](OpBuilder &builder, Type type, ValueRange inputs,
+                             Location loc) -> Value {
+    return builder.create<UnrealizedConversionCastOp>(loc, type, inputs)
+        .getResult(0);
+  };
+
+  { // Convert VectorType to RankedTensorType for SCF structural ops.
+    TypeConverter converter;
+    converter.addConversion([&](Type type) -> Type { return type; });
+    converter.addConversion([&](VectorType type) -> Type {
+      return RankedTensorType::get(type.getShape(), type.getElementType());
+    });
+    converter.addSourceMaterialization(materializeCast);
+    converter.addTargetMaterialization(materializeCast);
+
+    mlir::ConversionTarget target(*context);
+    target.addLegalOp<UnrealizedConversionCastOp>();
+
+    mlir::RewritePatternSet patterns(context);
+    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
+                                                         target);
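+    // Partial conversion is used (here and again in the final stage below)
+    // so that the remaining unrealized_conversion_cast ops, which are marked
+    // legal, may survive; the result is therefore discarded.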
+    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
+  }
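+  // Illustrative effect of this first stage (IR shown for exposition, not
+  // produced verbatim): an scf.for iter_arg of type vector<128x128xf16>
+  // becomes tensor<128x128xf16>, with unrealized_conversion_cast ops
+  // bridging the vector/tensor boundary at the loop init and results.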
+
+  { // Propagate the layout attribute to the RankedTensorType by inspecting
+    // builtin.unrealized_conversion_cast ops that cast from VectorType to
+    // RankedTensorType.
+    op->walk([&](UnrealizedConversionCastOp castOp) {
+      if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
+        return WalkResult::skip();
+
+      Value input = castOp.getInputs()[0];
+      Value result = castOp.getResults()[0];
+      auto inputTy = dyn_cast<VectorType>(input.getType());
+      auto resultTy = dyn_cast<RankedTensorType>(result.getType());
+
+      // Only proceed for casts from VectorType to RankedTensorType.
+      if (!inputTy || !resultTy)
+        return WalkResult::skip();
+
+      xegpu::LayoutAttr layout = xegpu::getLayoutAttr(input);
+      if (!layout)
+        return WalkResult::skip();
+
+      RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
+      result.setType(newTy);
+
+      // Update the iteration arguments if the user is a loop-like op.
+      for (OpOperand &use : result.getUses()) {
+        if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
+          BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
+          arg.setType(newTy);
+        }
+        // scf.while has two regions; the block arguments of the "after"
+        // region are not exposed by LoopLikeOpInterface, so handle them
+        // explicitly.
+        if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
+          unsigned idx = use.getOperandNumber();
+          BlockArgument arg = whileOp.getAfterArguments()[idx];
+          arg.setType(newTy);
+        }
+      }
+      return WalkResult::advance();
+    });
+
+    // Use scf.yield ops as anchors to update the result types of their
+    // parent ops.
+    op->walk([&](scf::YieldOp yieldOp) {
+      Operation *parentOp = yieldOp->getParentOp();
+      for (OpResult r : parentOp->getOpResults()) {
+        unsigned idx = r.getResultNumber();
+        Type resultTy = r.getType();
+        Type yieldTy = yieldOp.getResults()[idx].getType();
+        if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
+          r.setType(yieldTy);
+      }
+    });
+  }
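+  // At this point a propagated type looks like, e.g. (illustrative syntax):
+  //   tensor<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>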
+
+  { // Convert RankedTensorType back to VectorType based on the LayoutAttr.
+
+    auto computeTileShapeAndCount = [&](ArrayRef<int64_t> shape,
+                                        DenseI32ArrayAttr sgDataAttr,
+                                        DenseI32ArrayAttr sgLayoutAttr) {
+      SmallVector<int64_t> tileShape;
+      auto sgLayout = llvm::to_vector_of<int64_t>(sgLayoutAttr.asArrayRef());
+      if (sgDataAttr)
+        tileShape = llvm::to_vector_of<int64_t>(sgDataAttr.asArrayRef());
+      else
+        tileShape = computeShapeRatio(shape, sgLayout).value_or(tileShape);
+      assert(!tileShape.empty() && "failed to compute tileShape");
+      SmallVector<int64_t> distUnit =
+          computeElementwiseMul(sgLayout, tileShape);
+      int count = computeProduct(shape) / computeProduct(distUnit);
+      return std::make_pair(tileShape, count);
+    };
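+    // Worked example with hypothetical values: shape = [256, 128],
+    // sg_layout = [8, 4], sg_data = [16, 16] gives tileShape = [16, 16],
+    // distUnit = [128, 64], and count = (256*128) / (128*64) = 4, i.e.
+    // each subgroup owns four 16x16 tiles.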
+
+    TypeConverter converter;
+    converter.addConversion([&](Type type) -> Type { return type; });
+    converter.addConversion(
+        [&](RankedTensorType type,
+            SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
+          ArrayRef<int64_t> shape = type.getShape();
+          auto encoding = type.getEncoding();
+          Type elemTy = type.getElementType();
+
+          // Initialize count and subShape to their defaults: if no
+          // LayoutAttr is present, a single VectorType with the original
+          // shape is returned.
+          int count = 1;
+          SmallVector<int64_t> subShape(shape);
+
+          if (auto layout =
+                  llvm::dyn_cast_if_present<xegpu::LayoutAttr>(encoding)) {
+            if (layout.isWgLayout()) {
+              // For WgToSg, the subShape is either taken from sgData or
+              // computed as shape / sgLayout.
+              std::tie(subShape, count) = computeTileShapeAndCount(
+                  shape, layout.getSgData(), layout.getSgLayout());
+            } else if (DenseI32ArrayAttr instData = layout.getInstData()) {
+              // For unrolling, the subShape is determined by inst_data.
+              subShape = llvm::to_vector_of<int64_t>(instData.asArrayRef());
+              count = computeProduct(shape) / computeProduct(subShape);
+            }
+          }
+          auto newTy = VectorType::get(subShape, elemTy);
+          result.append(count, newTy);
+          return success();
+        });
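+    // For instance (hypothetical values): tensor<128x128xf16> with
+    // inst_data = [8, 16] converts to 128 values of vector<8x16xf16>,
+    // since (128*128) / (8*16) = 128.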
+
+    converter.addConversion(
+        [&](xegpu::TensorDescType type,
+            SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
+          MLIRContext *ctx = type.getContext();
+          Type elemTy = type.getElementType();
+          Attribute encoding = type.getEncoding();
+          ArrayRef<int64_t> shape = type.getShape();
+
+          // Initialize count and newTy to their defaults: if no layout
+          // attribute is present, the original type is returned unchanged.
+          int count = 1;
+          Type newTy = type;
+
+          if (xegpu::LayoutAttr layout = type.getLayoutAttr()) {
+            SmallVector<int64_t> subShape;
+            if (layout.isWgLayout()) {
+              // For WgToSg, the subShape is either taken from sgData or
+              // computed as shape / sgLayout; the consumed sg_layout and
+              // sg_data fields are dropped from the resulting layout.
+              std::tie(subShape, count) = computeTileShapeAndCount(
+                  shape, layout.getSgData(), layout.getSgLayout());
+              layout = layout.dropSgLayoutAndData();
+            } else if (DenseI32ArrayAttr instData = layout.getInstData()) {
+              // For unrolling, the subShape is determined by inst_data,
+              // which is likewise dropped from the resulting layout.
+              subShape = llvm::to_vector_of<int64_t>(instData.asArrayRef());
+              count = computeProduct(shape) / computeProduct(subShape);
+              layout = layout.dropInstData();
+            }
+            newTy = xegpu::TensorDescType::get(ctx, subShape, elemTy,
+                                               encoding, layout);
+          }
+
+          result.append(count, newTy);
+          return success();
+        });
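+    // For example (illustrative syntax): !xegpu.tensor_desc<256x128xf32,
+    // #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>> converts to a
+    // single !xegpu.tensor_desc<32x32xf32>, since the distribution unit
+    // [8*32, 4*32] = [256, 128] covers the whole shape (count = 1).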
+
+    converter.addSourceMaterialization(materializeCast);
+    converter.addTargetMaterialization(materializeCast);
+
+    mlir::ConversionTarget target(*context);
+    target.addLegalOp<UnrealizedConversionCastOp>();
+
+    mlir::RewritePatternSet patterns(context);
+    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
+                                                         target);
+    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
+  }
+}