rename genIndexAdds

chencha3 · chencha3 · commit 4f93bcbc6f82 · 2025-08-20T22:14:08.000Z
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -10,6 +10,7 @@
 #define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
 
 #include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/OpDefinition.h"
 namespace mlir {
 
 class VectorType;
@@ -128,6 +129,20 @@ void doSCFStructuralTypeConversionWithTensorType(Operation *op,
 /// if no GPU module parent or XeVM target attribute exists.
 std::optional<std::string> getChipStr(Operation *op);
 
+/// Generates element-wise addition ops of two arrays with automatic alignment.
+/// When the input arrays have different sizes, the shorter array is
+/// right-aligned with the longer array, and the unmatched leading elements from
+/// the longer array are preserved unchanged. This is commonly used for offset
+/// computation where higher-dimensional offsets need to be added to
+/// lower-dimensional adjustments.
+///
+/// Example:
+///   lhs = [l1, l2, l3], rhs = [r1, r2]
+///   Result: [l1, l2+r1, l3+r2]
+SmallVector<OpFoldResult> addWithRightAligned(OpBuilder &builder, Location loc,
+                                              ArrayRef<OpFoldResult> lhs,
+                                              ArrayRef<OpFoldResult> rhs);
+
 } // namespace xegpu
 
 } // namespace mlir
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -76,32 +76,6 @@ getSgShapeAndCount(ArrayRef<int64_t> shape,
   return std::make_pair(sgShape, count);
 }
 
-/// Generates element-wise addition ops of two arrays with automatic alignment.
-/// When the input arrays have different sizes, the shorter array is
-/// right-aligned with the longer array, and the unmatched leading elements from
-/// the longer array are preserved unchanged. This is commonly used for offset
-/// computation where higher-dimensional offsets need to be added to
-/// lower-dimensional adjustments.
-///
-/// Example:
-///   lhs = [10, 20, 30], rhs = [5, 7]
-///   Result: [10, 25, 37] (20+5, 30+7, with 10 preserved)
-static SmallVector<OpFoldResult>
-genIndexAdds(ConversionPatternRewriter &rewriter, Location loc,
-             ArrayRef<OpFoldResult> lhs, ArrayRef<OpFoldResult> rhs) {
-  // ensure a is longer than b
-  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
-  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
-  SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
-  a = a.slice(a.size() - b.size());
-  for (auto [l, r] : llvm::zip(a, b)) {
-    auto lval = getValueOrCreateConstantIndexOp(rewriter, loc, l);
-    auto rval = getValueOrCreateConstantIndexOp(rewriter, loc, r);
-    results.push_back(rewriter.createOrFold<index::AddOp>(loc, lval, rval));
-  }
-  return results;
-}
-
 /// Utility helper for deriving a list of offsets for each sub-TensorDescs
 /// or sub-MemDescs to be accessed by current subgroup (sgId) based on the
 /// associated distribute layout attribute, the shape, subgroup id and the
@@ -150,8 +124,8 @@ genOffsetsList(ConversionPatternRewriter &rewriter, OpType op,
   // or sub-memory descriptor.
   // SmallVector<SmallVector<OpFoldResult>> offsetsList;
   for (const auto &sgOffsets : *maybeDescOffsets) {
-    SmallVector<OpFoldResult> newOffsets =
-        genIndexAdds(rewriter, loc, getAsOpFoldResult(sgOffsets), origOffsets);
+    SmallVector<OpFoldResult> newOffsets = xegpu::addWithRightAligned(
+        rewriter, loc, getAsOpFoldResult(sgOffsets), origOffsets);
     offsetsList.push_back(std::move(newOffsets));
   }
 
diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/Index/IR/IndexOps.h"
 #include "mlir/Dialect/LLVMIR/XeVMDialect.h"
 #include "mlir/Dialect/SCF/Transforms/Patterns.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
@@ -424,3 +425,31 @@ std::optional<std::string> xegpu::getChipStr(Operation *op) {
 
   return std::nullopt;
 }
+
+/// Generates element-wise addition ops of two arrays with automatic alignment.
+/// When the input arrays have different sizes, the shorter array is
+/// right-aligned with the longer array, and the unmatched leading elements from
+/// the longer array are preserved unchanged. This is commonly used for offset
+/// computation where higher-dimensional offsets need to be added to
+/// lower-dimensional adjustments.
+///
+/// Example:
+///   lhs = [l1, l2, l3], rhs = [r1, r2]
+///   Result: [l1, l2+r1, l3+r2]
+SmallVector<OpFoldResult>
+xegpu::addWithRightAligned(OpBuilder &builder, Location loc,
+                           ArrayRef<OpFoldResult> lhs,
+                           ArrayRef<OpFoldResult> rhs) {
+  // Let `a` be the longer array (or lhs on a tie) and `b` the other one.
+  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
+  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
+  // Unmatched leading elements of `a` are forwarded unchanged.
+  SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
+  a = a.slice(a.size() - b.size());
+  for (auto [l, r] : llvm::zip(a, b)) {
+    auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
+    auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
+    results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
+  }
+  return results;
+}