Skip to content

Commit 4f93bcb

Browse files
committed
rename genIndexAdd
1 parent 69ff3ca commit 4f93bcb

File tree

3 files changed

+46
-28
lines changed

3 files changed

+46
-28
lines changed

mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
1111

1212
#include "mlir/IR/BuiltinTypes.h"
13+
#include "mlir/IR/OpDefinition.h"
1314
namespace mlir {
1415

1516
class VectorType;
@@ -128,6 +129,20 @@ void doSCFStructuralTypeConversionWithTensorType(Operation *op,
128129
/// if no GPU module parent or XeVM target attribute exists.
129130
std::optional<std::string> getChipStr(Operation *op);
130131

132+
/// Generates element-wise addition ops of two arrays with automatic alignment.
133+
/// When the input arrays have different sizes, the shorter array is
134+
/// right-aligned with the longer array, and the unmatched leading elements from
135+
/// the longer array are preserved unchanged. This is commonly used for offset
136+
/// computation where higher-dimensional offsets need to be added to
137+
/// lower-dimensional adjustments.
138+
///
139+
/// Example:
140+
/// lhs = [l1, l2, l3], rhs = [r1, r2]
141+
/// Result: [l1, l2+r1, l3+r2]
142+
SmallVector<OpFoldResult> addWithRightAligned(OpBuilder &builder, Location loc,
143+
ArrayRef<OpFoldResult> lhs,
144+
ArrayRef<OpFoldResult> rhs);
145+
131146
} // namespace xegpu
132147

133148
} // namespace mlir

mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -76,32 +76,6 @@ getSgShapeAndCount(ArrayRef<int64_t> shape,
7676
return std::make_pair(sgShape, count);
7777
}
7878

79-
/// Generates element-wise addition ops of two arrays with automatic alignment.
80-
/// When the input arrays have different sizes, the shorter array is
81-
/// right-aligned with the longer array, and the unmatched leading elements from
82-
/// the longer array are preserved unchanged. This is commonly used for offset
83-
/// computation where higher-dimensional offsets need to be added to
84-
/// lower-dimensional adjustments.
85-
///
86-
/// Example:
87-
/// lhs = [10, 20, 30], rhs = [5, 7]
88-
/// Result: [10, 25, 37] (20+5, 30+7, with 10 preserved)
89-
static SmallVector<OpFoldResult>
90-
genIndexAdds(ConversionPatternRewriter &rewriter, Location loc,
91-
ArrayRef<OpFoldResult> lhs, ArrayRef<OpFoldResult> rhs) {
92-
// ensure a is longer than b
93-
ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
94-
ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
95-
SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
96-
a = a.slice(a.size() - b.size());
97-
for (auto [l, r] : llvm::zip(a, b)) {
98-
auto lval = getValueOrCreateConstantIndexOp(rewriter, loc, l);
99-
auto rval = getValueOrCreateConstantIndexOp(rewriter, loc, r);
100-
results.push_back(rewriter.createOrFold<index::AddOp>(loc, lval, rval));
101-
}
102-
return results;
103-
}
104-
10579
/// Utility helper for deriving a list of offsets for each sub-TensorDescs
10680
/// or sub-MemDescs to be accessed by current subgroup (sgId) based on the
10781
/// associated distribute layout attribute, the shape, subgroup id and the
@@ -150,8 +124,8 @@ genOffsetsList(ConversionPatternRewriter &rewriter, OpType op,
150124
// or sub-memory descriptor.
151125
// SmallVector<SmallVector<OpFoldResult>> offsetsList;
152126
for (const auto &sgOffsets : *maybeDescOffsets) {
153-
SmallVector<OpFoldResult> newOffsets =
154-
genIndexAdds(rewriter, loc, getAsOpFoldResult(sgOffsets), origOffsets);
127+
SmallVector<OpFoldResult> newOffsets = xegpu::addWithRightAligned(
128+
rewriter, loc, getAsOpFoldResult(sgOffsets), origOffsets);
155129
offsetsList.push_back(std::move(newOffsets));
156130
}
157131

mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
1414
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
15+
#include "mlir/Dialect/Index/IR/IndexOps.h"
1516
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
1617
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
1718
#include "mlir/Dialect/Utils/IndexingUtils.h"
@@ -424,3 +425,31 @@ std::optional<std::string> xegpu::getChipStr(Operation *op) {
424425

425426
return std::nullopt;
426427
}
428+
429+
/// Generates element-wise addition ops of two arrays with automatic alignment.
430+
/// When the input arrays have different sizes, the shorter array is
431+
/// right-aligned with the longer array, and the unmatched leading elements from
432+
/// the longer array are preserved unchanged. This is commonly used for offset
433+
/// computation where higher-dimensional offsets need to be added to
434+
/// lower-dimensional adjustments.
435+
///
436+
/// Example:
437+
/// lhs = [l1, l2, l3], rhs = [r1, r2]
438+
/// Result: [l1, l2+r1, l3+r2]
439+
SmallVector<OpFoldResult>
440+
xegpu::addWithRightAligned(OpBuilder &builder, Location loc,
441+
ArrayRef<OpFoldResult> lhs,
442+
ArrayRef<OpFoldResult> rhs) {
443+
// ensure a is at least as long as b
444+
ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
445+
ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
446+
SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
447+
a = a.slice(a.size() - b.size());
448+
for (auto [l, r] : llvm::zip(a, b)) {
449+
auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
450+
auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
451+
results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
452+
}
453+
return results;
454+
return {};
455+
}

0 commit comments

Comments
 (0)