Skip to content

Commit 53e9601

Browse files
authored
Integrate llvm-project at fe3c23b439b9a2d00442d9bc6a4ca86f73066a3d (#19287)
This integrate still carries a revert of 1004865f1ca41a9581da8747f34b29862d3ebc3d and a cherry-pick of llvm/llvm-project#116650.
1 parent 1a7b51d commit 53e9601

File tree

4 files changed

+24
-25
lines changed

4 files changed

+24
-25
lines changed

compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ static void debugPrint(Operation *op, const char *message) {
3939
/// Emit shared local memory allocation in case it is needed when lowering the
4040
/// warp operations.
4141
static Value allocateGlobalSharedMemory(Location loc, OpBuilder &builder,
42-
vector::WarpExecuteOnLane0Op warpOp,
42+
gpu::WarpExecuteOnLane0Op warpOp,
4343
Type type) {
4444
MemRefType memrefType;
4545
auto addressSpaceAttr = gpu::AddressSpaceAttr::get(
@@ -83,8 +83,7 @@ static bool isUniformLoad(Operation *op) {
8383

8484
/// Hoist uniform operations as well as special hal operations that have side
8585
/// effect but are safe to move out of the warp single lane region.
86-
static void
87-
moveScalarAndBindingUniformCode(vector::WarpExecuteOnLane0Op warpOp) {
86+
static void moveScalarAndBindingUniformCode(gpu::WarpExecuteOnLane0Op warpOp) {
8887
/// Hoist ops without side effect as well as special binding ops.
8988
auto canBeHoisted = [](Operation *op,
9089
function_ref<bool(Value)> definedOutside) {
@@ -155,12 +154,12 @@ struct InsertToBroadcast final : OpRewritePattern<vector::InsertOp> {
155154
};
156155

157156
/// Pattern to sink `gpu.barrier` ops out of a `warp_execute_on_lane_0` op.
158-
struct WarpOpBarrier final : OpRewritePattern<vector::WarpExecuteOnLane0Op> {
159-
using OpRewritePattern<vector::WarpExecuteOnLane0Op>::OpRewritePattern;
157+
struct WarpOpBarrier final : OpRewritePattern<gpu::WarpExecuteOnLane0Op> {
158+
using OpRewritePattern<gpu::WarpExecuteOnLane0Op>::OpRewritePattern;
160159

161-
LogicalResult matchAndRewrite(vector::WarpExecuteOnLane0Op warpOp,
160+
LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp,
162161
PatternRewriter &rewriter) const override {
163-
auto yield = cast<vector::YieldOp>(
162+
auto yield = cast<gpu::YieldOp>(
164163
warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
165164
Operation *lastNode = yield->getPrevNode();
166165
auto barrierOp = dyn_cast_or_null<gpu::BarrierOp>(lastNode);
@@ -233,7 +232,7 @@ struct VectorReductionToGPUPass final
233232
auto threadX = builder.create<gpu::ThreadIdOp>(loc, builder.getIndexType(),
234233
gpu::Dimension::x);
235234
auto cstGroupSize = builder.create<arith::ConstantIndexOp>(loc, groupSize);
236-
auto warpOp = builder.create<vector::WarpExecuteOnLane0Op>(
235+
auto warpOp = builder.create<gpu::WarpExecuteOnLane0Op>(
237236
loc, TypeRange(), threadX.getResult(), groupSize);
238237
warpOp.getWarpRegion().takeBody(funcOp.getFunctionBody());
239238
Block &newBlock = funcOp.getFunctionBody().emplaceBlock();
@@ -243,7 +242,7 @@ struct VectorReductionToGPUPass final
243242
warpOp.getWarpRegion().getBlocks().back().back().moveBefore(&newBlock,
244243
newBlock.end());
245244
builder.setInsertionPointToEnd(&warpOp.getWarpRegion().getBlocks().back());
246-
builder.create<vector::YieldOp>(loc);
245+
builder.create<gpu::YieldOp>(loc);
247246

248247
debugPrint(funcOp, "after step #2: wrapping code with the warp execute op");
249248

@@ -300,7 +299,7 @@ struct VectorReductionToGPUPass final
300299
vector::WarpExecuteOnLane0LoweringOptions options;
301300
options.warpAllocationFn = allocateGlobalSharedMemory;
302301
options.warpSyncronizationFn = [](Location loc, OpBuilder &builder,
303-
vector::WarpExecuteOnLane0Op warpOp) {
302+
gpu::WarpExecuteOnLane0Op warpOp) {
304303
builder.create<gpu::BarrierOp>(loc);
305304
};
306305
vector::populateWarpExecuteOnLane0OpToScfForPattern(patterns, options);

compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ void transform_dialect::VectorToWarpExecuteOnLane0Op::build(
153153
// SCCP.
154154
static LogicalResult
155155
replaceAllUsesOfLaneWithin(RewriterBase &b,
156-
vector::WarpExecuteOnLane0Op executeOp) {
156+
gpu::WarpExecuteOnLane0Op executeOp) {
157157
OpBuilder::InsertionGuard g(b);
158158
b.setInsertionPoint(executeOp);
159159
Value zero = b.create<arith::ConstantIndexOp>(executeOp.getLoc(), 0);
@@ -225,7 +225,7 @@ static FailureOr<gpu::ThreadIdOp> isThreadIdxxZeroPredicate(scf::IfOp ifOp) {
225225
}
226226

227227
struct VectorDistributionResult {
228-
vector::WarpExecuteOnLane0Op warpOp;
228+
gpu::WarpExecuteOnLane0Op warpOp;
229229
};
230230

231231
static FailureOr<VectorDistributionResult>
@@ -257,7 +257,7 @@ rewriteScfIfAsWarpExecuteOnLane0(RewriterBase &rewriter, Location loc,
257257
rewriter.create<scf::IfOp>(loc, predicate, /*withElseRegion=*/false);
258258
rewriter.setInsertionPointToStart(&newIfOp.getThenRegion().front());
259259
}
260-
auto warpOp = rewriter.create<vector::WarpExecuteOnLane0Op>(
260+
auto warpOp = rewriter.create<gpu::WarpExecuteOnLane0Op>(
261261
loc, TypeRange(), threadIdxx, warpSize);
262262

263263
// Move the code from the previous ifOp to the
@@ -270,7 +270,7 @@ rewriteScfIfAsWarpExecuteOnLane0(RewriterBase &rewriter, Location loc,
270270
sourceBlock.without_terminator().begin(),
271271
sourceBlock.without_terminator().end());
272272
rewriter.setInsertionPointToEnd(&targetBlock);
273-
rewriter.create<vector::YieldOp>(loc);
273+
rewriter.create<gpu::YieldOp>(loc);
274274

275275
// Erase old op.
276276
rewriter.eraseOp(ifOp);
@@ -358,7 +358,7 @@ void transform_dialect::VectorWarpDistributionOp::getEffects(
358358
/// Emit shared local memory allocation in case it is needed when lowering the
359359
/// warp operations.
360360
static Value allocateGlobalSharedMemory(Location loc, OpBuilder &builder,
361-
vector::WarpExecuteOnLane0Op warpOp,
361+
gpu::WarpExecuteOnLane0Op warpOp,
362362
Type type) {
363363
MemRefType memrefType;
364364
auto addressSpaceAttr = gpu::AddressSpaceAttr::get(
@@ -374,11 +374,11 @@ static Value allocateGlobalSharedMemory(Location loc, OpBuilder &builder,
374374
return builder.create<memref::AllocOp>(loc, memrefType);
375375
}
376376

377-
/// Return a value yielded by `warpOp` which statifies the filter lamdba
377+
/// Return a value yielded by `warpOp` which satisfies the filter lambda
378378
/// condition and is not dead.
379-
static OpOperand *getWarpResult(vector::WarpExecuteOnLane0Op warpOp,
379+
static OpOperand *getWarpResult(gpu::WarpExecuteOnLane0Op warpOp,
380380
function_ref<bool(Operation *)> fn) {
381-
auto yield = cast<vector::YieldOp>(
381+
auto yield = cast<gpu::YieldOp>(
382382
warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
383383
for (OpOperand &yieldOperand : yield->getOpOperands()) {
384384
Value yieldValues = yieldOperand.get();
@@ -426,9 +426,9 @@ class InsertElementToBroadcast final
426426
/// }
427427
/// gpu.synchronize
428428
/// %0 = memref.load %src[%c0] : memref<1024xf32>
429-
struct WarpOpLoad : public OpRewritePattern<vector::WarpExecuteOnLane0Op> {
430-
using OpRewritePattern<vector::WarpExecuteOnLane0Op>::OpRewritePattern;
431-
LogicalResult matchAndRewrite(vector::WarpExecuteOnLane0Op warpOp,
429+
struct WarpOpLoad : public OpRewritePattern<gpu::WarpExecuteOnLane0Op> {
430+
using OpRewritePattern<gpu::WarpExecuteOnLane0Op>::OpRewritePattern;
431+
LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp,
432432
PatternRewriter &rewriter) const override {
433433
OpOperand *operand = getWarpResult(warpOp, llvm::IsaPred<memref::LoadOp>);
434434
if (!operand)
@@ -476,7 +476,7 @@ struct HoistSharedMemoryAlloc : public OpRewritePattern<memref::AllocOp> {
476476
PatternRewriter &rewriter) const override {
477477
if (!iree_compiler::hasSharedMemoryAddressSpace(alloc.getType()))
478478
return failure();
479-
auto warpParent = alloc->getParentOfType<vector::WarpExecuteOnLane0Op>();
479+
auto warpParent = alloc->getParentOfType<gpu::WarpExecuteOnLane0Op>();
480480
if (!warpParent)
481481
return failure();
482482
alloc->moveBefore(warpParent);
@@ -561,7 +561,7 @@ static void populatePropagateVectorDistribution(Operation *target,
561561
}
562562

563563
static void warpSyncronizationFn(Location loc, OpBuilder &builder,
564-
vector::WarpExecuteOnLane0Op warpOp) {
564+
gpu::WarpExecuteOnLane0Op warpOp) {
565565
builder.create<gpu::BarrierOp>(loc);
566566
};
567567

compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ func.func @reduce_dispatch_0() attributes {translation_info = #translation_info}
2424
// WARP-EXECUTE: %[[COND32:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
2525
// Single-warp guard filters out threads 32-63.
2626
// WARP-EXECUTE: scf.if %[[COND32]] {
27-
// WARP-EXECUTE: vector.warp_execute_on_lane_0(%[[TIDX]])[32] {
27+
// WARP-EXECUTE: gpu.warp_execute_on_lane_0(%[[TIDX]])[32] {
2828
// WARP-EXECUTE: %[[V:.*]] = "some_def"() : () -> vector<128xf32>
2929
// WARP-EXECUTE: vector.transfer_write %[[V]], %{{.*}} {in_bounds = [true]} : vector<128xf32>, memref<128xf32>
3030

third_party/llvm-project

Submodule llvm-project updated 1295 files

0 commit comments

Comments
 (0)