Skip to content

Commit acd8104

Browse files
antiagainstravil-mobileenjustliThomasRaoux
authored
This updates LLVM to pick up fixes * llvm/llvm-project#165692 To unblock ASAN breakages. --------- Co-authored-by: ravil-mobile <[email protected]> Co-authored-by: enjustli <[email protected]> Co-authored-by: Thomas Raoux <[email protected]>
1 parent 99f44dd commit acd8104

File tree

12 files changed

+25
-25
lines changed

12 files changed

+25
-25
lines changed

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
f6ded0be897e2878612dd903f7e8bb85448269e5
1+
49d5bb0ad0cb31410184c462801c5049ad671517

lib/Dialect/TritonGPU/IR/Ops.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -929,8 +929,9 @@ void WarpSpecializeOp::getSuccessorRegions(
929929
return;
930930
}
931931
// And the default region branches transparently back to the parent.
932-
assert(src.getRegionOrNull() == &getDefaultRegion());
933-
successors.push_back(RegionSuccessor(getResults()));
932+
assert(src.getTerminatorPredecessorOrNull()->getParentRegion() ==
933+
&getDefaultRegion());
934+
successors.push_back(RegionSuccessor(getOperation(), getResults()));
934935
}
935936

936937
LogicalResult WarpSpecializeOp::verify() {

lib/Dialect/TritonGPU/Transforms/WarpSpecialization/PartitionLoops.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ void cloneOpsInBlock(Block *block, SmallVector<WarpGroupBuilder> &builders,
338338
builder.mapping.lookupOrDefault(yieldOp.getOperand(i)));
339339
}
340340

341-
builder.create<scf::YieldOp>(op->getLoc(), newYieldOperands);
341+
scf::YieldOp::create(builder, op->getLoc(), newYieldOperands);
342342
}
343343
} else {
344344
assert(hasPartition(op));
@@ -449,7 +449,7 @@ LogicalResult triton::gpu::partitionLoop(scf::ForOp loop) {
449449
for (auto [b, region, partition] : llvm::zip(
450450
builders, wgOp.getPartitionRegions(), partitions.getPartitions())) {
451451
if (!llvm::is_contained(getPartitionIds(loop), b.partitionId)) {
452-
b.create<nvws::WarpGroupYieldOp>(wgOp.getLoc(), SmallVector<Value>{});
452+
nvws::WarpGroupYieldOp::create(b, wgOp.getLoc(), SmallVector<Value>{});
453453
continue;
454454
}
455455
auto newForOp = *region.front().getOps<scf::ForOp>().begin();

python/src/llvm.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ createTargetMachine(llvm::Module *module, std::string proc,
5757
bool disableLLVMOpt = mlir::triton::tools::getBoolEnv("DISABLE_LLVM_OPT");
5858
if (enable_fp_fusion)
5959
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
60-
opt.UnsafeFPMath = false;
6160
opt.NoInfsFPMath = false;
6261
opt.NoNaNsFPMath = true;
6362
opt.TrapUnreachable = true;

test/Conversion/tritonnvidiagpu_to_llvm.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,9 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ
215215
// CHECK-LABEL: async_copy_mbarrier_arrive
216216
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.target = "cuda:90", "ttg.threads-per-warp" = 32 : i32} {
217217
tt.func public @async_copy_mbarrier_arrive(%arg0: !ttg.memdesc<1xi64, #shared, #ttg.shared_memory>) attributes { noinline = false } {
218-
// CHECK: nvvm.cp.async.mbarrier.arrive.shared %{{.*}} : !llvm.ptr<3>
218+
// CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} : !llvm.ptr<3>
219219
ttng.async_copy_mbarrier_arrive %arg0 : !ttg.memdesc<1xi64, #shared, #ttg.shared_memory>
220-
// CHECK: nvvm.cp.async.mbarrier.arrive.shared %{{.*}} {noinc = true} : !llvm.ptr<3>
220+
// CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} {noinc = true} : !llvm.ptr<3>
221221
ttng.async_copy_mbarrier_arrive %arg0 { noIncrement } : !ttg.memdesc<1xi64, #shared, #ttg.shared_memory>
222222
tt.return
223223
}

third_party/amd/include/Analysis/RangeAnalysis.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ struct TritonIntegerRangeAnalysis : dataflow::IntegerRangeAnalysis {
8484
/// the loop operands and all users and all users of the results of the loop.
8585
void visitRegionSuccessors(
8686
ProgramPoint *point, RegionBranchOpInterface branch,
87-
RegionBranchPoint successor,
87+
RegionSuccessor successor,
8888
ArrayRef<dataflow::AbstractSparseLattice *> abstractLattices) override;
8989

9090
/// Collect all operands that participate in assumptions (see description of

third_party/amd/lib/Analysis/RangeAnalysis.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ void TritonIntegerRangeAnalysis::initializeFuncOp(tt::FuncOp op) {
630630

631631
void TritonIntegerRangeAnalysis::visitRegionSuccessors(
632632
ProgramPoint *point, RegionBranchOpInterface branch,
633-
RegionBranchPoint successor,
633+
RegionSuccessor successor,
634634
ArrayRef<dataflow::AbstractSparseLattice *> abstractLattices) {
635635
LLVM_DEBUG({
636636
DBGS() << "Visit Region Succesors of ";
@@ -715,10 +715,11 @@ void TritonIntegerRangeAnalysis::visitRegionSuccessors(
715715
if (!inputs.empty()) {
716716
firstIndex = cast<OpResult>(inputs.front()).getResultNumber();
717717
}
718-
visitNonControlFlowArguments(branch,
719-
RegionSuccessor(branch->getResults().slice(
720-
firstIndex, inputs.size())),
721-
lattices, firstIndex);
718+
visitNonControlFlowArguments(
719+
branch,
720+
RegionSuccessor(
721+
branch, branch->getResults().slice(firstIndex, inputs.size())),
722+
lattices, firstIndex);
722723
} else {
723724
if (!inputs.empty()) {
724725
firstIndex = cast<BlockArgument>(inputs.front()).getArgNumber();

third_party/amd/lib/TritonAMDGPUToLLVM/MemoryOpToLLVM.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,8 +481,8 @@ class LocalBarrierOpConversion
481481
// amdgpu::MemoryCounterWaitOp will lower s_waitcnt
482482
// - s_barrier syncronizes the execution for the CTA
483483
auto dsAttr = rewriter.getI32IntegerAttr(0);
484-
rewriter.create<amdgpu::MemoryCounterWaitOp>(
485-
op->getLoc(), /* load= */ nullptr, /* store= */ nullptr,
484+
amdgpu::MemoryCounterWaitOp::create(
485+
rewriter, op->getLoc(), /* load= */ nullptr, /* store= */ nullptr,
486486
/* ds= */ dsAttr);
487487
rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
488488

third_party/nvidia/lib/Dialect/NVWS/Transforms/LowerAref.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -305,10 +305,9 @@ void createTMALoad(triton::nvws::DescriptorLoadOp op, PatternRewriter &rewriter,
305305
}
306306
}
307307
}
308-
auto newLoadOp =
309-
rewriter.create<triton::nvidia_gpu::AsyncTMACopyGlobalToLocalOp>(
310-
op.getLoc(), op.getDesc(), indices, barrierAlloc, op.getResult(),
311-
pred);
308+
auto newLoadOp = triton::nvidia_gpu::AsyncTMACopyGlobalToLocalOp::create(
309+
rewriter, op.getLoc(), op.getDesc(), indices, barrierAlloc,
310+
op.getResult(), pred);
312311
assignStageCluster(newLoadOp, getPartitionWsTagIds(op), getStageCluster(op),
313312
rewriter);
314313
};

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertWarpSpecializeToLLVM.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ static void createBarrier(TritonLLVMIRRewriter &b, unsigned barIdx,
9191
if (numThreads == 32)
9292
LLVM::NVIDIA::createSyncWarp(b.getLoc(), b);
9393
else
94-
NVVM::BarrierOp::create(b, b.getLoc(), b.i32_val(barIdx),
95-
b.i32_val(numThreads));
94+
NVVM::BarrierOp::create(b, b.getLoc(), TypeRange{}, b.i32_val(barIdx),
95+
b.i32_val(numThreads), {}, Value{});
9696
}
9797

9898
static void createAllBarrier(TritonLLVMIRRewriter &b, unsigned barIdx) {

0 commit comments

Comments
 (0)