Skip to content

Commit 26f3da6

Browse files
olegshyshkov authored and karupayun committed
[BACKEND] Update LLVM version to llvm/llvm-project@b864909 (triton-lang#8263)
1 parent da28688 commit 26f3da6

File tree

5 files changed

+37
-9
lines changed

5 files changed

+37
-9
lines changed

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
0a83e96f6616c79a2ac63588a550ed420798791f
1+
b8649098a7fcf598406d8d8b7d68891d1444e9c8

test/Conversion/tritongpu_to_llvm.mlir

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,11 @@
1+
<<<<<<< HEAD
12
// RUN: triton-opt %s -split-input-file --allocate-shared-memory-nv --convert-triton-gpu-to-llvm -reconcile-unrealized-casts 2>/dev/null | FileCheck %s --dump-input-context 20
3+
=======
4+
// RUN: triton-opt %s -split-input-file \
5+
// RUN: --allocate-shared-memory-nv --convert-triton-gpu-to-llvm \
6+
// RUN: --reconcile-unrealized-casts 2>/dev/null \
7+
// RUN: | FileCheck %s --dump-input-context 20
8+
>>>>>>> 0bf92bfbf ([BACKEND] Update LLVM version to https://github.com/llvm/llvm-project/commit/b8649098a7fcf598406d8d8b7d68891d1444e9c8 (#8263))
29

310
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
411
// CHECK: llvm.func @test_empty_kernel(%arg0: i32, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>)

third_party/amd/lib/TritonAMDGPUToLLVM/AtomicRMWOpsEmitter.cpp

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -405,15 +405,28 @@ Value AtomicRMWEmitter::atomicIntraWaveReduce(RewriterBase &rewriter,
405405
Value done = b.icmp_eq(chosen, rmwPtr);
406406
Value mask = targetInfo.ballot(rewriter, loc, i64_ty, done);
407407
Value start = loopBody->getArgument(0);
408+
NamedAttribute noundef = rewriter.getNamedAttr(
409+
LLVM::LLVMDialect::getNoUndefAttrName(), rewriter.getUnitAttr());
410+
NamedAttribute lowRange = rewriter.getNamedAttr(
411+
LLVM::LLVMDialect::getRangeAttrName(),
412+
LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
413+
APInt(32, 32)));
414+
NamedAttribute highRange = rewriter.getNamedAttr(
415+
LLVM::LLVMDialect::getRangeAttrName(),
416+
LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
417+
APInt(32, 64)));
408418
Value cnt = b.trunc(i32_ty, generatePopcount64(rewriter, mask));
409419
Value maskLo = b.trunc(i32_ty, mask);
410-
Value mbcntLoRes =
411-
ROCDL::MbcntLoOp::create(rewriter, loc, i32_ty, maskLo, b.i32_val(0),
412-
/*arg_attrs=*/{}, /*res_attrs=*/{});
420+
Value mbcntLoRes = ROCDL::MbcntLoOp::create(
421+
rewriter, loc, i32_ty, maskLo, b.i32_val(0),
422+
/*arg_attrs=*/{}, /*res_attrs=*/
423+
rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, lowRange})));
413424
Value maskHi = b.trunc(i32_ty, b.lshr(mask, b.i64_val(32)));
414-
Value idx =
415-
ROCDL::MbcntHiOp::create(rewriter, loc, i32_ty, maskHi, mbcntLoRes,
416-
/*arg_attrs=*/{}, /*res_attrs=*/{});
425+
Value idx = ROCDL::MbcntHiOp::create(
426+
rewriter, loc, i32_ty, maskHi, mbcntLoRes,
427+
/*arg_attrs=*/{},
428+
/*res_attrs=*/
429+
rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, highRange})));
417430
Value base = b.add(start, cnt);
418431
Value leader = b.icmp_eq(idx, b.i32_val(0));
419432
cnt = b.sub(cnt, idx);

third_party/amd/lib/TritonAMDGPUTransforms/CanonicalizePointers.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1791,7 +1791,7 @@ class ConvertUnimplementedOpUnrealizedCasts
17911791
if (auto integerAttr =
17921792
llvm::dyn_cast_or_null<mlir::IntegerAttr>(maybeAttr)) {
17931793
if (integerAttr.getValue() == 0) {
1794-
rewriter.replaceAllUsesWith(castOp.getResult(0), fatPtrBase);
1794+
rewriter.RewriterBase::replaceAllUsesWith(castOp.getResult(0), fatPtrBase);
17951795
rewriter.eraseOp(castOp);
17961796
return success();
17971797
}
@@ -1801,7 +1801,7 @@ class ConvertUnimplementedOpUnrealizedCasts
18011801
fatPtrs.at({fatPtrBase, fatPtrOffset});
18021802
auto newPtr = createTensorPointer(rewriter, fatPtrBase, fatPtrOffset,
18031803
castOp.getLoc(), fatPtrAttrs);
1804-
rewriter.replaceAllUsesWith(newPtr, fatPtrBase);
1804+
rewriter.RewriterBase::replaceAllUsesWith(newPtr, fatPtrBase);
18051805
rewriter.eraseOp(castOp);
18061806
return success();
18071807
}

third_party/amd/lib/TritonAMDGPUTransforms/Utility.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ int deduceMinCountBetweeOps(Operation *beginOp, Operation *endOp,
2828
deduceMinCountInBlock(ifOp.getElseRegion().front(), countFunc);
2929
count += std::min(minThen, minElse);
3030
} else if (auto forOp = llvm::dyn_cast<scf::ForOp>(op)) {
31+
<<<<<<< HEAD
3132
if (std::optional<APInt> tripCount = forOp.getStaticTripCount()) {
3233
uint64_t tcVal = 0;
3334
if (forOp.getUnsignedCmp() && tripCount->ugt(0))
@@ -36,6 +37,13 @@ int deduceMinCountBetweeOps(Operation *beginOp, Operation *endOp,
3637
tcVal = tripCount->getSExtValue();
3738
if (tcVal > 0)
3839
count += tcVal * deduceMinCountInBlock(*forOp.getBody(), countFunc);
40+
=======
41+
int64_t tripCount = forOp.getStaticTripCount()
42+
.value_or(llvm::APInt(64, 0))
43+
.getZExtValue();
44+
if (tripCount > 0) {
45+
count += tripCount * deduceMinCountInBlock(*forOp.getBody(), countFunc);
46+
>>>>>>> 0bf92bfbf ([BACKEND] Update LLVM version to https://github.com/llvm/llvm-project/commit/b8649098a7fcf598406d8d8b7d68891d1444e9c8 (#8263))
3947
}
4048
} else {
4149
count += countFunc(op);

0 commit comments

Comments
 (0)