Skip to content

Commit af85d9e

Browse files
Address feedback on the location of the opt
- Remove redundant constant propagation (the assume-equality optimization) from InstCombine. - Move the assume(ballot(cmp) == -1) optimization from InstCombine to GVN.
1 parent a768d65 commit af85d9e

File tree

6 files changed

+62
-171
lines changed

6 files changed

+62
-171
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,7 +1322,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
13221322
if (isa<PoisonValue>(Arg))
13231323
return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
13241324

1325-
// For Wave32 targets, convert i64 ballot to i32 ballot + zext
1325+
if (auto *Src = dyn_cast<ConstantInt>(Arg)) {
1326+
if (Src->isZero()) {
1327+
// amdgcn.ballot(i1 0) is zero.
1328+
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
1329+
}
1330+
}
13261331
if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
13271332
// %b64 = call i64 ballot.i64(...)
13281333
// =>
@@ -1336,15 +1341,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
13361341
Call->takeName(&II);
13371342
return IC.replaceInstUsesWith(II, Call);
13381343
}
1339-
1340-
if (auto *Src = dyn_cast<ConstantInt>(Arg)) {
1341-
if (Src->isZero()) {
1342-
// amdgcn.ballot(i1 0) is zero.
1343-
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
1344-
}
1345-
// Note: ballot(true) is NOT constant folded because the result depends
1346-
// on the active lanes in the wavefront, not just the condition value.
1347-
}
13481344
break;
13491345
}
13501346
case Intrinsic::amdgcn_wavefrontsize: {

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 0 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -3549,81 +3549,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
35493549
}
35503550
}
35513551

3552-
// Basic assume equality optimization: assume(x == c) -> replace uses of x
3553-
// with c
3554-
if (auto *ICmp = dyn_cast<ICmpInst>(IIOperand)) {
3555-
if (ICmp->getPredicate() == ICmpInst::ICMP_EQ) {
3556-
Value *LHS = ICmp->getOperand(0);
3557-
Value *RHS = ICmp->getOperand(1);
3558-
Value *Variable = nullptr;
3559-
Constant *ConstantVal = nullptr;
3560-
3561-
if (auto *C = dyn_cast<Constant>(RHS)) {
3562-
Variable = LHS;
3563-
ConstantVal = C;
3564-
} else if (auto *C = dyn_cast<Constant>(LHS)) {
3565-
Variable = RHS;
3566-
ConstantVal = C;
3567-
}
3568-
3569-
if (Variable && ConstantVal && Variable->hasUseList()) {
3570-
SmallVector<Use *, 8> Uses;
3571-
for (Use &U : Variable->uses()) {
3572-
if (auto *UseInst = dyn_cast<Instruction>(U.getUser())) {
3573-
if (UseInst != ICmp &&
3574-
isValidAssumeForContext(II, UseInst, &DT)) {
3575-
Uses.push_back(&U);
3576-
}
3577-
}
3578-
}
3579-
3580-
for (Use *U : Uses) {
3581-
U->set(ConstantVal);
3582-
Worklist.pushValue(U->getUser());
3583-
}
3584-
3585-
if (!Uses.empty()) {
3586-
Worklist.pushValue(Variable);
3587-
}
3588-
}
3589-
}
3590-
}
3591-
3592-
// Optimize AMDGPU ballot patterns in assumes:
3593-
// assume(ballot(cmp) == -1) means cmp is true on all active lanes
3594-
// We can replace uses of cmp with true
3595-
Value *BallotInst;
3596-
if (match(IIOperand, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(BallotInst),
3597-
m_AllOnes()))) {
3598-
if (auto *IntrCall = dyn_cast<IntrinsicInst>(BallotInst)) {
3599-
if (IntrCall->getIntrinsicID() == Intrinsic::amdgcn_ballot) {
3600-
Value *BallotArg = IntrCall->getArgOperand(0);
3601-
if (BallotArg->getType()->isIntegerTy(1) && BallotArg->hasUseList()) {
3602-
// Find uses and replace with true
3603-
SmallVector<Use *, 8> Uses;
3604-
for (Use &U : BallotArg->uses()) {
3605-
if (auto *UseInst = dyn_cast<Instruction>(U.getUser())) {
3606-
if (UseInst != IntrCall &&
3607-
isValidAssumeForContext(II, UseInst, &DT)) {
3608-
Uses.push_back(&U);
3609-
}
3610-
}
3611-
}
3612-
3613-
// Replace uses with true
3614-
for (Use *U : Uses) {
3615-
U->set(ConstantInt::getTrue(BallotArg->getType()));
3616-
Worklist.pushValue(U->getUser());
3617-
}
3618-
3619-
if (!Uses.empty()) {
3620-
Worklist.pushValue(BallotArg);
3621-
}
3622-
}
3623-
}
3624-
}
3625-
}
3626-
36273552
// If there is a dominating assume with the same condition as this one,
36283553
// then this one is redundant, and should be removed.
36293554
KnownBits Known(1);

llvm/lib/Transforms/Scalar/GVN.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "llvm/IR/Instruction.h"
5555
#include "llvm/IR/Instructions.h"
5656
#include "llvm/IR/IntrinsicInst.h"
57+
#include "llvm/IR/IntrinsicsAMDGPU.h"
5758
#include "llvm/IR/LLVMContext.h"
5859
#include "llvm/IR/Metadata.h"
5960
#include "llvm/IR/Module.h"

llvm/test/Transforms/GVN/assume-equal.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,60 @@ define i8 @assume_ptr_eq_same_prov(ptr %p, i64 %x) {
462462
ret i8 %v
463463
}
464464

465+
; Test AMDGPU ballot pattern optimization
466+
; assume(ballot(cmp) == -1) means cmp is true on all active lanes
467+
; so uses of cmp can be replaced with true
468+
define void @assume_ballot_const(i32 %x) {
469+
; CHECK-LABEL: @assume_ballot_const(
470+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
471+
; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
472+
; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], -1
473+
; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
474+
; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
475+
; CHECK: foo:
476+
; CHECK-NEXT: ret void
477+
; CHECK: bar:
478+
; CHECK-NEXT: ret void
479+
;
480+
%cmp = icmp eq i32 %x, 0
481+
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
482+
%all = icmp eq i64 %ballot, -1
483+
call void @llvm.assume(i1 %all)
484+
br i1 %cmp, label %foo, label %bar
485+
486+
foo:
487+
ret void
488+
489+
bar:
490+
ret void
491+
}
492+
493+
define void @assume_ballot_exec_mask(i32 %x, i64 %exec_mask) {
494+
; CHECK-LABEL: @assume_ballot_exec_mask(
495+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
496+
; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
497+
; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], %exec_mask
498+
; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
499+
; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
500+
; CHECK: foo:
501+
; CHECK-NEXT: ret void
502+
; CHECK: bar:
503+
; CHECK-NEXT: ret void
504+
;
505+
%cmp = icmp eq i32 %x, 0
506+
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
507+
%all = icmp eq i64 %ballot, %exec_mask
508+
call void @llvm.assume(i1 %all)
509+
br i1 %cmp, label %foo, label %bar
510+
511+
foo:
512+
ret void
513+
514+
bar:
515+
ret void
516+
}
517+
518+
declare i64 @llvm.amdgcn.ballot.i64(i1)
465519
declare noalias ptr @_Znwm(i64)
466520
declare void @_ZN1AC1Ev(ptr)
467521
declare void @llvm.assume(i1)

llvm/test/Transforms/InstCombine/amdgpu-ballot-constant-fold.ll

Lines changed: 0 additions & 56 deletions
This file was deleted.

llvm/test/Transforms/InstCombine/assume.ll

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ define i32 @simple(i32 %a) #1 {
104104
; CHECK-LABEL: @simple(
105105
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 4
106106
; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
107-
; CHECK-NEXT: ret i32 4
107+
; CHECK-NEXT: ret i32 [[A]]
108108
;
109109
%cmp = icmp eq i32 %a, 4
110110
tail call void @llvm.assume(i1 %cmp)
@@ -1056,35 +1056,6 @@ define i1 @neg_assume_trunc_eq_one(i8 %x) {
10561056
ret i1 %q
10571057
}
10581058

1059-
; Test AMDGPU ballot pattern optimization
1060-
; assume(ballot(cmp) == -1) means cmp is true on all active lanes
1061-
; so uses of cmp can be replaced with true
1062-
define void @assume_ballot_uniform(i32 %x) {
1063-
; CHECK-LABEL: @assume_ballot_uniform(
1064-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
1065-
; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
1066-
; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], -1
1067-
; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
1068-
; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
1069-
; CHECK: foo:
1070-
; CHECK-NEXT: ret void
1071-
; CHECK: bar:
1072-
; CHECK-NEXT: ret void
1073-
;
1074-
%cmp = icmp eq i32 %x, 0
1075-
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
1076-
%all = icmp eq i64 %ballot, -1
1077-
call void @llvm.assume(i1 %all)
1078-
br i1 %cmp, label %foo, label %bar
1079-
1080-
foo:
1081-
ret void
1082-
1083-
bar:
1084-
ret void
1085-
}
1086-
1087-
declare i64 @llvm.amdgcn.ballot.i64(i1)
10881059
declare void @use(i1)
10891060
declare void @llvm.dbg.value(metadata, metadata, metadata)
10901061

0 commit comments

Comments
 (0)