Skip to content

Commit 94a4787

Browse files
pull the ballot argument to all the match users
1 parent b3c4e93 commit 94a4787

File tree

2 files changed

+67
-27
lines changed

2 files changed

+67
-27
lines changed

llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,12 @@ AMDGPUUniformIntrinsicCombinePass::run(Function &F,
9696
bool AMDGPUUniformIntrinsicCombineImpl::run(Function &F) {
9797
bool IsChanged{false};
9898
Module *M = F.getParent();
99-
99+
100100
// TODO: If none of the relevant intrinsics are declared, return early (the check below is currently commented out).
101-
// if (!M->getFunction(Intrinsic::getName(Intrinsic::amdgcn_permlane64)) &&
102-
// !M->getFunction(Intrinsic::getName(Intrinsic::amdgcn_readfirstlane)) &&
103-
// !M->getFunction(Intrinsic::getName(Intrinsic::amdgcn_readlane)) &&
104-
// !M->getFunction(Intrinsic::getName(Intrinsic::amdgcn_ballot))) {
101+
// if (!Intrinsic::getDeclarationIfExists(M, Intrinsic::amdgcn_permlane64, {}) &&
102+
// !Intrinsic::getDeclarationIfExists(M, Intrinsic::amdgcn_readfirstlane, {}) &&
103+
// !Intrinsic::getDeclarationIfExists(M, Intrinsic::amdgcn_readlane, {}) &&
104+
// !Intrinsic::getDeclarationIfExists(M, Intrinsic::amdgcn_ballot, {})) {
105105
// return false;
106106
// }
107107

@@ -139,28 +139,21 @@ bool AMDGPUUniformIntrinsicCombineImpl::optimizeUniformIntrinsicInst(
139139
LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << "\n");
140140

141141
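// Rewrite every direct `icmp` user that compares the ballot result against zero (NOTE(review): the matcher does not appear to pin the predicate to `eq` — confirm).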
142-
// FIXME: replace all the uses?
143-
auto It = llvm::find_if(II.users(), [&](User *U) {
144-
return match(U, m_ICmp(m_Specific(&II), m_Zero()));
145-
});
146-
147-
// Check if a match was found
148-
if (It == II.user_end())
149-
return false;
150-
151-
// Extract the matching `icmp` instruction
152-
ICmpInst *ICmp = dyn_cast<ICmpInst>(*It);
153-
IRBuilder<> Builder(ICmp);
154-
155-
// Convert ballot argument to match `icmp` operand type (i64)
156-
Value *ConvertedSrc = Builder.CreateZExtOrTrunc(Src, II.getType());
157-
158-
LLVM_DEBUG(dbgs() << "Replacing ballot result in icmp: " << *ICmp
159-
<< " with " << *ConvertedSrc << "\n");
160-
161-
// Replace `%ballot` in `icmp` with `ConvertedSrc`
162-
ICmp->setOperand(0, ConvertedSrc);
163-
return true;
142+
bool Changed = false;
143+
for (User *U : make_early_inc_range(II.users())) {
144+
if (match(U, m_ICmp(m_Specific(&II), m_Zero()))) {
145+
ICmpInst *ICmp = dyn_cast<ICmpInst>(U);
146+
IRBuilder<> Builder(ICmp);
147+
Value *ConvertedSrc = Builder.CreateZExtOrTrunc(Src, II.getType());
148+
149+
LLVM_DEBUG(dbgs() << "Replacing ballot result in icmp: " << *ICmp
150+
<< " with " << *ConvertedSrc << "\n");
151+
152+
ICmp->setOperand(0, ConvertedSrc);
153+
Changed = true;
154+
}
155+
}
156+
return Changed;
164157
}
165158
}
166159
return false;

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,53 @@ exit:
122122
ret void
123123
}
124124

125+
define protected amdgpu_kernel void @trivial_waterfall_multiple_icmp(ptr addrspace(1) %out) {
126+
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_multiple_icmp(
127+
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
128+
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
129+
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
130+
; PASS-CHECK: [[WHILE]]:
131+
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
132+
; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
133+
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
134+
; PASS-CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[NOT_DONE]] to i64
135+
; PASS-CHECK-NEXT: [[IS_DONE_1:%.*]] = icmp eq i64 [[TMP1]], 0
136+
; PASS-CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[NOT_DONE]] to i64
137+
; PASS-CHECK-NEXT: [[IS_DONE_3:%.*]] = icmp eq i64 [[TMP0]], 0
138+
; PASS-CHECK-NEXT: br i1 [[IS_DONE_1]], label %[[EXIT:.*]], label %[[IF]]
139+
; PASS-CHECK: [[IF]]:
140+
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
141+
; PASS-CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[NOT_DONE]] to i64
142+
; PASS-CHECK-NEXT: [[IS_DONE_4:%.*]] = icmp eq i64 [[TMP2]], 0
143+
; PASS-CHECK-NEXT: br label %[[WHILE]]
144+
; PASS-CHECK: [[EXIT]]:
145+
; PASS-CHECK-NEXT: ret void
146+
;
147+
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_multiple_icmp(
148+
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
149+
; DCE-CHECK-NEXT: [[ENTRY:.*:]]
150+
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
151+
; DCE-CHECK-NEXT: ret void
152+
;
153+
entry:
154+
br label %while
155+
156+
while:
157+
%done = phi i1 [ 0, %entry ], [ 1, %if ]
158+
%not_done = xor i1 %done, true
159+
%ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
160+
%is_done_1 = icmp eq i64 %ballot, 0
161+
%is_done_2 = icmp eq i64 %ballot, 0
162+
br i1 %is_done_1, label %exit, label %if
163+
164+
if:
165+
store i32 5, ptr addrspace(1) %out
166+
%is_done_3 = icmp eq i64 %ballot, 0
167+
br label %while
168+
169+
exit:
170+
ret void
171+
}
125172

126173
declare i64 @llvm.amdgcn.ballot.i64(i1) #1
127174
!6 = !{i64 690}

0 commit comments

Comments
 (0)