Skip to content

Commit c8b7e0c

Browse files
committed
[AMDGPU] Eliminate likely-spurious execz checks via intrinsic argument
Currently, we introduce branches to skip conditionally executed instructions if the EXEC mask is zero and only eliminate them if the scheduling model says that executing the skipped instructions is cheaper than taking the branch instruction. This patch adds a heuristic to SIAnnotateControlFlow to determine if the lanes of a wavefront are likely to have dynamically varying values for the branch condition. This information is passed through new arguments/operands of the amdgcn.if/else intrinsics and the SI_IF/ELSE pseudo instructions to SILowerControlFlow, where the execz branch is inserted with corresponding branch probabilities. This causes SIPreEmitPeephole to eliminate the corresponding execz branch if it is legal to do so. This is an alternative to PR llvm#117567, using a simpler heuristic and passing the LikelyVarying information through new arguments for the amdgcn.if/else intrinsics and the SI_IF/ELSE pseudo instructions instead of abusing branch weight metadata. Most test changes are caused by the new arguments for the amdgcn.if/else intrinsics and the SI_IF/ELSE pseudo instructions; the LikelyVarying argument is set to false/0 in these existing tests. New tests for the functionality are in conditional-mem-no-cbranch-execz.ll and annotate-likely-varying-branches.ll. For SWDEV-483228.
1 parent 733be4e commit c8b7e0c

File tree

75 files changed

+1288
-463
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+1288
-463
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3407,11 +3407,11 @@ def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">,
34073407
// having side effects, which is sufficient to prevent optimizations without
34083408
// having to mark them as convergent.
34093409
def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
3410-
[llvm_i1_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree]
3410+
[llvm_i1_ty, llvm_i1_ty], [ImmArg<ArgIndex<1>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
34113411
>;
34123412

34133413
def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
3414-
[llvm_anyint_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree]
3414+
[llvm_anyint_ty, llvm_i1_ty], [ImmArg<ArgIndex<1>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
34153415
>;
34163416

34173417
def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
4242
def ImmOp : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
4343
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
4444

45-
def AMDGPUIfOp : SDTypeProfile<1, 2,
46-
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
45+
def AMDGPUIfOp : SDTypeProfile<1, 3,
46+
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>, SDTCisVT<3, OtherVT>]
4747
>;
4848

49-
def AMDGPUElseOp : SDTypeProfile<1, 2,
50-
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
49+
def AMDGPUElseOp : SDTypeProfile<1, 3,
50+
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>, SDTCisVT<3, OtherVT>]
5151
>;
5252

5353
def AMDGPULoopOp : SDTypeProfile<0, 2,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7264,6 +7264,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
72647264

72657265
Register Def = MI.getOperand(1).getReg();
72667266
Register Use = MI.getOperand(3).getReg();
7267+
auto LikelyVarying = MI.getOperand(4).getImm();
72677268

72687269
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
72697270

@@ -7275,11 +7276,13 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
72757276
B.buildInstr(AMDGPU::SI_IF)
72767277
.addDef(Def)
72777278
.addUse(Use)
7279+
.addImm(LikelyVarying)
72787280
.addMBB(UncondBrTarget);
72797281
} else {
72807282
B.buildInstr(AMDGPU::SI_ELSE)
72817283
.addDef(Def)
72827284
.addUse(Use)
7285+
.addImm(LikelyVarying)
72837286
.addMBB(UncondBrTarget);
72847287
}
72857288

llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/IR/Dominators.h"
2323
#include "llvm/IR/IRBuilder.h"
2424
#include "llvm/IR/IntrinsicsAMDGPU.h"
25+
#include "llvm/IR/IntrinsicsR600.h"
2526
#include "llvm/Target/TargetMachine.h"
2627
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
2728
#include "llvm/Transforms/Utils/Local.h"
@@ -36,6 +37,24 @@ namespace {
3637
using StackEntry = std::pair<BasicBlock *, Value *>;
3738
using StackVector = SmallVector<StackEntry, 16>;
3839

40+
class LikelyVaryingHeuristic {
41+
public:
42+
LikelyVaryingHeuristic(const Function &F, const GCNSubtarget &ST) {
43+
IsSingleLaneExecution = ST.isSingleLaneExecution(F);
44+
}
45+
46+
/// Check if \p V is likely to have dynamically varying values among the
47+
/// workitems in each wavefront.
48+
bool isLikelyVarying(const Value *V);
49+
50+
private:
51+
bool IsSingleLaneExecution = false;
52+
53+
bool isRelevantSourceOfDivergence(const Value *V) const;
54+
55+
ValueMap<const Value *, bool> LikelyVaryingCache;
56+
};
57+
3958
class SIAnnotateControlFlow {
4059
private:
4160
Function *F;
@@ -62,6 +81,8 @@ class SIAnnotateControlFlow {
6281

6382
LoopInfo *LI;
6483

84+
LikelyVaryingHeuristic LVHeuristic;
85+
6586
void initialize(const GCNSubtarget &ST);
6687

6788
bool isUniform(BranchInst *T);
@@ -99,7 +120,7 @@ class SIAnnotateControlFlow {
99120
public:
100121
SIAnnotateControlFlow(Function &F, const GCNSubtarget &ST, DominatorTree &DT,
101122
LoopInfo &LI, UniformityInfo &UA)
102-
: F(&F), UA(&UA), DT(&DT), LI(&LI) {
123+
: F(&F), UA(&UA), DT(&DT), LI(&LI), LVHeuristic(F, ST) {
103124
initialize(ST);
104125
}
105126

@@ -186,9 +207,14 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
186207
if (isUniform(Term))
187208
return false;
188209

210+
// Check if it's likely that at least one lane will always follow the
211+
// then-branch, i.e., the then-branch is never skipped completely.
212+
Value *IsLikelyVarying =
213+
LVHeuristic.isLikelyVarying(Term->getCondition()) ? BoolTrue : BoolFalse;
214+
189215
IRBuilder<> IRB(Term);
190216
Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, IntMask),
191-
{Term->getCondition()});
217+
{Term->getCondition(), IsLikelyVarying});
192218
Value *Cond = IRB.CreateExtractValue(IfCall, {0});
193219
Value *Mask = IRB.CreateExtractValue(IfCall, {1});
194220
Term->setCondition(Cond);
@@ -202,9 +228,16 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
202228
return false;
203229
}
204230

231+
Value *IncomingMask = popSaved();
232+
// Check if it's likely that at least one lane will always follow the
233+
// else-branch, i.e., the else-branch is never skipped completely.
234+
Value *IsLikelyVarying =
235+
LVHeuristic.isLikelyVarying(IncomingMask) ? BoolTrue : BoolFalse;
236+
205237
IRBuilder<> IRB(Term);
206-
Value *ElseCall = IRB.CreateCall(
207-
getDecl(Else, Intrinsic::amdgcn_else, {IntMask, IntMask}), {popSaved()});
238+
Value *ElseCall =
239+
IRB.CreateCall(getDecl(Else, Intrinsic::amdgcn_else, {IntMask, IntMask}),
240+
{IncomingMask, IsLikelyVarying});
208241
Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
209242
Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
210243
Term->setCondition(Cond);
@@ -385,6 +418,62 @@ bool SIAnnotateControlFlow::run() {
385418
return Changed;
386419
}
387420

421+
bool LikelyVaryingHeuristic::isRelevantSourceOfDivergence(
422+
const Value *V) const {
423+
auto *II = dyn_cast<IntrinsicInst>(V);
424+
if (!II)
425+
return false;
426+
427+
switch (II->getIntrinsicID()) {
428+
case Intrinsic::amdgcn_workitem_id_z:
429+
case Intrinsic::r600_read_tidig_z:
430+
case Intrinsic::amdgcn_workitem_id_y:
431+
case Intrinsic::r600_read_tidig_y:
432+
case Intrinsic::amdgcn_workitem_id_x:
433+
case Intrinsic::r600_read_tidig_x:
434+
case Intrinsic::amdgcn_mbcnt_hi:
435+
case Intrinsic::amdgcn_mbcnt_lo:
436+
return true;
437+
default:
438+
return false;
439+
}
440+
}
441+
442+
bool LikelyVaryingHeuristic::isLikelyVarying(const Value *V) {
443+
if (IsSingleLaneExecution)
444+
return false;
445+
446+
if (isRelevantSourceOfDivergence(V))
447+
return true;
448+
449+
auto *I = dyn_cast<Instruction>(V);
450+
if (!I)
451+
return false;
452+
453+
// ExtractValueInst and IntrinsicInst enable looking through the
454+
// amdgcn_if/else intrinsics inserted by SIAnnotateControlFlow.
455+
// This condition excludes PHINodes, which prevents infinite recursion.
456+
if (!isa<BinaryOperator>(I) && !isa<UnaryOperator>(I) && !isa<CastInst>(I) &&
457+
!isa<CmpInst>(I) && !isa<ExtractValueInst>(I) && !isa<IntrinsicInst>(I))
458+
return false;
459+
460+
// Have we already checked V?
461+
auto CacheEntry = LikelyVaryingCache.find(V);
462+
if (CacheEntry != LikelyVaryingCache.end())
463+
return CacheEntry->second;
464+
465+
// Does it use a likely varying Value?
466+
bool Result = false;
467+
for (const auto &Use : I->operands()) {
468+
Result |= isLikelyVarying(Use);
469+
if (Result)
470+
break;
471+
}
472+
473+
LikelyVaryingCache.insert({V, Result});
474+
return Result;
475+
}
476+
388477
PreservedAnalyses SIAnnotateControlFlowPass::run(Function &F,
389478
FunctionAnalysisManager &FAM) {
390479
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -417,8 +417,8 @@ def IGLP_OPT : SPseudoInstSI<(outs), (ins i32imm:$mask),
417417
let isTerminator = 1, isNotDuplicable = 1 in {
418418

419419
def SI_IF: CFPseudoInstSI <
420-
(outs SReg_1:$dst), (ins SReg_1:$vcc, brtarget:$target),
421-
[(set i1:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
420+
(outs SReg_1:$dst), (ins SReg_1:$vcc, i1imm:$likelyvarying, brtarget:$target),
421+
[(set i1:$dst, (AMDGPUif i1:$vcc, (i1 timm:$likelyvarying), bb:$target))], 1, 1> {
422422
let Constraints = "";
423423
let Size = 12;
424424
let hasSideEffects = 1;
@@ -427,7 +427,7 @@ def SI_IF: CFPseudoInstSI <
427427

428428
def SI_ELSE : CFPseudoInstSI <
429429
(outs SReg_1:$dst),
430-
(ins SReg_1:$src, brtarget:$target), [], 1, 1> {
430+
(ins SReg_1:$src, i1imm:$likelyvarying, brtarget:$target), [], 1, 1> {
431431
let Size = 12;
432432
let hasSideEffects = 1;
433433
let IsNeverUniform = 1;
@@ -1049,8 +1049,8 @@ def : GCNPat<
10491049
>;
10501050

10511051
def : GCNPat<
1052-
(AMDGPUelse i1:$src, bb:$target),
1053-
(SI_ELSE $src, $target)
1052+
(AMDGPUelse i1:$src, i1:$likelyvarying, bb:$target),
1053+
(SI_ELSE $src, $likelyvarying, $target)
10541054
>;
10551055

10561056
def : Pat <

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,11 @@
5555
#include "llvm/ADT/SmallSet.h"
5656
#include "llvm/CodeGen/LiveIntervals.h"
5757
#include "llvm/CodeGen/LiveVariables.h"
58+
#include "llvm/CodeGen/MachineBasicBlock.h"
5859
#include "llvm/CodeGen/MachineDominators.h"
5960
#include "llvm/CodeGen/MachineFunctionPass.h"
61+
#include "llvm/IR/LLVMContext.h"
62+
#include "llvm/Support/BranchProbability.h"
6063
#include "llvm/Target/TargetMachine.h"
6164

6265
using namespace llvm;
@@ -221,9 +224,11 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
221224
MachineOperand& Cond = MI.getOperand(1);
222225
assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
223226

224-
MachineOperand &ImpDefSCC = MI.getOperand(4);
227+
MachineOperand &ImpDefSCC = MI.getOperand(5);
225228
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
226229

230+
bool LikelyVarying = MI.getOperand(2).getImm();
231+
227232
// If there is only one use of save exec register and that use is SI_END_CF,
228233
// we can optimize SI_IF by returning the full saved exec mask instead of
229234
// just cleared bits.
@@ -281,7 +286,17 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
281286
// Insert the S_CBRANCH_EXECZ instruction which will be optimized later
282287
// during SIPreEmitPeephole.
283288
MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
284-
.add(MI.getOperand(2));
289+
.add(MI.getOperand(3));
290+
291+
if (LikelyVarying) {
292+
MachineBasicBlock *ExeczDest = MI.getOperand(3).getMBB();
293+
auto **E = MBB.succ_end();
294+
for (auto **SI = MBB.succ_begin(); SI != E; ++SI) {
295+
if (*SI == ExeczDest)
296+
MBB.setSuccProbability(SI, BranchProbability::getZero());
297+
}
298+
MBB.normalizeSuccProbs();
299+
}
285300

286301
if (!LIS) {
287302
MI.eraseFromParent();
@@ -329,7 +344,9 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
329344
if (LV)
330345
LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
331346

332-
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
347+
bool LikelyVarying = MI.getOperand(2).getImm();
348+
349+
MachineBasicBlock *DestBB = MI.getOperand(3).getMBB();
333350

334351
MachineBasicBlock::iterator ElsePt(MI);
335352

@@ -352,6 +369,15 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
352369
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
353370
.addMBB(DestBB);
354371

372+
if (LikelyVarying) {
373+
auto **E = MBB.succ_end();
374+
for (auto **SI = MBB.succ_begin(); SI != E; ++SI) {
375+
if (*SI == DestBB)
376+
MBB.setSuccProbability(SI, BranchProbability::getZero());
377+
}
378+
MBB.normalizeSuccProbs();
379+
}
380+
355381
if (!LIS) {
356382
MI.eraseFromParent();
357383
return;

llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ MachineBasicBlock *
184184
SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
185185
for (auto &BR : MBB->terminators()) {
186186
if (BR.getOpcode() == AMDGPU::SI_ELSE)
187-
return BR.getOperand(2).getMBB();
187+
return BR.getOperand(3).getMBB();
188188
}
189189
return nullptr;
190190
}
@@ -682,7 +682,7 @@ bool SIOptimizeVGPRLiveRange::run(MachineFunction &MF) {
682682
for (auto &MI : MBB.terminators()) {
683683
// Detect the if-else blocks
684684
if (MI.getOpcode() == AMDGPU::SI_IF) {
685-
MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
685+
MachineBasicBlock *IfTarget = MI.getOperand(3).getMBB();
686686
auto *Endif = getElseTarget(IfTarget);
687687
if (!Endif)
688688
continue;

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "AMDGPU.h"
1515
#include "GCNSubtarget.h"
1616
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17+
#include "llvm/ADT/Statistic.h"
1718
#include "llvm/CodeGen/MachineFunctionPass.h"
1819
#include "llvm/CodeGen/TargetSchedule.h"
1920
#include "llvm/Support/BranchProbability.h"
@@ -22,6 +23,8 @@ using namespace llvm;
2223

2324
#define DEBUG_TYPE "si-pre-emit-peephole"
2425

26+
STATISTIC(NumCBranchExeczElim, "Number of s_cbranch_execz eliminated.");
27+
2528
namespace {
2629

2730
class SIPreEmitPeephole : public MachineFunctionPass {
@@ -404,6 +407,7 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
404407
return false;
405408

406409
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
410+
++NumCBranchExeczElim;
407411
MI.eraseFromParent();
408412
SrcMBB.removeSuccessor(TrueMBB);
409413

llvm/test/Analysis/DotMachineCFG/AMDGPU/irreducible.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
1515
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.3:\l|\l successors: %bb.4(0x80000000)\l\l %4:vgpr_32 = PHI %5:vgpr_32, %bb.1, %7:vgpr_32, %bb.2\l}"];
1616
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
17-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.4:\l|\l successors: %bb.2(0x40000000), %bb.5(0x40000000)\l\l %8:vgpr_32 = V_AND_B32_e32 3, %1:vgpr_32, implicit $exec\l %9:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 2, implicit $exec\l %10:sreg_64 = SI_IF killed %9:sreg_64, %bb.2, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
17+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.4:\l|\l successors: %bb.2(0x40000000), %bb.5(0x40000000)\l\l %8:vgpr_32 = V_AND_B32_e32 3, %1:vgpr_32, implicit $exec\l %9:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 2, implicit $exec\l %10:sreg_64 = SI_IF killed %9:sreg_64, 0, %bb.2, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
1818
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
1919
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
20-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.5:\l|\l successors: %bb.1(0x40000000), %bb.6(0x40000000)\l\l %11:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 1, implicit $exec\l %12:sreg_64 = SI_IF killed %11:sreg_64, %bb.1, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
20+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.5:\l|\l successors: %bb.1(0x40000000), %bb.6(0x40000000)\l\l %11:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 1, implicit $exec\l %12:sreg_64 = SI_IF killed %11:sreg_64, 0, %bb.1, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
2121
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
2222
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
2323
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.6:\l|\l\l S_ENDPGM 0\l}"];
@@ -74,12 +74,12 @@ body: |
7474
7575
%50:vgpr_32 = V_AND_B32_e32 3, %2, implicit $exec
7676
%51:sreg_64 = V_CMP_EQ_U32_e64 %50, 2, implicit $exec
77-
%52:sreg_64 = SI_IF killed %51:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
77+
%52:sreg_64 = SI_IF killed %51:sreg_64, 0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
7878
7979
bb.5:
8080
successors: %bb.1, %bb.6
8181
%61:sreg_64 = V_CMP_EQ_U32_e64 %50, 1, implicit $exec
82-
%62:sreg_64 = SI_IF killed %61:sreg_64, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
82+
%62:sreg_64 = SI_IF killed %61:sreg_64, 0, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
8383
8484
bb.6:
8585
S_ENDPGM 0

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/control-flow-intrinsics.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ body: |
1010
; CHECK-NOT: DIVERGENT: %1
1111
%1:sreg_64(s64) = G_IMPLICIT_DEF
1212
; CHECK: DIVERGENT: {{.*}} SI_IF
13-
%2:sreg_64 = SI_IF %1, %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
13+
%2:sreg_64 = SI_IF %1, 0, %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
1414
1515
bb.1:
1616
SI_RETURN
@@ -30,7 +30,7 @@ body: |
3030
; CHECK-NOT: DIVERGENT: %1
3131
%1:sreg_64(s64) = G_IMPLICIT_DEF
3232
; CHECK: DIVERGENT: {{.*}} SI_ELSE
33-
%2:sreg_64 = SI_ELSE %1, %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
33+
%2:sreg_64 = SI_ELSE %1, 0, %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
3434
3535
bb.1:
3636
SI_RETURN

0 commit comments

Comments
 (0)