Skip to content

Commit 07c4bb4

Browse files
Ana Mihajlovic
authored and committed
[AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2
1 parent 0b5daeb commit 07c4bb4

25 files changed

+987
-803
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ class SIShrinkInstructions {
5151
unsigned SubReg) const;
5252
bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
5353
unsigned SubReg) const;
54+
bool trySwitchOperands(MachineInstr &MI, Register *OldVCC,
55+
Register *NewVCC) const;
56+
bool shouldSwitchOperands(MachineRegisterInfo &MRI, MachineInstr &MI,
57+
const SIInstrInfo &TII) const;
58+
unsigned getInverseCompareOpcode(MachineInstr &MI) const;
5459
TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
5560
unsigned I) const;
5661
void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
@@ -831,6 +836,109 @@ bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
831836
return true;
832837
}
833838

839+
/// Return the opcode computing the logical negation of compare \p MI, or 0 if
/// no inverse is known.
///
/// The returned compare must set the result bit to the exact complement of
/// \p MI for *all* inputs. For floats this means the negation of an ordered
/// relational compare is the corresponding *unordered* compare (e.g.
/// !(a < b) is "not less than" (NLT), which is true for NaN inputs, whereas
/// GE would be false) — using GE/LE/GT/LT here would silently change which
/// cndmask operand is selected when an input is NaN.
unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  // unsigned 32
  case AMDGPU::V_CMP_EQ_U32_e64:
    return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::V_CMP_NE_U32_e64:
    return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::V_CMP_GE_U32_e64:
    return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::V_CMP_LE_U32_e64:
    return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::V_CMP_GT_U32_e64:
    return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::V_CMP_LT_U32_e64:
    return AMDGPU::V_CMP_GE_U32_e64;
  // float 32
  // EQ/NEQ are already exact complements (NEQ is unordered-or-not-equal).
  case AMDGPU::V_CMP_EQ_F32_e64:
    return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::V_CMP_NEQ_F32_e64:
    return AMDGPU::V_CMP_EQ_F32_e64;
  // Ordered relational compares negate to their unordered counterparts so
  // that NaN inputs are handled correctly.
  case AMDGPU::V_CMP_GE_F32_e64:
    return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::V_CMP_LE_F32_e64:
    return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::V_CMP_GT_F32_e64:
    return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::V_CMP_LT_F32_e64:
    return AMDGPU::V_CMP_NLT_F32_e64;
  // And the reverse direction, so compares already in unordered form can be
  // inverted too.
  case AMDGPU::V_CMP_NGE_F32_e64:
    return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::V_CMP_NLE_F32_e64:
    return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::V_CMP_NGT_F32_e64:
    return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::V_CMP_NLT_F32_e64:
    return AMDGPU::V_CMP_LT_F32_e64;
  default:
    return 0;
  }
}
871+
872+
/// Decide whether swapping cndmask operands (and inverting the compare) is
/// profitable. This is the case when every user of the condition register is
/// a V_CNDMASK_B32_e64 and at least one of them carries an immediate in src1
/// but not in src0 — after the swap that user can shrink to VOP2.
bool SIShrinkInstructions::shouldSwitchOperands(MachineRegisterInfo &MRI,
                                                MachineInstr &MI,
                                                const SIInstrInfo &TII) const {
  unsigned ProfitableUses = 0;

  for (MachineOperand &UseOp :
       MRI.use_nodbg_operands(MI.getOperand(5).getReg())) {
    MachineInstr *UseMI = UseOp.getParent();
    // All users must be selects we know how to rewrite.
    if (UseMI->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
      return false;

    bool Src0IsImm =
        TII.getNamedOperand(*UseMI, AMDGPU::OpName::src0)->isImm();
    bool Src1IsImm =
        TII.getNamedOperand(*UseMI, AMDGPU::OpName::src1)->isImm();

    // An immediate that is only in src0 would land in src1 after the swap,
    // blocking the shrink for that user — give up.
    if (Src0IsImm && !Src1IsImm)
      return false;
    // Immediate only in src1: this user benefits from the swap.
    if (Src1IsImm && !Src0IsImm)
      ++ProfitableUses;
  }
  return ProfitableUses >= 1;
}
896+
897+
// OldVCC and NewVCC are used to remember VCC after inverting comparison
898+
bool SIShrinkInstructions::trySwitchOperands(MachineInstr &MI, Register *OldVCC,
899+
Register *NewVCC) const {
900+
const DebugLoc &DL = MI.getDebugLoc();
901+
auto Reg = MI.getOperand(5).getReg();
902+
if (!Reg.isVirtual())
903+
return false;
904+
905+
if (*OldVCC != Reg) {
906+
MachineInstr *DefMI = MRI->getVRegDef(Reg);
907+
if (DefMI) {
908+
unsigned Opcode = getInverseCompareOpcode(*DefMI);
909+
if (Opcode &&
910+
SIShrinkInstructions::shouldSwitchOperands(*MRI, MI, *TII)) {
911+
auto cmpDL = DefMI->getDebugLoc();
912+
*NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
913+
*OldVCC = Reg;
914+
MachineInstrBuilder InverseCompare = BuildMI(
915+
*DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
916+
InverseCompare->setFlags(DefMI->getFlags());
917+
918+
unsigned OpNum = DefMI->getNumExplicitOperands();
919+
for (unsigned i = 1; i < OpNum; i++) {
920+
MachineOperand Op = DefMI->getOperand(i);
921+
InverseCompare.add(Op);
922+
if (Op.isReg() && Op.isKill())
923+
InverseCompare->getOperand(i).setIsKill(false);
924+
}
925+
}
926+
}
927+
}
928+
if (*OldVCC == Reg) {
929+
BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
930+
MI.getOperand(0).getReg())
931+
.add(MI.getOperand(3))
932+
.add(MI.getOperand(4))
933+
.add(MI.getOperand(1))
934+
.add(MI.getOperand(2))
935+
.addReg(*NewVCC);
936+
MI.eraseFromParent();
937+
return true;
938+
}
939+
return false;
940+
}
941+
834942
bool SIShrinkInstructions::run(MachineFunction &MF) {
835943

836944
this->MF = &MF;
@@ -842,6 +950,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
842950
unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
843951

844952
std::vector<unsigned> I1Defs;
953+
Register OldVCC = AMDGPU::NoRegister;
954+
Register NewVCC = AMDGPU::NoRegister;
845955

846956
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
847957
BI != BE; ++BI) {
@@ -973,6 +1083,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
9731083
continue;
9741084
}
9751085

1086+
if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
1087+
trySwitchOperands(MI, &OldVCC, &NewVCC))
1088+
MRI->setRegAllocationHint(NewVCC, 0, VCCReg);
1089+
9761090
// If there is no chance we will shrink it and use VCC as sdst to get
9771091
// a 32 bit form try to replace dead sdst with NULL.
9781092
if (TII->isVOP3(MI.getOpcode())) {

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll

Lines changed: 68 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
1313
; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
1414
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
1515
; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
16-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
16+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0
1717
; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
18-
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
18+
; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc
1919
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
2020
; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
2121
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
@@ -34,18 +34,18 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
3434
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3535
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
3636
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
37-
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
38-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
39-
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
40-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
41-
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
42-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
43-
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
44-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
45-
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
46-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
47-
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
48-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
37+
; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0
38+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
39+
; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0
40+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
41+
; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0
42+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
43+
; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0
44+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
45+
; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0
46+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
47+
; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0
48+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, 0x41000000, v1, vcc_lo
4949
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
5050
entry:
5151
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
@@ -3388,9 +3388,9 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
33883388
; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
33893389
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
33903390
; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
3391-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
3391+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0
33923392
; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
3393-
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
3393+
; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc
33943394
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
33953395
; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
33963396
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
@@ -3432,32 +3432,32 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
34323432
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34333433
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
34343434
; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3435-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3436-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3437-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3438-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3439-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3440-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3441-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3442-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3443-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3444-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3445-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3446-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3447-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3448-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3449-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3450-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3451-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3452-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3453-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3454-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3455-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3456-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3457-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3458-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3459-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3460-
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3435+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0
3436+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
3437+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0
3438+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
3439+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0
3440+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
3441+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0
3442+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
3443+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0
3444+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
3445+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0
3446+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo
3447+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0
3448+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo
3449+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0
3450+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo
3451+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0
3452+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo
3453+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0
3454+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo
3455+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0
3456+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo
3457+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0
3458+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo
3459+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0
3460+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo
34613461
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
34623462
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo
34633463
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -3467,32 +3467,32 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
34673467
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34683468
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
34693469
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3470-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
3471-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3472-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
3473-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3474-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
3475-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3476-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
3477-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3478-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
3479-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3480-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
3481-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3482-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
3483-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3484-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
3485-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3486-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
3487-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3488-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
3489-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3490-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
3491-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3492-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
3493-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3494-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
3495-
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3470+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0
3471+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
3472+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0
3473+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
3474+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0
3475+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
3476+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0
3477+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
3478+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0
3479+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
3480+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0
3481+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo
3482+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0
3483+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo
3484+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0
3485+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo
3486+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0
3487+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo
3488+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0
3489+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo
3490+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0
3491+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo
3492+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0
3493+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo
3494+
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0
3495+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo
34963496
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
34973497
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
34983498
; GFX11-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)