Skip to content

Commit ecab37e

Browse files
Ana Mihajlovic
authored and committed
added sgpr case, refactoring
1 parent 57335e7 commit ecab37e

File tree

12 files changed

+885
-663
lines changed

12 files changed

+885
-663
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 25 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,8 @@ class SIShrinkInstructions {
5252
bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
5353
unsigned SubReg) const;
5454
Register trySwapCndOperands(MachineInstr &MI) const;
55-
bool
56-
shouldSwapCndOperands(MachineInstr &MI,
57-
SmallVector<MachineOperand *, 4> &UsesToProcess) const;
55+
bool shouldSwapCndOperands(Register Reg,
56+
std::vector<MachineInstr *> &UsesToProcess) const;
5857
unsigned getInverseCompareOpcode(MachineInstr &MI) const;
5958
TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
6059
unsigned I) const;
@@ -953,31 +952,34 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
953952
}
954953

955954
bool SIShrinkInstructions::shouldSwapCndOperands(
956-
MachineInstr &MI, SmallVector<MachineOperand *, 4> &UsesToProcess) const {
957-
auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
955+
Register Reg, std::vector<MachineInstr *> &UsesToProcess) const {
956+
auto AllUses = MRI->use_nodbg_instructions(Reg);
958957
int InstsToSwap = 0;
959958

960-
for (auto &Use : AllUses) {
961-
MachineInstr *UseInst = Use.getParent();
962-
if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
959+
for (auto &UseInst : AllUses) {
960+
if (UseInst.getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
963961
return false;
964962

965-
UsesToProcess.push_back(&Use);
963+
UsesToProcess.push_back(&UseInst);
966964

967-
MachineOperand &Src0 = UseInst->getOperand(2);
968-
MachineOperand &Src1 = UseInst->getOperand(4);
965+
MachineOperand &Src0 = UseInst.getOperand(2);
966+
MachineOperand &Src1 = UseInst.getOperand(4);
969967

970-
bool Src0Imm = Src0.isImm();
971-
bool Src1Imm = Src1.isImm();
968+
//if instruction has source modifiers it cannot be converted to VOP2
969+
if (UseInst.getOperand(1).getImm() != SISrcMods::NONE ||
970+
UseInst.getOperand(3).getImm() != SISrcMods::NONE)
971+
continue;
972+
973+
bool Src0IsVGPR = Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg());
974+
bool Src1IsVGPR = Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg());
972975

973-
if (!Src1Imm && Src0Imm)
976+
//Src1 always has to be VGPR in VOP2
977+
if (!Src0IsVGPR && Src1IsVGPR)
974978
InstsToSwap--;
975-
else if (Src1Imm && !Src0Imm &&
976-
UseInst->getOperand(1).getImm() == SISrcMods::NONE &&
977-
TRI->isVGPR(*MRI, Src0.getReg()))
979+
else if (Src0IsVGPR && !Src1IsVGPR)
978980
InstsToSwap++;
979981
}
980-
return (InstsToSwap > 0);
982+
return InstsToSwap > 0;
981983
}
982984

983985
static void swapCndOperands(MachineInstr &MI) {
@@ -1012,9 +1014,9 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
10121014
Register Reg = MI.getOperand(0).getReg();
10131015

10141016
unsigned Opcode = getInverseCompareOpcode(MI);
1015-
SmallVector<MachineOperand *, 4> UsesToProcess;
1017+
std::vector<MachineInstr *> UsesToProcess;
10161018
if (!Opcode ||
1017-
!SIShrinkInstructions::shouldSwapCndOperands(MI, UsesToProcess))
1019+
!SIShrinkInstructions::shouldSwapCndOperands(Reg, UsesToProcess))
10181020
return Reg;
10191021

10201022
auto DL = MI.getDebugLoc();
@@ -1026,15 +1028,14 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
10261028

10271029
unsigned OpNum = MI.getNumExplicitOperands();
10281030
for (unsigned Idx = 1; Idx < OpNum; Idx++) {
1029-
MachineOperand Op = MI.getOperand(Idx);
1031+
MachineOperand &Op = MI.getOperand(Idx);
10301032
InverseCompare.add(Op);
10311033
if (Op.isReg() && Op.isKill())
10321034
InverseCompare->getOperand(Idx).setIsKill(false);
10331035
}
10341036

1035-
for (auto &Use : UsesToProcess) {
1036-
MachineInstr *Inst = Use->getParent();
1037-
swapCndOperands(*Inst);
1037+
for (auto Use : UsesToProcess) {
1038+
swapCndOperands(*Use);
10381039
}
10391040

10401041
MRI->replaceRegWith(Reg, NewVCC);

llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,8 @@ define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %sr
145145
; GFX10PLUS-LABEL: select_vcc_s_s:
146146
; GFX10PLUS: ; %bb.0:
147147
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, s3
148-
; GFX10PLUS-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
149-
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v2, s2, vcc_lo
148+
; GFX10PLUS-NEXT: v_cmp_neq_f32_e32 vcc_lo, v0, v1
149+
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s2, v2, vcc_lo
150150
; GFX10PLUS-NEXT: ; return to shader part epilog
151151
%cmp = fcmp oeq float %cmp0, %cmp1
152152
%result = select i1 %cmp, float %src0, float %src1

0 commit comments

Comments
 (0)