Skip to content

Commit 56662cd

Browse files
committed
support -O0 flow in true16
1 parent db03c27 commit 56662cd

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+46129
-85037
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,7 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
10881088
assert(MF.getSubtarget<GCNSubtarget>().useRealTrue16Insts() &&
10891089
"We do not expect to see 16-bit copies from VGPR to SGPR unless "
10901090
"we have 16-bit VGPRs");
1091-
assert(MRI->getRegClass(DstReg) == &AMDGPU::SGPR_LO16RegClass ||
1091+
assert(MRI->getRegClass(DstReg) == &AMDGPU::SGPR_32RegClass ||
10921092
MRI->getRegClass(DstReg) == &AMDGPU::SReg_32RegClass ||
10931093
MRI->getRegClass(DstReg) == &AMDGPU::SReg_32_XM0RegClass);
10941094
// There is no V_READFIRSTLANE_B16, so legalize the dst/src reg to 32 bits

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7261,7 +7261,8 @@ void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, unsigned OpIdx,
72617261
MachineBasicBlock *MBB = MI.getParent();
72627262
// Legalize operands and check for size mismatch
72637263
if (!OpIdx || OpIdx >= MI.getNumExplicitOperands() ||
7264-
OpIdx >= get(Opcode).getNumOperands())
7264+
OpIdx >= get(Opcode).getNumOperands() ||
7265+
get(Opcode).operands()[OpIdx].RegClass == -1)
72657266
return;
72667267

72677268
MachineOperand &Op = MI.getOperand(OpIdx);
@@ -7820,15 +7821,22 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78207821
// that copies will end up as machine instructions and not be
78217822
// eliminated.
78227823
addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
7823-
MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
7824-
MRI.clearKillFlags(Inst.getOperand(1).getReg());
7824+
Register NewDstReg = Inst.getOperand(1).getReg();
7825+
MRI.replaceRegWith(DstReg, NewDstReg);
7826+
MRI.clearKillFlags(NewDstReg);
78257827
Inst.getOperand(0).setReg(DstReg);
78267828
// Make sure we don't leave around a dead VGPR->SGPR copy. Normally
78277829
// these are deleted later, but at -O0 it would leave a suspicious
78287830
// looking illegal copy of an undef register.
78297831
for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
78307832
Inst.removeOperand(I);
78317833
Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
7834+
// Legalize t16 operand since replaceReg is called after addUsersToVALU
7835+
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7836+
E = MRI.use_end();
7837+
I != E; ++I) {
7838+
legalizeOperandsVALUt16(*I->getParent(), MRI);
7839+
}
78327840
return;
78337841
}
78347842

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3557,9 +3557,7 @@ SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
35573557

35583558
const TargetRegisterClass *
35593559
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
3560-
if (BitWidth == 16)
3561-
return &AMDGPU::SGPR_LO16RegClass;
3562-
if (BitWidth == 32)
3560+
if (BitWidth == 16 || BitWidth == 32)
35633561
return &AMDGPU::SReg_32RegClass;
35643562
if (BitWidth == 64)
35653563
return &AMDGPU::SReg_64RegClass;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,17 +1534,34 @@ def : GCNPat<
15341534
>;
15351535

15361536
def : GCNPat<
1537-
(i64 (anyext i16:$src)),
1537+
(i64 (UniformUnaryFrag<anyext> i16:$src)),
1538+
(REG_SEQUENCE VReg_64,
1539+
(i32 (COPY $src)), sub0,
1540+
(V_MOV_B32_e32 (i32 0)), sub1)
1541+
>;
1542+
1543+
def : GCNPat<
1544+
(i64 (DivergentUnaryFrag<anyext> i16:$src)),
15381545
(REG_SEQUENCE VReg_64, $src, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1)
15391546
>;
15401547

15411548
def : GCNPat<
1542-
(i16 (trunc i32:$src)),
1549+
(i16 (UniformUnaryFrag<trunc> i32:$src)),
1550+
(COPY $src)
1551+
>;
1552+
1553+
def : GCNPat<
1554+
(i16 (DivergentUnaryFrag<trunc> i32:$src)),
15431555
(EXTRACT_SUBREG $src, lo16)
15441556
>;
15451557

15461558
def : GCNPat <
1547-
(i16 (trunc i64:$src)),
1559+
(i16 (UniformUnaryFrag<trunc> i64:$src)),
1560+
(EXTRACT_SUBREG $src, sub0)
1561+
>;
1562+
1563+
def : GCNPat <
1564+
(i16 (DivergentUnaryFrag<trunc> i64:$src)),
15481565
(EXTRACT_SUBREG $src, lo16)
15491566
>;
15501567

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 16460 additions & 28699 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)