Skip to content

Commit 42e0e86

Browse files
committed
support -O0 flow in true16
1 parent 6e5ee4a commit 42e0e86

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+46145
-85024
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,7 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
10881088
assert(MF.getSubtarget<GCNSubtarget>().useRealTrue16Insts() &&
10891089
"We do not expect to see 16-bit copies from VGPR to SGPR unless "
10901090
"we have 16-bit VGPRs");
1091-
assert(MRI->getRegClass(DstReg) == &AMDGPU::SGPR_LO16RegClass ||
1091+
assert(MRI->getRegClass(DstReg) == &AMDGPU::SGPR_32RegClass ||
10921092
MRI->getRegClass(DstReg) == &AMDGPU::SReg_32RegClass ||
10931093
MRI->getRegClass(DstReg) == &AMDGPU::SReg_32_XM0RegClass);
10941094
// There is no V_READFIRSTLANE_B16, so legalize the dst/src reg to 32 bits

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7245,7 +7245,8 @@ void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, unsigned OpIdx,
72457245
MachineBasicBlock *MBB = MI.getParent();
72467246
// Legalize operands and check for size mismatch
72477247
if (!OpIdx || OpIdx >= MI.getNumExplicitOperands() ||
7248-
OpIdx >= get(Opcode).getNumOperands())
7248+
OpIdx >= get(Opcode).getNumOperands() ||
7249+
get(Opcode).operands()[OpIdx].RegClass == -1)
72497250
return;
72507251

72517252
MachineOperand &Op = MI.getOperand(OpIdx);
@@ -7803,15 +7804,22 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78037804
// that copies will end up as machine instructions and not be
78047805
// eliminated.
78057806
addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
7806-
MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
7807-
MRI.clearKillFlags(Inst.getOperand(1).getReg());
7807+
Register NewDstReg = Inst.getOperand(1).getReg();
7808+
MRI.replaceRegWith(DstReg, NewDstReg);
7809+
MRI.clearKillFlags(NewDstReg);
78087810
Inst.getOperand(0).setReg(DstReg);
78097811
// Make sure we don't leave around a dead VGPR->SGPR copy. Normally
78107812
// these are deleted later, but at -O0 it would leave a suspicious
78117813
// looking illegal copy of an undef register.
78127814
for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
78137815
Inst.removeOperand(I);
78147816
Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
7817+
// Legalize t16 operands of NewDstReg's users, since replaceRegWith is called after addUsersToMoveToVALUWorklist
7818+
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7819+
E = MRI.use_end();
7820+
I != E; ++I) {
7821+
legalizeOperandsVALUt16(*I->getParent(), MRI);
7822+
}
78157823
return;
78167824
}
78177825

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3553,9 +3553,7 @@ SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
35533553

35543554
const TargetRegisterClass *
35553555
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
3556-
if (BitWidth == 16)
3557-
return &AMDGPU::SGPR_LO16RegClass;
3558-
if (BitWidth == 32)
3556+
if (BitWidth == 16 || BitWidth == 32)
35593557
return &AMDGPU::SReg_32RegClass;
35603558
if (BitWidth == 64)
35613559
return &AMDGPU::SReg_64RegClass;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,17 +1470,34 @@ def : GCNPat<
14701470
>;
14711471

14721472
def : GCNPat<
1473-
(i64 (anyext i16:$src)),
1473+
(i64 (UniformUnaryFrag<anyext> i16:$src)),
1474+
(REG_SEQUENCE VReg_64,
1475+
(i32 (COPY $src)), sub0,
1476+
(V_MOV_B32_e32 (i32 0)), sub1)
1477+
>;
1478+
1479+
def : GCNPat<
1480+
(i64 (DivergentUnaryFrag<anyext> i16:$src)),
14741481
(REG_SEQUENCE VReg_64, $src, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1)
14751482
>;
14761483

14771484
def : GCNPat<
1478-
(i16 (trunc i32:$src)),
1485+
(i16 (UniformUnaryFrag<trunc> i32:$src)),
1486+
(COPY $src)
1487+
>;
1488+
1489+
def : GCNPat<
1490+
(i16 (DivergentUnaryFrag<trunc> i32:$src)),
14791491
(EXTRACT_SUBREG $src, lo16)
14801492
>;
14811493

14821494
def : GCNPat <
1483-
(i16 (trunc i64:$src)),
1495+
(i16 (UniformUnaryFrag<trunc> i64:$src)),
1496+
(EXTRACT_SUBREG $src, sub0)
1497+
>;
1498+
1499+
def : GCNPat <
1500+
(i16 (DivergentUnaryFrag<trunc> i64:$src)),
14841501
(EXTRACT_SUBREG $src, lo16)
14851502
>;
14861503

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 16460 additions & 28699 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)