Skip to content

Commit 856bcd3

Browse files
committed
CodeGen/AMDGPU: Allow 3-address conversion of bundled instructions
This is in preparation for future changes in AMDGPU that will make more substantial use of bundles pre-RA. For now, simply test this with degenerate (single-instruction) bundles.

commit-id:4a30cb78
1 parent 3eb0dc1 commit 856bcd3

File tree

3 files changed

+87
-32
lines changed

3 files changed

+87
-32
lines changed

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Lines changed: 31 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -794,29 +794,34 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr(
794794
if (!NewMI)
795795
return false;
796796

797-
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
798-
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
799-
800-
// If the old instruction is debug value tracked, an update is required.
801-
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
802-
assert(mi->getNumExplicitDefs() == 1);
803-
assert(NewMI->getNumExplicitDefs() == 1);
804-
805-
// Find the old and new def location.
806-
unsigned OldIdx = mi->defs().begin()->getOperandNo();
807-
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
808-
809-
// Record that one def has been replaced by the other.
810-
unsigned NewInstrNum = NewMI->getDebugInstrNum();
811-
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
812-
std::make_pair(NewInstrNum, NewIdx));
813-
}
814-
815-
MBB->erase(mi); // Nuke the old inst.
816-
817797
for (MachineInstr &MI : MIS)
818798
DistanceMap.insert(std::make_pair(&MI, Dist++));
819-
Dist--;
799+
800+
if (&*mi == NewMI) {
801+
LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi);
802+
} else {
803+
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
804+
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
805+
806+
// If the old instruction is debug value tracked, an update is required.
807+
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
808+
assert(mi->getNumExplicitDefs() == 1);
809+
assert(NewMI->getNumExplicitDefs() == 1);
810+
811+
// Find the old and new def location.
812+
unsigned OldIdx = mi->defs().begin()->getOperandNo();
813+
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
814+
815+
// Record that one def has been replaced by the other.
816+
unsigned NewInstrNum = NewMI->getDebugInstrNum();
817+
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
818+
std::make_pair(NewInstrNum, NewIdx));
819+
}
820+
821+
MBB->erase(mi); // Nuke the old inst.
822+
Dist--;
823+
}
824+
820825
mi = NewMI;
821826
nmi = std::next(mi);
822827

@@ -1329,6 +1334,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13291334

13301335
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
13311336

1337+
// Give targets a chance to convert bundled instructions.
1338+
bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle);
1339+
13321340
// If the instruction is convertible to 3 Addr, instead
13331341
// of returning try 3 Addr transformation aggressively and
13341342
// use this variable to check later. Because it might be better.
@@ -1337,7 +1345,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13371345
// addl %esi, %edi
13381346
// movl %edi, %eax
13391347
// ret
1340-
if (Commuted && !MI.isConvertibleTo3Addr())
1348+
if (Commuted && !ConvertibleTo3Addr)
13411349
return false;
13421350

13431351
if (shouldOnlyCommute)
@@ -1357,7 +1365,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13571365
regBKilled = isKilled(MI, regB, true);
13581366
}
13591367

1360-
if (MI.isConvertibleTo3Addr()) {
1368+
if (ConvertibleTo3Addr) {
13611369
// This instruction is potentially convertible to a true
13621370
// three-address instruction. Check if it is profitable.
13631371
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 52 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4047,10 +4047,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40474047
LiveVariables *LV,
40484048
LiveIntervals *LIS) const {
40494049
MachineBasicBlock &MBB = *MI.getParent();
4050+
MachineInstr *CandidateMI = &MI;
4051+
4052+
if (MI.isBundle()) {
4053+
// This is a temporary placeholder for bundle handling that enables us to
4054+
// exercise the relevant code paths in the two-address instruction pass.
4055+
if (MI.getBundleSize() != 1)
4056+
return nullptr;
4057+
CandidateMI = MI.getNextNode();
4058+
}
4059+
40504060
ThreeAddressUpdates U;
4051-
MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
4061+
MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4062+
if (!NewMI)
4063+
return nullptr;
40524064

4053-
if (NewMI) {
4065+
if (MI.isBundle()) {
4066+
CandidateMI->eraseFromBundle();
4067+
4068+
for (MachineOperand &MO : MI.all_defs()) {
4069+
if (MO.isTied())
4070+
MI.untieRegOperand(MO.getOperandNo());
4071+
}
4072+
} else {
40544073
updateLiveVariables(LV, MI, *NewMI);
40554074
if (LIS) {
40564075
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
@@ -4091,7 +4110,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40914110
LV->getVarInfo(DefReg).AliveBlocks.clear();
40924111
}
40934112

4094-
if (LIS) {
4113+
if (MI.isBundle()) {
4114+
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
4115+
if (!VRI.Reads && !VRI.Writes) {
4116+
for (MachineOperand &MO : MI.all_uses()) {
4117+
if (MO.isReg() && MO.getReg() == DefReg) {
4118+
MI.removeOperand(MO.getOperandNo());
4119+
break;
4120+
}
4121+
}
4122+
4123+
if (LIS)
4124+
LIS->shrinkToUses(&LIS->getInterval(DefReg));
4125+
}
4126+
} else if (LIS) {
40954127
LiveInterval &DefLI = LIS->getInterval(DefReg);
40964128

40974129
// We cannot delete the original instruction here, so hack out the use
@@ -4106,11 +4138,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
41064138
}
41074139
}
41084140

4141+
if (MI.isBundle()) {
4142+
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
4143+
if (!VRI.Reads && !VRI.Writes) {
4144+
for (MachineOperand &MIOp : MI.uses()) {
4145+
if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4146+
MIOp.setIsUndef(true);
4147+
MIOp.setReg(DummyReg);
4148+
}
4149+
}
4150+
}
4151+
4152+
auto MO = MachineOperand::CreateReg(DummyReg, false);
4153+
MO.setIsUndef(true);
4154+
MI.addOperand(MO);
4155+
}
4156+
41094157
LIS->shrinkToUses(&DefLI);
41104158
}
41114159
}
41124160

4113-
return NewMI;
4161+
return MI.isBundle() ? &MI : NewMI;
41144162
}
41154163

41164164
MachineInstr *

llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ body: |
3131
3232
...
3333

34-
# This test is an example where conversion to three-address form would be beneficial.
34+
# This test is an example where conversion to three-address form is beneficial.
3535
---
3636
name: test_fmac_reuse_bundle
3737
body: |
@@ -41,11 +41,10 @@ body: |
4141
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4242
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
4343
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
44-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
45-
; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec {
46-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec
44+
; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec {
45+
; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
4746
; GCN-NEXT: }
48-
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
47+
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec
4948
%2:vgpr_32 = COPY $vgpr0
5049
%0:vgpr_32 = IMPLICIT_DEF
5150
%1:vgpr_32 = IMPLICIT_DEF

0 commit comments

Comments
 (0)