Skip to content

Commit 6636659

Browse files
authored
CodeGen/AMDGPU: Allow 3-address conversion of bundled instructions (llvm#166213)
This is in preparation for future changes in AMDGPU that will make more substantial use of bundles pre-RA. For now, simply test this with degenerate (single-instruction) bundles.
1 parent 66da12a commit 6636659

File tree

3 files changed

+90
-32
lines changed

3 files changed

+90
-32
lines changed

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -794,29 +794,36 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr(
794794
if (!NewMI)
795795
return false;
796796

797-
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
798-
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
799-
800-
// If the old instruction is debug value tracked, an update is required.
801-
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
802-
assert(mi->getNumExplicitDefs() == 1);
803-
assert(NewMI->getNumExplicitDefs() == 1);
804-
805-
// Find the old and new def location.
806-
unsigned OldIdx = mi->defs().begin()->getOperandNo();
807-
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
808-
809-
// Record that one def has been replaced by the other.
810-
unsigned NewInstrNum = NewMI->getDebugInstrNum();
811-
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
812-
std::make_pair(NewInstrNum, NewIdx));
813-
}
814-
815-
MBB->erase(mi); // Nuke the old inst.
816-
817797
for (MachineInstr &MI : MIS)
818798
DistanceMap.insert(std::make_pair(&MI, Dist++));
819-
Dist--;
799+
800+
if (&*mi == NewMI) {
801+
LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi);
802+
} else {
803+
LLVM_DEBUG({
804+
dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi;
805+
dbgs() << "2addr: TO 3-ADDR: " << *NewMI;
806+
});
807+
808+
// If the old instruction is debug value tracked, an update is required.
809+
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
810+
assert(mi->getNumExplicitDefs() == 1);
811+
assert(NewMI->getNumExplicitDefs() == 1);
812+
813+
// Find the old and new def location.
814+
unsigned OldIdx = mi->defs().begin()->getOperandNo();
815+
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
816+
817+
// Record that one def has been replaced by the other.
818+
unsigned NewInstrNum = NewMI->getDebugInstrNum();
819+
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
820+
std::make_pair(NewInstrNum, NewIdx));
821+
}
822+
823+
MBB->erase(mi); // Nuke the old inst.
824+
Dist--;
825+
}
826+
820827
mi = NewMI;
821828
nmi = std::next(mi);
822829

@@ -1329,6 +1336,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13291336

13301337
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
13311338

1339+
// Give targets a chance to convert bundled instructions.
1340+
bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle);
1341+
13321342
// If the instruction is convertible to 3 Addr, instead
13331343
// of returning try 3 Addr transformation aggressively and
13341344
// use this variable to check later. Because it might be better.
@@ -1337,7 +1347,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13371347
// addl %esi, %edi
13381348
// movl %edi, %eax
13391349
// ret
1340-
if (Commuted && !MI.isConvertibleTo3Addr())
1350+
if (Commuted && !ConvertibleTo3Addr)
13411351
return false;
13421352

13431353
if (shouldOnlyCommute)
@@ -1357,7 +1367,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13571367
regBKilled = isKilled(MI, regB, true);
13581368
}
13591369

1360-
if (MI.isConvertibleTo3Addr()) {
1370+
if (ConvertibleTo3Addr) {
13611371
// This instruction is potentially convertible to a true
13621372
// three-address instruction. Check if it is profitable.
13631373
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4046,10 +4046,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40464046
LiveVariables *LV,
40474047
LiveIntervals *LIS) const {
40484048
MachineBasicBlock &MBB = *MI.getParent();
4049+
MachineInstr *CandidateMI = &MI;
4050+
4051+
if (MI.isBundle()) {
4052+
// This is a temporary placeholder for bundle handling that enables us to
4053+
// exercise the relevant code paths in the two-address instruction pass.
4054+
if (MI.getBundleSize() != 1)
4055+
return nullptr;
4056+
CandidateMI = MI.getNextNode();
4057+
}
4058+
40494059
ThreeAddressUpdates U;
4050-
MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
4060+
MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4061+
if (!NewMI)
4062+
return nullptr;
40514063

4052-
if (NewMI) {
4064+
if (MI.isBundle()) {
4065+
CandidateMI->eraseFromBundle();
4066+
4067+
for (MachineOperand &MO : MI.all_defs()) {
4068+
if (MO.isTied())
4069+
MI.untieRegOperand(MO.getOperandNo());
4070+
}
4071+
} else {
40534072
updateLiveVariables(LV, MI, *NewMI);
40544073
if (LIS) {
40554074
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
@@ -4090,7 +4109,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40904109
LV->getVarInfo(DefReg).AliveBlocks.clear();
40914110
}
40924111

4093-
if (LIS) {
4112+
if (MI.isBundle()) {
4113+
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
4114+
if (!VRI.Reads && !VRI.Writes) {
4115+
for (MachineOperand &MO : MI.all_uses()) {
4116+
if (MO.isReg() && MO.getReg() == DefReg) {
4117+
assert(MO.getSubReg() == 0 &&
4118+
"tied sub-registers in bundles currently not supported");
4119+
MI.removeOperand(MO.getOperandNo());
4120+
break;
4121+
}
4122+
}
4123+
4124+
if (LIS)
4125+
LIS->shrinkToUses(&LIS->getInterval(DefReg));
4126+
}
4127+
} else if (LIS) {
40944128
LiveInterval &DefLI = LIS->getInterval(DefReg);
40954129

40964130
// We cannot delete the original instruction here, so hack out the use
@@ -4105,11 +4139,26 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
41054139
}
41064140
}
41074141

4142+
if (MI.isBundle()) {
4143+
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
4144+
if (!VRI.Reads && !VRI.Writes) {
4145+
for (MachineOperand &MIOp : MI.uses()) {
4146+
if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4147+
MIOp.setIsUndef(true);
4148+
MIOp.setReg(DummyReg);
4149+
}
4150+
}
4151+
}
4152+
4153+
MI.addOperand(MachineOperand::CreateReg(DummyReg, false, false, false,
4154+
false, /*isUndef=*/true));
4155+
}
4156+
41084157
LIS->shrinkToUses(&DefLI);
41094158
}
41104159
}
41114160

4112-
return NewMI;
4161+
return MI.isBundle() ? &MI : NewMI;
41134162
}
41144163

41154164
MachineInstr *

llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ body: |
3131
3232
...
3333

34-
# This test is an example where conversion to three-address form would be beneficial.
34+
# This test is an example where conversion to three-address form is beneficial.
3535
---
3636
name: test_fmac_reuse_bundle
3737
body: |
@@ -41,11 +41,10 @@ body: |
4141
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4242
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
4343
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
44-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
45-
; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec {
46-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec
44+
; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec {
45+
; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
4746
; GCN-NEXT: }
48-
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
47+
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec
4948
%2:vgpr_32 = COPY $vgpr0
5049
%0:vgpr_32 = IMPLICIT_DEF
5150
%1:vgpr_32 = IMPLICIT_DEF

0 commit comments

Comments
 (0)