Skip to content

Commit 582defb

Browse files
committed
CodeGen/AMDGPU: Allow 3-address conversion of bundled instructions
This is in preparation for future changes in AMDGPU that will make more substantial use of bundles before register allocation (pre-RA). For now, simply test this with degenerate (single-instruction) bundles.

commit-id:4a30cb78
1 parent 4af0353 commit 582defb

File tree

3 files changed

+87
-32
lines changed

3 files changed

+87
-32
lines changed

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -794,29 +794,34 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr(
794794
if (!NewMI)
795795
return false;
796796

797-
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
798-
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
799-
800-
// If the old instruction is debug value tracked, an update is required.
801-
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
802-
assert(mi->getNumExplicitDefs() == 1);
803-
assert(NewMI->getNumExplicitDefs() == 1);
804-
805-
// Find the old and new def location.
806-
unsigned OldIdx = mi->defs().begin()->getOperandNo();
807-
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
808-
809-
// Record that one def has been replaced by the other.
810-
unsigned NewInstrNum = NewMI->getDebugInstrNum();
811-
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
812-
std::make_pair(NewInstrNum, NewIdx));
813-
}
814-
815-
MBB->erase(mi); // Nuke the old inst.
816-
817797
for (MachineInstr &MI : MIS)
818798
DistanceMap.insert(std::make_pair(&MI, Dist++));
819-
Dist--;
799+
800+
if (&*mi == NewMI) {
801+
LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi);
802+
} else {
803+
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
804+
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
805+
806+
// If the old instruction is debug value tracked, an update is required.
807+
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
808+
assert(mi->getNumExplicitDefs() == 1);
809+
assert(NewMI->getNumExplicitDefs() == 1);
810+
811+
// Find the old and new def location.
812+
unsigned OldIdx = mi->defs().begin()->getOperandNo();
813+
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
814+
815+
// Record that one def has been replaced by the other.
816+
unsigned NewInstrNum = NewMI->getDebugInstrNum();
817+
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
818+
std::make_pair(NewInstrNum, NewIdx));
819+
}
820+
821+
MBB->erase(mi); // Nuke the old inst.
822+
Dist--;
823+
}
824+
820825
mi = NewMI;
821826
nmi = std::next(mi);
822827

@@ -1329,6 +1334,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13291334

13301335
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
13311336

1337+
// Give targets a chance to convert bundled instructions.
1338+
bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle);
1339+
13321340
// If the instruction is convertible to 3 Addr, instead
13331341
// of returning try 3 Addr transformation aggressively and
13341342
// use this variable to check later. Because it might be better.
@@ -1337,7 +1345,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13371345
// addl %esi, %edi
13381346
// movl %edi, %eax
13391347
// ret
1340-
if (Commuted && !MI.isConvertibleTo3Addr())
1348+
if (Commuted && !ConvertibleTo3Addr)
13411349
return false;
13421350

13431351
if (shouldOnlyCommute)
@@ -1357,7 +1365,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
13571365
regBKilled = isKilled(MI, regB, true);
13581366
}
13591367

1360-
if (MI.isConvertibleTo3Addr()) {
1368+
if (ConvertibleTo3Addr) {
13611369
// This instruction is potentially convertible to a true
13621370
// three-address instruction. Check if it is profitable.
13631371
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4044,10 +4044,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40444044
LiveVariables *LV,
40454045
LiveIntervals *LIS) const {
40464046
MachineBasicBlock &MBB = *MI.getParent();
4047+
MachineInstr *CandidateMI = &MI;
4048+
4049+
if (MI.isBundle()) {
4050+
// This is a temporary placeholder for bundle handling that enables us to
4051+
// exercise the relevant code paths in the two-address instruction pass.
4052+
if (MI.getBundleSize() != 1)
4053+
return nullptr;
4054+
CandidateMI = MI.getNextNode();
4055+
}
4056+
40474057
ThreeAddressUpdates U;
4048-
MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
4058+
MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4059+
if (!NewMI)
4060+
return nullptr;
40494061

4050-
if (NewMI) {
4062+
if (MI.isBundle()) {
4063+
CandidateMI->eraseFromBundle();
4064+
4065+
for (MachineOperand &MO : MI.all_defs()) {
4066+
if (MO.isTied())
4067+
MI.untieRegOperand(MO.getOperandNo());
4068+
}
4069+
} else {
40514070
updateLiveVariables(LV, MI, *NewMI);
40524071
if (LIS) {
40534072
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
@@ -4088,7 +4107,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40884107
LV->getVarInfo(DefReg).AliveBlocks.clear();
40894108
}
40904109

4091-
if (LIS) {
4110+
if (MI.isBundle()) {
4111+
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
4112+
if (!VRI.Reads && !VRI.Writes) {
4113+
for (MachineOperand &MO : MI.all_uses()) {
4114+
if (MO.isReg() && MO.getReg() == DefReg) {
4115+
MI.removeOperand(MO.getOperandNo());
4116+
break;
4117+
}
4118+
}
4119+
4120+
if (LIS)
4121+
LIS->shrinkToUses(&LIS->getInterval(DefReg));
4122+
}
4123+
} else if (LIS) {
40924124
LiveInterval &DefLI = LIS->getInterval(DefReg);
40934125

40944126
// We cannot delete the original instruction here, so hack out the use
@@ -4103,11 +4135,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
41034135
}
41044136
}
41054137

4138+
if (MI.isBundle()) {
4139+
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
4140+
if (!VRI.Reads && !VRI.Writes) {
4141+
for (MachineOperand &MIOp : MI.uses()) {
4142+
if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4143+
MIOp.setIsUndef(true);
4144+
MIOp.setReg(DummyReg);
4145+
}
4146+
}
4147+
}
4148+
4149+
auto MO = MachineOperand::CreateReg(DummyReg, false);
4150+
MO.setIsUndef(true);
4151+
MI.addOperand(MO);
4152+
}
4153+
41064154
LIS->shrinkToUses(&DefLI);
41074155
}
41084156
}
41094157

4110-
return NewMI;
4158+
return MI.isBundle() ? &MI : NewMI;
41114159
}
41124160

41134161
MachineInstr *

llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ body: |
3131
3232
...
3333

34-
# This test is an example where conversion to three-address form would be beneficial.
34+
# This test is an example where conversion to three-address form is beneficial.
3535
---
3636
name: test_fmac_reuse_bundle
3737
body: |
@@ -41,11 +41,10 @@ body: |
4141
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4242
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
4343
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
44-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
45-
; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec {
46-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec
44+
; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec {
45+
; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
4746
; GCN-NEXT: }
48-
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
47+
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec
4948
%2:vgpr_32 = COPY $vgpr0
5049
%0:vgpr_32 = IMPLICIT_DEF
5150
%1:vgpr_32 = IMPLICIT_DEF

0 commit comments

Comments
 (0)