Skip to content

Commit c2fbea3

Browse files
committed
AMDGPU: Use getMergedLocation in SILoadStoreOptimizer
This pass merges loads and stores, so use the combined DebugLoc of the merged instructions. I'm not sure whether computeBase should also be using the merged location from all the involved instructions. I'm also not sure how to test this sort of thing.
1 parent 5736595 commit c2fbea3

File tree

1 file changed

+41
-25
lines changed

1 file changed

+41
-25
lines changed

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 41 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -232,10 +232,11 @@ class SILoadStoreOptimizer {
232232

233233
void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
234234
MachineBasicBlock::iterator InsertBefore,
235-
AMDGPU::OpName OpName, Register DestReg) const;
235+
const DebugLoc &DL, AMDGPU::OpName OpName,
236+
Register DestReg) const;
236237
Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
237238
MachineBasicBlock::iterator InsertBefore,
238-
AMDGPU::OpName OpName) const;
239+
const DebugLoc &DL, AMDGPU::OpName OpName) const;
239240

240241
unsigned read2Opcode(unsigned EltSize) const;
241242
unsigned read2ST64Opcode(unsigned EltSize) const;
@@ -1320,10 +1321,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
13201321
// Paired.
13211322
void SILoadStoreOptimizer::copyToDestRegs(
13221323
CombineInfo &CI, CombineInfo &Paired,
1323-
MachineBasicBlock::iterator InsertBefore, AMDGPU::OpName OpName,
1324-
Register DestReg) const {
1324+
MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL,
1325+
AMDGPU::OpName OpName, Register DestReg) const {
13251326
MachineBasicBlock *MBB = CI.I->getParent();
1326-
DebugLoc DL = CI.I->getDebugLoc();
13271327

13281328
auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
13291329

@@ -1351,9 +1351,9 @@ void SILoadStoreOptimizer::copyToDestRegs(
13511351
Register
13521352
SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
13531353
MachineBasicBlock::iterator InsertBefore,
1354+
const DebugLoc &DL,
13541355
AMDGPU::OpName OpName) const {
13551356
MachineBasicBlock *MBB = CI.I->getParent();
1356-
DebugLoc DL = CI.I->getDebugLoc();
13571357

13581358
auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
13591359

@@ -1409,7 +1409,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
14091409
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
14101410
Register DestReg = MRI->createVirtualRegister(SuperRC);
14111411

1412-
DebugLoc DL = CI.I->getDebugLoc();
1412+
DebugLoc DL =
1413+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
14131414

14141415
Register BaseReg = AddrReg->getReg();
14151416
unsigned BaseSubReg = AddrReg->getSubReg();
@@ -1437,7 +1438,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
14371438
.addImm(0) // gds
14381439
.cloneMergedMemRefs({&*CI.I, &*Paired.I});
14391440

1440-
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1441+
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);
14411442

14421443
CI.I->eraseFromParent();
14431444
Paired.I->eraseFromParent();
@@ -1491,7 +1492,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
14911492
(NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
14921493

14931494
const MCInstrDesc &Write2Desc = TII->get(Opc);
1494-
DebugLoc DL = CI.I->getDebugLoc();
1495+
DebugLoc DL =
1496+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
14951497

14961498
Register BaseReg = AddrReg->getReg();
14971499
unsigned BaseSubReg = AddrReg->getSubReg();
@@ -1532,7 +1534,9 @@ MachineBasicBlock::iterator
15321534
SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
15331535
MachineBasicBlock::iterator InsertBefore) {
15341536
MachineBasicBlock *MBB = CI.I->getParent();
1535-
DebugLoc DL = CI.I->getDebugLoc();
1537+
DebugLoc DL =
1538+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
1539+
15361540
const unsigned Opcode = getNewOpcode(CI, Paired);
15371541

15381542
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
@@ -1557,7 +1561,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
15571561

15581562
MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
15591563

1560-
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1564+
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
15611565

15621566
CI.I->eraseFromParent();
15631567
Paired.I->eraseFromParent();
@@ -1568,7 +1572,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
15681572
CombineInfo &CI, CombineInfo &Paired,
15691573
MachineBasicBlock::iterator InsertBefore) {
15701574
MachineBasicBlock *MBB = CI.I->getParent();
1571-
DebugLoc DL = CI.I->getDebugLoc();
1575+
DebugLoc DL =
1576+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
1577+
15721578
const unsigned Opcode = getNewOpcode(CI, Paired);
15731579

15741580
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
@@ -1589,7 +1595,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
15891595
New.addImm(MergedOffset);
15901596
New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
15911597

1592-
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
1598+
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::sdst, DestReg);
15931599

15941600
CI.I->eraseFromParent();
15951601
Paired.I->eraseFromParent();
@@ -1600,7 +1606,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
16001606
CombineInfo &CI, CombineInfo &Paired,
16011607
MachineBasicBlock::iterator InsertBefore) {
16021608
MachineBasicBlock *MBB = CI.I->getParent();
1603-
DebugLoc DL = CI.I->getDebugLoc();
1609+
1610+
DebugLoc DL =
1611+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
16041612

16051613
const unsigned Opcode = getNewOpcode(CI, Paired);
16061614

@@ -1630,7 +1638,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
16301638
.addImm(0) // swz
16311639
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
16321640

1633-
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1641+
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
16341642

16351643
CI.I->eraseFromParent();
16361644
Paired.I->eraseFromParent();
@@ -1641,7 +1649,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
16411649
CombineInfo &CI, CombineInfo &Paired,
16421650
MachineBasicBlock::iterator InsertBefore) {
16431651
MachineBasicBlock *MBB = CI.I->getParent();
1644-
DebugLoc DL = CI.I->getDebugLoc();
1652+
1653+
DebugLoc DL =
1654+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
16451655

16461656
const unsigned Opcode = getNewOpcode(CI, Paired);
16471657

@@ -1681,7 +1691,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
16811691
.addImm(0) // swz
16821692
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
16831693

1684-
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1694+
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
16851695

16861696
CI.I->eraseFromParent();
16871697
Paired.I->eraseFromParent();
@@ -1692,12 +1702,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
16921702
CombineInfo &CI, CombineInfo &Paired,
16931703
MachineBasicBlock::iterator InsertBefore) {
16941704
MachineBasicBlock *MBB = CI.I->getParent();
1695-
DebugLoc DL = CI.I->getDebugLoc();
1705+
DebugLoc DL =
1706+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
16961707

16971708
const unsigned Opcode = getNewOpcode(CI, Paired);
16981709

16991710
Register SrcReg =
1700-
copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1711+
copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
17011712

17021713
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
17031714
.addReg(SrcReg, RegState::Kill);
@@ -1739,7 +1750,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
17391750
CombineInfo &CI, CombineInfo &Paired,
17401751
MachineBasicBlock::iterator InsertBefore) {
17411752
MachineBasicBlock *MBB = CI.I->getParent();
1742-
DebugLoc DL = CI.I->getDebugLoc();
1753+
1754+
DebugLoc DL =
1755+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
17431756

17441757
const unsigned Opcode = getNewOpcode(CI, Paired);
17451758

@@ -1757,7 +1770,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
17571770
.addImm(CI.CPol)
17581771
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
17591772

1760-
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1773+
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);
17611774

17621775
CI.I->eraseFromParent();
17631776
Paired.I->eraseFromParent();
@@ -1768,12 +1781,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
17681781
CombineInfo &CI, CombineInfo &Paired,
17691782
MachineBasicBlock::iterator InsertBefore) {
17701783
MachineBasicBlock *MBB = CI.I->getParent();
1771-
DebugLoc DL = CI.I->getDebugLoc();
1784+
1785+
DebugLoc DL =
1786+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
17721787

17731788
const unsigned Opcode = getNewOpcode(CI, Paired);
17741789

17751790
Register SrcReg =
1776-
copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1791+
copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
17771792

17781793
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
17791794
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
@@ -2042,12 +2057,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
20422057
CombineInfo &CI, CombineInfo &Paired,
20432058
MachineBasicBlock::iterator InsertBefore) {
20442059
MachineBasicBlock *MBB = CI.I->getParent();
2045-
DebugLoc DL = CI.I->getDebugLoc();
2060+
DebugLoc DL =
2061+
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
20462062

20472063
const unsigned Opcode = getNewOpcode(CI, Paired);
20482064

20492065
Register SrcReg =
2050-
copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
2066+
copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
20512067

20522068
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
20532069
.addReg(SrcReg, RegState::Kill);

0 commit comments

Comments
 (0)