Skip to content

Commit e8a2bb4

Browse files
committed
[RegAlloc] Account for non-rematerializable uses when applying weight discount
This aims to fix the issue that caused https://reviews.llvm.org/D106408 to be reverted. CalcSpillWeights will reduce the weight of an interval by half if it's considered rematerializable, so it will be evicted before others. It does this by checking TII.isTriviallyReMaterializable. However rematerialization may still fail if any of the defining MI's uses aren't available at the locations it needs to be rematerialized. LiveRangeEdit::canRematerializeAt calls allUsesAvailableAt to check this. This fixes it by also checking allUsesAvailableAt in CalcSpillWeights. There may be a better place to share the function than LiveIntervals. In practice this has zero change on AArch64/X86-64/RISC-V as measured on llvm-test-suite, but prevents weights from being perturbed in an upcoming patch which enables more rematerialization by re-attempting https://reviews.llvm.org/D106408.
1 parent ec8819f commit e8a2bb4

17 files changed

+986
-954
lines changed

llvm/include/llvm/CodeGen/LiveIntervals.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,10 @@ class LiveIntervals {
466466
/// have any segments or value numbers.
467467
LLVM_ABI void constructMainRangeFromSubranges(LiveInterval &LI);
468468

469+
/// \returns true if all registers used by \p MI at its instruction index are
470+
/// also available with the same value at \p UseIdx.
471+
bool allUsesAvailableAt(const MachineInstr &MI, SlotIndex UseIdx) const;
472+
469473
private:
470474
/// Compute live intervals for all virtual registers.
471475
void computeVirtRegs();

llvm/include/llvm/CodeGen/LiveRangeEdit.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,6 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate {
193193
explicit Remat(const VNInfo *ParentVNI) : ParentVNI(ParentVNI) {}
194194
};
195195

196-
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
197-
/// OrigIdx are also available with the same value at UseIdx.
198-
bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
199-
SlotIndex UseIdx) const;
200-
201196
/// canRematerializeAt - Determine if ParentVNI can be rematerialized at
202197
/// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
203198
bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx);

llvm/lib/CodeGen/CalcSpillWeights.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,17 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
124124

125125
if (!TII.isTriviallyReMaterializable(*MI))
126126
return false;
127+
128+
// If MI has register uses, it will only be rematerializable if its uses are
129+
// also live at the indices it will be rematerialized at.
130+
const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
131+
for (MachineInstr &Use : MRI.use_instructions(Reg)) {
132+
SlotIndex UseIdx = LIS.getInstructionIndex(Use);
133+
if (LI.getVNInfoAt(UseIdx) != VNI)
134+
continue;
135+
if (!LIS.allUsesAvailableAt(*MI, UseIdx))
136+
return false;
137+
}
127138
}
128139
return true;
129140
}

llvm/lib/CodeGen/LiveIntervals.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/CodeGen/Passes.h"
3535
#include "llvm/CodeGen/SlotIndexes.h"
3636
#include "llvm/CodeGen/StackMaps.h"
37+
#include "llvm/CodeGen/TargetInstrInfo.h"
3738
#include "llvm/CodeGen/TargetRegisterInfo.h"
3839
#include "llvm/CodeGen/TargetSubtargetInfo.h"
3940
#include "llvm/CodeGen/VirtRegMap.h"
@@ -1820,3 +1821,54 @@ void LiveIntervals::constructMainRangeFromSubranges(LiveInterval &LI) {
18201821
LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
18211822
LICalc->constructMainRangeFromSubranges(LI);
18221823
}
1824+
1825+
bool LiveIntervals::allUsesAvailableAt(const MachineInstr &MI,
1826+
SlotIndex UseIdx) const {
1827+
SlotIndex OrigIdx = getInstructionIndex(MI).getRegSlot(true);
1828+
UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
1829+
for (const MachineOperand &MO : MI.operands()) {
1830+
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1831+
continue;
1832+
1833+
// We can't remat physreg uses, unless it is a constant or target wants
1834+
// to ignore this use.
1835+
if (MO.getReg().isPhysical()) {
1836+
if (MRI->isConstantPhysReg(MO.getReg()) || TII->isIgnorableUse(MO))
1837+
continue;
1838+
return false;
1839+
}
1840+
1841+
const LiveInterval &li = getInterval(MO.getReg());
1842+
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
1843+
if (!OVNI)
1844+
continue;
1845+
1846+
// Don't allow rematerialization immediately after the original def.
1847+
// It would be incorrect if OrigMI redefines the register.
1848+
// See PR14098.
1849+
if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
1850+
return false;
1851+
1852+
if (OVNI != li.getVNInfoAt(UseIdx))
1853+
return false;
1854+
1855+
// Check that subrange is live at UseIdx.
1856+
if (li.hasSubRanges()) {
1857+
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1858+
unsigned SubReg = MO.getSubReg();
1859+
LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
1860+
: MRI->getMaxLaneMaskForVReg(MO.getReg());
1861+
for (const LiveInterval::SubRange &SR : li.subranges()) {
1862+
if ((SR.LaneMask & LM).none())
1863+
continue;
1864+
if (!SR.liveAt(UseIdx))
1865+
return false;
1866+
// Early exit if all used lanes are checked. No need to continue.
1867+
LM &= ~SR.LaneMask;
1868+
if (LM.none())
1869+
break;
1870+
}
1871+
}
1872+
}
1873+
return true;
1874+
}

llvm/lib/CodeGen/LiveRangeEdit.cpp

Lines changed: 2 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -101,60 +101,6 @@ bool LiveRangeEdit::anyRematerializable() {
101101
return !Remattable.empty();
102102
}
103103

104-
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
105-
/// OrigIdx are also available with the same value at UseIdx.
106-
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
107-
SlotIndex OrigIdx,
108-
SlotIndex UseIdx) const {
109-
OrigIdx = OrigIdx.getRegSlot(true);
110-
UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
111-
for (const MachineOperand &MO : OrigMI->operands()) {
112-
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
113-
continue;
114-
115-
// We can't remat physreg uses, unless it is a constant or target wants
116-
// to ignore this use.
117-
if (MO.getReg().isPhysical()) {
118-
if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO))
119-
continue;
120-
return false;
121-
}
122-
123-
LiveInterval &li = LIS.getInterval(MO.getReg());
124-
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
125-
if (!OVNI)
126-
continue;
127-
128-
// Don't allow rematerialization immediately after the original def.
129-
// It would be incorrect if OrigMI redefines the register.
130-
// See PR14098.
131-
if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
132-
return false;
133-
134-
if (OVNI != li.getVNInfoAt(UseIdx))
135-
return false;
136-
137-
// Check that subrange is live at UseIdx.
138-
if (li.hasSubRanges()) {
139-
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
140-
unsigned SubReg = MO.getSubReg();
141-
LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
142-
: MRI.getMaxLaneMaskForVReg(MO.getReg());
143-
for (LiveInterval::SubRange &SR : li.subranges()) {
144-
if ((SR.LaneMask & LM).none())
145-
continue;
146-
if (!SR.liveAt(UseIdx))
147-
return false;
148-
// Early exit if all used lanes are checked. No need to continue.
149-
LM &= ~SR.LaneMask;
150-
if (LM.none())
151-
break;
152-
}
153-
}
154-
}
155-
return true;
156-
}
157-
158104
bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
159105
SlotIndex UseIdx) {
160106
assert(ScannedRemattable && "Call anyRematerializable first");
@@ -164,12 +110,10 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
164110
return false;
165111

166112
// No defining instruction provided.
167-
SlotIndex DefIdx;
168113
assert(RM.OrigMI && "No defining instruction for remattable value");
169-
DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
170114

171115
// Verify that all used registers are available with the same values.
172-
if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
116+
if (!LIS.allUsesAvailableAt(*RM.OrigMI, UseIdx))
173117
return false;
174118

175119
return true;
@@ -230,8 +174,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
230174

231175
// Since we're moving the DefMI load, make sure we're not extending any live
232176
// ranges.
233-
if (!allUsesAvailableAt(DefMI, LIS.getInstructionIndex(*DefMI),
234-
LIS.getInstructionIndex(*UseMI)))
177+
if (!LIS.allUsesAvailableAt(*DefMI, LIS.getInstructionIndex(*UseMI)))
235178
return false;
236179

237180
// We also need to make sure it is safe to move the load.

llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,7 +2025,8 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
20252025
; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
20262026
; RV32-NEXT: vmv1r.v v7, v0
20272027
; RV32-NEXT: csrr a1, vlenb
2028-
; RV32-NEXT: slli a1, a1, 5
2028+
; RV32-NEXT: li a2, 40
2029+
; RV32-NEXT: mul a1, a1, a2
20292030
; RV32-NEXT: add a1, sp, a1
20302031
; RV32-NEXT: addi a1, a1, 16
20312032
; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
@@ -2036,48 +2037,47 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
20362037
; RV32-NEXT: sub a3, a0, a1
20372038
; RV32-NEXT: addi a2, a2, 1365
20382039
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2039-
; RV32-NEXT: vmv.v.x v8, a2
2040+
; RV32-NEXT: vmv.v.x v24, a2
20402041
; RV32-NEXT: sltu a2, a0, a3
20412042
; RV32-NEXT: addi a2, a2, -1
20422043
; RV32-NEXT: and a2, a2, a3
20432044
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2044-
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
2045+
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
20452046
; RV32-NEXT: csrr a3, vlenb
2046-
; RV32-NEXT: li a4, 40
2047-
; RV32-NEXT: mul a3, a3, a4
2047+
; RV32-NEXT: slli a3, a3, 5
20482048
; RV32-NEXT: add a3, sp, a3
20492049
; RV32-NEXT: addi a3, a3, 16
2050-
; RV32-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
2051-
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
2052-
; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
2050+
; RV32-NEXT: vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
2051+
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
2052+
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
20532053
; RV32-NEXT: lui a3, 209715
20542054
; RV32-NEXT: addi a3, a3, 819
20552055
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2056-
; RV32-NEXT: vmv.v.x v8, a3
2056+
; RV32-NEXT: vmv.v.x v24, a3
20572057
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2058-
; RV32-NEXT: vand.vv v24, v16, v8, v0.t
2059-
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
2058+
; RV32-NEXT: vand.vv v16, v8, v24, v0.t
2059+
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
20602060
; RV32-NEXT: csrr a3, vlenb
20612061
; RV32-NEXT: li a4, 24
20622062
; RV32-NEXT: mul a3, a3, a4
20632063
; RV32-NEXT: add a3, sp, a3
20642064
; RV32-NEXT: addi a3, a3, 16
2065-
; RV32-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
2066-
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2067-
; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
2068-
; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
2069-
; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
2065+
; RV32-NEXT: vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
2066+
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
2067+
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
2068+
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
2069+
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
20702070
; RV32-NEXT: lui a3, 61681
20712071
; RV32-NEXT: addi a3, a3, -241
20722072
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2073-
; RV32-NEXT: vmv.v.x v8, a3
2073+
; RV32-NEXT: vmv.v.x v16, a3
20742074
; RV32-NEXT: csrr a3, vlenb
20752075
; RV32-NEXT: slli a3, a3, 4
20762076
; RV32-NEXT: add a3, sp, a3
20772077
; RV32-NEXT: addi a3, a3, 16
2078-
; RV32-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
2078+
; RV32-NEXT: vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
20792079
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2080-
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2080+
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
20812081
; RV32-NEXT: lui a3, 4112
20822082
; RV32-NEXT: addi a3, a3, 257
20832083
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
@@ -2098,32 +2098,32 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
20982098
; RV32-NEXT: mv a0, a1
20992099
; RV32-NEXT: .LBB46_2:
21002100
; RV32-NEXT: vmv1r.v v0, v7
2101-
; RV32-NEXT: slli a1, a1, 5
2101+
; RV32-NEXT: li a3, 40
2102+
; RV32-NEXT: mul a1, a1, a3
21022103
; RV32-NEXT: add a1, sp, a1
21032104
; RV32-NEXT: addi a1, a1, 16
21042105
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
21052106
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
21062107
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
21072108
; RV32-NEXT: csrr a0, vlenb
2108-
; RV32-NEXT: li a1, 40
2109-
; RV32-NEXT: mul a0, a0, a1
2109+
; RV32-NEXT: slli a0, a0, 5
21102110
; RV32-NEXT: add a0, sp, a0
21112111
; RV32-NEXT: addi a0, a0, 16
21122112
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
2113-
; RV32-NEXT: vand.vv v24, v16, v24, v0.t
2114-
; RV32-NEXT: vsub.vv v24, v8, v24, v0.t
2113+
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2114+
; RV32-NEXT: vsub.vv v16, v8, v16, v0.t
21152115
; RV32-NEXT: csrr a0, vlenb
21162116
; RV32-NEXT: li a1, 24
21172117
; RV32-NEXT: mul a0, a0, a1
21182118
; RV32-NEXT: add a0, sp, a0
21192119
; RV32-NEXT: addi a0, a0, 16
2120-
; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
2121-
; RV32-NEXT: vand.vv v8, v24, v16, v0.t
2122-
; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t
2123-
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
2124-
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
2125-
; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t
2126-
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
2120+
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
2121+
; RV32-NEXT: vand.vv v8, v16, v24, v0.t
2122+
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
2123+
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2124+
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
2125+
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
2126+
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
21272127
; RV32-NEXT: csrr a0, vlenb
21282128
; RV32-NEXT: slli a0, a0, 4
21292129
; RV32-NEXT: add a0, sp, a0
@@ -2263,21 +2263,21 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
22632263
; RV32-NEXT: addi a4, a4, 16
22642264
; RV32-NEXT: vs8r.v v0, (a4) # vscale x 64-byte Folded Spill
22652265
; RV32-NEXT: vand.vv v24, v24, v0
2266-
; RV32-NEXT: vsub.vv v16, v16, v24
2266+
; RV32-NEXT: vsub.vv v24, v16, v24
22672267
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
22682268
; RV32-NEXT: vmv.v.x v0, a3
22692269
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2270-
; RV32-NEXT: vand.vv v24, v16, v0
2271-
; RV32-NEXT: vsrl.vi v16, v16, 2
2270+
; RV32-NEXT: vand.vv v16, v24, v0
2271+
; RV32-NEXT: vsrl.vi v24, v24, 2
22722272
; RV32-NEXT: csrr a3, vlenb
22732273
; RV32-NEXT: slli a3, a3, 4
22742274
; RV32-NEXT: add a3, sp, a3
22752275
; RV32-NEXT: addi a3, a3, 16
22762276
; RV32-NEXT: vs8r.v v0, (a3) # vscale x 64-byte Folded Spill
2277-
; RV32-NEXT: vand.vv v16, v16, v0
2277+
; RV32-NEXT: vand.vv v24, v24, v0
2278+
; RV32-NEXT: vadd.vv v24, v16, v24
2279+
; RV32-NEXT: vsrl.vi v16, v24, 4
22782280
; RV32-NEXT: vadd.vv v16, v24, v16
2279-
; RV32-NEXT: vsrl.vi v24, v16, 4
2280-
; RV32-NEXT: vadd.vv v16, v16, v24
22812281
; RV32-NEXT: lui a3, 61681
22822282
; RV32-NEXT: lui a4, 4112
22832283
; RV32-NEXT: addi a3, a3, -241
@@ -2312,16 +2312,16 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
23122312
; RV32-NEXT: addi a0, a0, 16
23132313
; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
23142314
; RV32-NEXT: vand.vv v24, v24, v0
2315-
; RV32-NEXT: vsub.vv v8, v8, v24
2315+
; RV32-NEXT: vsub.vv v24, v8, v24
23162316
; RV32-NEXT: csrr a0, vlenb
23172317
; RV32-NEXT: slli a0, a0, 4
23182318
; RV32-NEXT: add a0, sp, a0
23192319
; RV32-NEXT: addi a0, a0, 16
23202320
; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
2321-
; RV32-NEXT: vand.vv v24, v8, v0
2322-
; RV32-NEXT: vsrl.vi v8, v8, 2
2323-
; RV32-NEXT: vand.vv v8, v8, v0
2324-
; RV32-NEXT: vadd.vv v8, v24, v8
2321+
; RV32-NEXT: vand.vv v8, v24, v0
2322+
; RV32-NEXT: vsrl.vi v24, v24, 2
2323+
; RV32-NEXT: vand.vv v24, v24, v0
2324+
; RV32-NEXT: vadd.vv v8, v8, v24
23252325
; RV32-NEXT: vsrl.vi v24, v8, 4
23262326
; RV32-NEXT: vadd.vv v8, v8, v24
23272327
; RV32-NEXT: csrr a0, vlenb

0 commit comments

Comments
 (0)