diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index 41b7f10cfc38a..acb8b762efc64 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -18,6 +18,7 @@ class LiveIntervals; class MachineBlockFrequencyInfo; class MachineFunction; class MachineLoopInfo; +class ProfileSummaryInfo; class VirtRegMap; /// Normalize the spill weight of a live interval @@ -47,6 +48,7 @@ class VirtRegMap; LiveIntervals &LIS; const VirtRegMap &VRM; const MachineLoopInfo &Loops; + ProfileSummaryInfo *PSI; const MachineBlockFrequencyInfo &MBFI; /// Returns true if Reg of live interval LI is used in instruction with many @@ -56,8 +58,9 @@ class VirtRegMap; public: VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, const VirtRegMap &VRM, const MachineLoopInfo &Loops, - const MachineBlockFrequencyInfo &MBFI) - : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {} + const MachineBlockFrequencyInfo &MBFI, + ProfileSummaryInfo *PSI = nullptr) + : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), PSI(PSI), MBFI(MBFI) {} virtual ~VirtRegAuxInfo() = default; diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index 4c45a9676d6bd..161bb247a0e96 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -47,6 +47,7 @@ class MachineDominatorTree; class MachineFunction; class MachineInstr; class MachineRegisterInfo; +class ProfileSummaryInfo; class raw_ostream; class TargetInstrInfo; class VirtRegMap; @@ -113,14 +114,18 @@ class LiveIntervals { ~LiveIntervals(); /// Calculate the spill weight to assign to a single instruction. + /// If \p PSI is provided the calculation is altered for optsize functions. static float getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, - const MachineInstr &MI); + const MachineInstr &MI, + ProfileSummaryInfo *PSI = nullptr); /// Calculate the spill weight to assign to a single instruction. + /// If \p PSI is provided the calculation is altered for optsize functions. static float getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, - const MachineBasicBlock *MBB); + const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI = nullptr); LiveInterval &getInterval(Register Reg) { if (hasInterval(Reg)) diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 88ed2291313c9..f361c956092e8 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -199,8 +199,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // localLI = COPY other // ... // other = COPY localLI - TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB); - TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB); + TotalWeight += + LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB, PSI); + TotalWeight += + LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB, PSI); NumInstr += 2; } @@ -272,7 +274,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // Calculate instr weight. bool Reads, Writes; std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg()); - Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI); + Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI, PSI); // Give extra weight to what looks like a loop induction variable update. if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB)) diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 7ddaaaa915ef1..21a316cf99a21 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" @@ -37,6 +38,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/ProfileSummary.h" #include "llvm/IR/Statepoint.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" @@ -875,14 +877,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { float LiveIntervals::getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, - const MachineInstr &MI) { - return getSpillWeight(isDef, isUse, MBFI, MI.getParent()); + const MachineInstr &MI, + ProfileSummaryInfo *PSI) { + return getSpillWeight(isDef, isUse, MBFI, MI.getParent(), PSI); } float LiveIntervals::getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, - const MachineBasicBlock *MBB) { - return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB); + const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI) { + float Weight = isDef + isUse; + const auto *MF = MBB->getParent(); + // When optimizing for size we only consider the codesize impact of spilling + // the register, not the runtime impact. + if (PSI && (MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(MF, PSI, MBFI))) + return Weight; + return Weight * MBFI->getBlockFreqRelativeToEntryBlock(MBB); } LiveRange::Segment diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index caf9c32a5a349..046784c386e30 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -14,6 +14,7 @@ #include "AllocationOrder.h" #include "RegAllocBase.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -140,6 +141,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, false) @@ -182,6 +184,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequiredID(MachineDominatorsID); @@ -312,7 +315,8 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { getAnalysis()); VirtRegAuxInfo VRAI( *MF, *LIS, *VRM, getAnalysis().getLI(), - getAnalysis().getMBFI()); + getAnalysis().getMBFI(), + &getAnalysis().getPSI()); VRAI.calculateSpillWeightsAndHints(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI)); diff --git a/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll b/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll new file mode 100644 index 0000000000000..5c3bd984087ec --- /dev/null +++ b/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll @@ -0,0 +1,168 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc < %s -mtriple=aarch64 -regalloc=basic | FileCheck %s + +; Test that the register allocator behaves differently with minsize functions. + +declare void @foo(i32, ptr) + +define void @optsize(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) minsize { +; CHECK-LABEL: optsize: +; CHECK: // %bb.0: // %bb +; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w23, -40 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: mov w23, w5 +; CHECK-NEXT: mov x22, x4 +; CHECK-NEXT: mov x21, x3 +; CHECK-NEXT: mov x20, x2 +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: .LBB0_1: // %bb8 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cbz w19, .LBB0_1 +; CHECK-NEXT: // %bb.2: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: cmp w19, #39 +; CHECK-NEXT: b.eq .LBB0_6 +; CHECK-NEXT: // %bb.3: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: cmp w19, #34 +; CHECK-NEXT: b.eq .LBB0_6 +; CHECK-NEXT: // %bb.4: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: cmp w19, #10 +; CHECK-NEXT: b.ne .LBB0_1 +; CHECK-NEXT: // %bb.5: // %bb9 +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: str wzr, [x20] +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_6: // %bb10 +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: mov w0, w23 +; CHECK-NEXT: mov x1, x21 +; CHECK-NEXT: str wzr, [x22] +; CHECK-NEXT: bl foo +; CHECK-NEXT: b .LBB0_1 +bb: + br label %bb7 + +bb7: ; preds = %bb13, %bb + %phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ] + br label %bb8 + +bb8: ; preds = %bb10, %bb9, %bb8, %bb7 + switch i32 %arg1, label %bb8 [ + i32 10, label %bb9 + i32 1, label %bb16 + i32 0, label %bb13 + i32 39, label %bb10 + i32 34, label %bb10 + ] + +bb9: ; preds = %bb8 + store i32 0, ptr %arg2, align 4 + br label %bb8 + +bb10: ; preds = %bb8, %bb8 + store i32 0, ptr %arg4, align 4 + tail call void @foo(i32 %arg5, ptr %arg3) + br label %bb8 + +bb13: ; preds = %bb8 + %not.arg6 = xor i1 %arg6, true + %spec.select = zext i1 %not.arg6 to i32 + br label %bb7 + +bb16: ; preds = %bb8 + unreachable +} + +define void @optspeed(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) { +; CHECK-LABEL: optspeed: +; CHECK: // %bb.0: // %bb +; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w23, -40 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: mov w22, w5 +; CHECK-NEXT: mov x21, x4 +; CHECK-NEXT: mov x20, x3 +; CHECK-NEXT: mov x23, x2 +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: b .LBB1_2 +; CHECK-NEXT: .LBB1_1: // %bb10 +; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: mov w0, w22 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: str wzr, [x21] +; CHECK-NEXT: bl foo +; CHECK-NEXT: .LBB1_2: // %bb8 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmp w19, #33 +; CHECK-NEXT: b.gt .LBB1_6 +; CHECK-NEXT: // %bb.3: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: cbz w19, .LBB1_2 +; CHECK-NEXT: // %bb.4: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: cmp w19, #10 +; CHECK-NEXT: b.ne .LBB1_2 +; CHECK-NEXT: // %bb.5: // %bb9 +; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: str wzr, [x23] +; CHECK-NEXT: b .LBB1_2 +; CHECK-NEXT: .LBB1_6: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: cmp w19, #34 +; CHECK-NEXT: b.eq .LBB1_1 +; CHECK-NEXT: // %bb.7: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: cmp w19, #39 +; CHECK-NEXT: b.eq .LBB1_1 +; CHECK-NEXT: b .LBB1_2 +bb: + br label %bb7 + +bb7: ; preds = %bb13, %bb + %phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ] + br label %bb8 + +bb8: ; preds = %bb10, %bb9, %bb8, %bb7 + switch i32 %arg1, label %bb8 [ + i32 10, label %bb9 + i32 1, label %bb16 + i32 0, label %bb13 + i32 39, label %bb10 + i32 34, label %bb10 + ] + +bb9: ; preds = %bb8 + store i32 0, ptr %arg2, align 4 + br label %bb8 + +bb10: ; preds = %bb8, %bb8 + store i32 0, ptr %arg4, align 4 + tail call void @foo(i32 %arg5, ptr %arg3) + br label %bb8 + +bb13: ; preds = %bb8 + %not.arg6 = xor i1 %arg6, true + %spec.select = zext i1 %not.arg6 to i32 + br label %bb7 + +bb16: ; preds = %bb8 + unreachable +}