diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 658ebd47488c7..3293f7c982749 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -38,6 +39,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -122,6 +124,7 @@ namespace { MachineDominatorTree *DT = nullptr; // Machine dominator tree MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree MachineCycleInfo *CI = nullptr; + ProfileSummaryInfo *PSI = nullptr; MachineBlockFrequencyInfo *MBFI = nullptr; const MachineBranchProbabilityInfo *MBPI = nullptr; AliasAnalysis *AA = nullptr; @@ -198,6 +201,7 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); if (UseBlockFreqInfo) AU.addRequired(); AU.addRequired(); @@ -284,6 +288,7 @@ char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) @@ -722,6 +727,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis().getDomTree(); PDT = &getAnalysis().getPostDomTree(); CI = &getAnalysis().getCycleInfo(); + PSI = &getAnalysis().getPSI(); 
MBFI = UseBlockFreqInfo ? &getAnalysis().getMBFI() : nullptr;
@@ -1217,12 +1223,17 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
 
   // Sort Successors according to their cycle depth or block frequency info.
   llvm::stable_sort(
-      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+      AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
-        bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
-        return HasBlockFreq ? LHSFreq < RHSFreq
-                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
+        // Use cycle depth when optimizing for size or when neither block has
+        // a frequency. Both must be zero: mixing depth and frequency
+        // comparisons across pairs would break the strict weak ordering
+        // that stable_sort requires.
+        if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) ||
+            (!LHSFreq && !RHSFreq))
+          return CI->getCycleDepth(L) < CI->getCycleDepth(R);
+        return LHSFreq < RHSFreq;
       });
 
   auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll
index cad9cf81905cd..c2653a86f53af 100644
--- a/llvm/test/CodeGen/X86/sink-blockfreq.ll
+++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll
@@ -1,12 +1,13 @@
 ; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
 ; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -force-pgso -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
 
 ; Test that by changing BlockFrequencyInfo we change the order in which
 ; machine-sink looks for successor blocks. By not using BFI, both G and B
 ; have the same loop depth and no instructions is sinked - B is selected but
 ; can't be used as to avoid breaking a non profitable critical edge.
By using ; BFI, "mul" is sinked into the less frequent block G. -define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp { +define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp !prof !14 { ; MSINK_BFI-LABEL: sink_freqinfo ; MSINK_BFI: jl ; MSINK_BFI-NEXT: ## %bb. @@ -22,24 +23,40 @@ B: %ee = phi i32 [ 0, %entry ], [ %inc, %F ] %xx = sub i32 %a, %ee %cond0 = icmp slt i32 %xx, 0 - br i1 %cond0, label %F, label %exit, !prof !0 + br i1 %cond0, label %F, label %exit, !prof !15 F: %inc = add nsw i32 %xx, 2 %aa = mul nsw i32 %b, %inc %exitcond = icmp slt i32 %inc, %a - br i1 %exitcond, label %B, label %G, !prof !1 + br i1 %exitcond, label %B, label %G, !prof !16 G: %ii = add nsw i32 %aa, %a %ll = add i32 %b, 45 %exitcond2 = icmp sge i32 %ii, %b - br i1 %exitcond2, label %G, label %exit, !prof !2 + br i1 %exitcond2, label %G, label %exit, !prof !17 exit: ret i32 0 } -!0 = !{!"branch_weights", i32 4, i32 1} -!1 = !{!"branch_weights", i32 128, i32 1} -!2 = !{!"branch_weights", i32 1, i32 1} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 1000} +!15 = !{!"branch_weights", i32 4, i32 1} +!16 = !{!"branch_weights", i32 128, i32 1} +!17 = !{!"branch_weights", i32 1, i32 1}