From bb75b3dd35aa755a639185b40125bbc823d9f683 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 6 Nov 2025 14:16:13 -0800 Subject: [PATCH 1/2] [LP] Assign weights when peeling last iteration. --- llvm/lib/Transforms/Utils/LoopPeel.cpp | 23 ++++++- .../peel-last-iteration.ll | 66 +++++++++++++++++++ .../peel-last-iteration-expansion-cost.ll | 51 +++++++++----- llvm/test/lit.cfg.py | 3 +- 4 files changed, 125 insertions(+), 18 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index e1dcaa85a5780..d2dbe293e028a 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -54,6 +54,7 @@ using namespace llvm::SCEVPatternMatch; STATISTIC(NumPeeled, "Number of loops peeled"); STATISTIC(NumPeeledEnd, "Number of loops peeled from end"); +namespace llvm { static cl::opt UnrollPeelCount( "unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes")); @@ -87,6 +88,9 @@ static cl::opt EnablePeelingForIV( static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; +extern cl::opt ProfcheckDisableMetadataFixes; +} // namespace llvm + // Check whether we are capable of peeling this loop. 
bool llvm::canPeel(const Loop *L) { // Make sure the loop is in simplified form @@ -1190,7 +1194,24 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, IRBuilder<> B(PreHeaderBR); Value *Cond = B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0)); - B.CreateCondBr(Cond, NewPreHeader, InsertTop); + auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop); + SmallVector Weights; + auto *OrigLatchBr = Latch->getTerminator(); + auto HasBranchWeights = !ProfcheckDisableMetadataFixes && + extractBranchWeights(*OrigLatchBr, Weights); + if (HasBranchWeights) { + // The probability that the new guard skips the loop to execute just one + // iteration is the original loop's probability of exiting at the latch + // after any iteration. That should maintain the original loop body + // frequency. Upon arriving at the loop, due to the guard, the + // probability of reaching iteration i of the new loop is the + // probability of reaching iteration i+1 of the original loop. The + // probability of reaching the peeled iteration is 1, which is the + // probability of reaching iteration 0 of the original loop. + if (L->getExitBlock() == OrigLatchBr->getSuccessor(0)) + std::swap(Weights[0], Weights[1]); + setBranchWeights(*BI, Weights, /*IsExpected=*/false); + } PreHeaderBR->eraseFromParent(); // PreHeader now dominates InsertTop. diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll new file mode 100644 index 0000000000000..43e2cd8dcd89c --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll @@ -0,0 +1,66 @@ +; Disable this test in profcheck because the first run would cause profcheck to fail. 
+; REQUIRES: !profcheck +; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s -profcheck-disable-metadata-fixes 2>&1 | FileCheck %s --check-prefixes=COMMON,BAD +; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GOOD + +define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 { +entry: + %sub = add i32 %end, -1 + br label %loop.header + +loop.header: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ] + %c = icmp eq i32 %iv, %sub + br i1 %c, label %then, label %loop.latch, !prof !1 + +then: + br label %loop.latch + +loop.latch: + %iv.next = add nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, %end + br i1 %ec, label %exit, label %loop.header, !prof !2 + +exit: + ret i32 0 +} + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 2, i32 3} +!2 = !{!"branch_weights", i32 1, i32 50} + +; COMMON: block-frequency-info: test_expansion_cost_2 +; COMMON-NEXT: entry: float = 1.0 +; COMMON-NEXT: loop.header: float = 51.0 +; COMMON-NEXT: then: float = 20.4 +; COMMON-NEXT: loop.latch: float = 51.0 +; COMMON-NEXT: exit: float = 1.0 + +; COMMON: block-frequency-info: test_expansion_cost_2 +; GOOD-NEXT: entry: float = 1.0 +; GOOD-NEXT: entry.split: float = 0.98039 +; GOOD-NEXT: loop.header: float = 50.0 +; GOOD-NEXT: then: float = 20.0 +; GOOD-NEXT: loop.latch: float = 50.0 +; GOOD-NEXT: exit.peel.begin.loopexit: float = 0.98039 +; GOOD-NEXT: exit.peel.begin: float = 1.0 +; GOOD-NEXT: loop.header.peel: float = 1.0 +; GOOD-NEXT: then.peel: float = 0.4 +; GOOD-NEXT: loop.latch.peel: float = 1.0 +; GOOD-NEXT: exit.peel.next: float = 1.0 +; GOOD-NEXT: loop.header.peel.next: float = 1.0 +; GOOD-NEXT: exit: float = 1.0 + +; BAD-NEXT: entry: float = 1.0 +; BAD-NEXT: entry.split: float = 0.625 +; BAD-NEXT: loop.header: float = 31.875 +; BAD-NEXT: then: float = 12.75 +; BAD-NEXT: loop.latch: float = 31.875 +; BAD-NEXT: exit.peel.begin.loopexit: float = 0.625 +; 
BAD-NEXT: exit.peel.begin: float = 1.0 +; BAD-NEXT: loop.header.peel: float = 1.0 +; BAD-NEXT: then.peel: float = 0.4 +; BAD-NEXT: loop.latch.peel: float = 1.0 +; BAD-NEXT: exit.peel.next: float = 1.0 +; BAD-NEXT: loop.header.peel.next: float = 1.0 +; BAD-NEXT: exit: float = 1.0 \ No newline at end of file diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll index f3910f9bfc399..bc06625de0c76 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll @@ -1,46 +1,50 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 ; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=2 -S %s | FileCheck --check-prefix=BUDGET2 %s ; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=3 -S %s | FileCheck --check-prefix=BUDGET3 %s -define i32 @test_expansion_cost_2(i32 %start, i32 %end) { +; Note that BUDGET3 will expose a conditional branch going to the same label. +; This is expected, and the profile information associated will be dropped when +; the branch is simplified to unconditional. 
+ +define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 { ; BUDGET2-LABEL: define i32 @test_expansion_cost_2( -; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) { +; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] { ; BUDGET2-NEXT: [[ENTRY:.*]]: ; BUDGET2-NEXT: [[SUB:%.*]] = add i32 [[END]], -1 ; BUDGET2-NEXT: br label %[[LOOP_HEADER:.*]] ; BUDGET2: [[LOOP_HEADER]]: ; BUDGET2-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; BUDGET2-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] -; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF1:![0-9]+]] ; BUDGET2: [[THEN]]: ; BUDGET2-NEXT: br label %[[LOOP_LATCH]] ; BUDGET2: [[LOOP_LATCH]]: ; BUDGET2-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; BUDGET2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[END]] -; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]] +; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF2:![0-9]+]], !llvm.loop [[LOOP3:![0-9]+]] ; BUDGET2: [[EXIT]]: ; BUDGET2-NEXT: ret i32 0 ; ; BUDGET3-LABEL: define i32 @test_expansion_cost_2( -; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) { +; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] { ; BUDGET3-NEXT: [[ENTRY:.*]]: ; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1 ; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]] ; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 -; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]] +; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]], !prof [[PROF1:![0-9]+]] ; BUDGET3: [[ENTRY_SPLIT]]: ; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]] ; BUDGET3: [[LOOP_HEADER]]: ; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; 
BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] -; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF2:![0-9]+]] ; BUDGET3: [[THEN]]: ; BUDGET3-NEXT: br label %[[LOOP_LATCH]] ; BUDGET3: [[LOOP_LATCH]]: ; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1 ; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]] -; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] +; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]: ; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ] ; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]] @@ -49,13 +53,13 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) { ; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]] ; BUDGET3: [[LOOP_HEADER_PEEL]]: ; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]] -; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]] +; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]], !prof [[PROF2]] ; BUDGET3: [[THEN_PEEL]]: ; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]] ; BUDGET3: [[LOOP_LATCH_PEEL]]: ; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1 ; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]] -; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] +; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]], !prof [[PROF3]] ; BUDGET3: [[EXIT_PEEL_NEXT]]: ; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]] ; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]: @@ -70,7 +74,7 @@ entry: loop.header: %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ] %c = 
icmp eq i32 %iv, %sub - br i1 %c, label %then, label %loop.latch + br i1 %c, label %then, label %loop.latch, !prof !1 then: br label %loop.latch @@ -78,12 +82,29 @@ then: loop.latch: %iv.next = add nsw i32 %iv, 1 %ec = icmp eq i32 %iv.next, %end - br i1 %ec, label %exit, label %loop.header + br i1 %ec, label %exit, label %loop.header, !prof !2, !llvm.loop !3 exit: ret i32 0 } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 2, i32 3} +!2 = !{!"branch_weights", i32 1, i32 10} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.estimated_trip_count", i32 42} +;. +; BUDGET2: [[PROF0]] = !{!"function_entry_count", i32 10} +; BUDGET2: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +; BUDGET2: [[PROF2]] = !{!"branch_weights", i32 1, i32 10} +; BUDGET2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +; BUDGET2: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 42} ;. -; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} -; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} +; BUDGET3: [[PROF0]] = !{!"function_entry_count", i32 10} +; BUDGET3: [[PROF1]] = !{!"branch_weights", i32 10, i32 1} +; BUDGET3: [[PROF2]] = !{!"branch_weights", i32 2, i32 3} +; BUDGET3: [[PROF3]] = !{!"branch_weights", i32 1, i32 10} +; BUDGET3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; BUDGET3: [[META5]] = !{!"llvm.loop.peeled.count", i32 1} +; BUDGET3: [[META6]] = !{!"llvm.loop.estimated_trip_count", i32 41} ;. 
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 94cf8bc358514..1dfa280e6b82a 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -49,6 +49,7 @@ config.excludes = ["Inputs", "CMakeLists.txt", "README.txt", "LICENSE.txt"] if config.enable_profcheck: + config.available_features.add("profcheck") # Exclude llvm-reduce tests for profcheck because we substitute the FileCheck # binary with a no-op command for profcheck, but llvm-reduce tests have RUN # commands of the form llvm-reduce --test FileCheck, which explode if we @@ -69,8 +70,6 @@ # profiling doesn't work quite well on GPU, excluding config.excludes.append("AMDGPU") - config.available_features.add("profcheck") - # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) From 4ff4cc1456fa2727c37a016562684dbc80766b94 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 7 Nov 2025 15:21:38 -0800 Subject: [PATCH 2/2] [LTT][profcheck] Set branch weights for complex llvm.type.test lowering --- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 49 ++++++++++++++----- llvm/test/Other/new-pm-O0-defaults.ll | 1 + .../test/Transforms/LowerTypeTests/section.ll | 23 ++++++++- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 94663ff928a0b..31b5487ce6ec6 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -54,6 +55,7 @@ #include "llvm/IR/ModuleSummaryIndexYAML.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/ReplaceConstant.h" #include 
"llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -95,6 +97,7 @@ STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered"); STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers"); +namespace llvm { static cl::opt AvoidReuse( "lowertypetests-avoid-reuse", cl::desc("Try to avoid reuse of byte array addresses using aliases"), @@ -131,6 +134,9 @@ static cl::opt "Drop all type test sequences")), cl::Hidden, cl::init(DropTestKind::None)); +extern cl::opt ProfcheckDisableMetadataFixes; +} // namespace llvm + bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (Offset < ByteOffset) return false; @@ -423,8 +429,10 @@ struct ScopedSaveAliaseesAndUsed { class LowerTypeTestsModule { Module &M; - ModuleSummaryIndex *ExportSummary; - const ModuleSummaryIndex *ImportSummary; + FunctionAnalysisManager &FAM; + + ModuleSummaryIndex *const ExportSummary; + const ModuleSummaryIndex *const ImportSummary; // Set when the client has invoked this to simply drop all type test assume // sequences. 
DropTestKind DropTypeTests; @@ -507,9 +515,10 @@ class LowerTypeTestsModule { void allocateByteArrays(); Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL, Value *BitOffset); - void lowerTypeTestCalls( - ArrayRef TypeIds, Constant *CombinedGlobalAddr, - const DenseMap &GlobalLayout); + void + lowerTypeTestCalls(ArrayRef TypeIds, Constant *CombinedGlobalAddr, + const DenseMap &GlobalLayout, + uint64_t *TotalCallCount = nullptr); Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL); @@ -803,6 +812,8 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, } IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false)); + setExplicitlyUnknownBranchWeightsIfProfiled(*InitialBB->getTerminator(), + DEBUG_TYPE); // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. @@ -1181,7 +1192,8 @@ buildBitSets(ArrayRef TypeIds, void LowerTypeTestsModule::lowerTypeTestCalls( ArrayRef TypeIds, Constant *CombinedGlobalAddr, - const DenseMap &GlobalLayout) { + const DenseMap &GlobalLayout, + uint64_t *TotalCallCount) { // For each type identifier in this disjoint set... 
for (const auto &[TypeId, BSI] : buildBitSets(TypeIds, GlobalLayout)) { ByteArrayInfo *BAI = nullptr; @@ -1227,6 +1239,18 @@ void LowerTypeTestsModule::lowerTypeTestCalls( ++NumTypeTestCallsLowered; Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); if (Lowered) { + if (TotalCallCount) { + auto *CIF = CI->getFunction(); + if (auto EC = CIF->getEntryCount()) + if (EC->getCount()) { + auto &BFI = FAM.getResult(*CIF); + *TotalCallCount += + EC->getCount() * + static_cast( + BFI.getBlockFreq(CI->getParent()).getFrequency()) / + BFI.getEntryFreq().getFrequency(); + } + } CI->replaceAllUsesWith(Lowered); CI->eraseFromParent(); } @@ -1702,10 +1726,13 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ArrayType *JumpTableEntryType = ArrayType::get(Int8Ty, EntrySize); ArrayType *JumpTableType = ArrayType::get(JumpTableEntryType, Functions.size()); - auto JumpTable = ConstantExpr::getPointerCast( + auto *JumpTable = ConstantExpr::getPointerCast( JumpTableFn, PointerType::getUnqual(M.getContext())); - lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); + uint64_t Count = 0; + lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout, &Count); + if (!ProfcheckDisableMetadataFixes && Count) + JumpTableFn->setEntryCount(Count); // Build aliases pointing to offsets into the jump table, and replace // references to the original functions with references to the aliases. @@ -1870,7 +1897,9 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet( LowerTypeTestsModule::LowerTypeTestsModule( Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary, DropTestKind DropTypeTests) - : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary), + : M(M), + FAM(AM.getResult(M).getManager()), + ExportSummary(ExportSummary), ImportSummary(ImportSummary), DropTypeTests(ClDropTypeTests > DropTypeTests ? 
ClDropTypeTests : DropTypeTests) { assert(!(ExportSummary && ImportSummary)); @@ -1879,8 +1908,6 @@ LowerTypeTestsModule::LowerTypeTestsModule( if (Arch == Triple::arm) CanUseArmJumpTable = true; if (Arch == Triple::arm || Arch == Triple::thumb) { - auto &FAM = - AM.getResult(M).getManager(); for (Function &F : M) { // Skip declarations since we should not query the TTI for them. if (F.isDeclaration()) diff --git a/llvm/test/Other/new-pm-O0-defaults.ll b/llvm/test/Other/new-pm-O0-defaults.ll index 278a89261691a..4e2b5d4d62a78 100644 --- a/llvm/test/Other/new-pm-O0-defaults.ll +++ b/llvm/test/Other/new-pm-O0-defaults.ll @@ -44,6 +44,7 @@ ; CHECK-PRE-LINK: Running pass: CanonicalizeAliasesPass ; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass ; CHECK-THINLTO: Running pass: LowerTypeTestsPass +; CHECK-THINLTO: Running analysis: InnerAnalysisManagerProxy on [module] ; CHECK-THINLTO-NEXT: Running pass: CoroConditionalWrapper ; CHECK-THINLTO-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-THINLTO-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Transforms/LowerTypeTests/section.ll b/llvm/test/Transforms/LowerTypeTests/section.ll index bd91389c60ef0..1b0efd5bdd01d 100644 --- a/llvm/test/Transforms/LowerTypeTests/section.ll +++ b/llvm/test/Transforms/LowerTypeTests/section.ll @@ -13,14 +13,33 @@ entry: ret void } -define i1 @g() { +define i1 @g() !prof !1 { entry: %0 = call i1 @llvm.type.test(ptr @f, metadata !"_ZTSFvE") ret i1 %0 } -; CHECK: define private void @[[JT]]() #{{.*}} align {{.*}} { +define i1 @h(i1 %c) !prof !2 { +entry: + br i1 %c, label %yes, label %common, !prof !3 + +yes: + %0 = call i1 @llvm.type.test(ptr @f, metadata !"_ZTSFvE") + ret i1 %0 + +common: + ret i1 0 +} + +; CHECK: define private void @[[JT]]() #{{.*}} align {{.*}} !prof !4 { declare i1 @llvm.type.test(ptr, metadata) nounwind readnone !0 = !{i64 0, !"_ZTSFvE"} +!1 = !{!"function_entry_count", i32 20} +!2 = !{!"function_entry_count", i32 40} +!3 = 
!{!"branch_weights", i32 3, i32 5} +; the entry count for the jumptable function is: 20 + 40 * (3/8) = 20 + 15 +; where: 20 is the entry count of g, 40 of h, and 3/8 is the frequency of the +; llvm.type.test in h, relative to h's entry basic block. +; CHECK: !4 = !{!"function_entry_count", i64 35} \ No newline at end of file