From bb75b3dd35aa755a639185b40125bbc823d9f683 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Thu, 6 Nov 2025 14:16:13 -0800
Subject: [PATCH 1/2] [LP] Assign weights when peeling last iteration.

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp        | 23 ++++++-
 .../peel-last-iteration.ll                    | 66 +++++++++++++++++++
 .../peel-last-iteration-expansion-cost.ll     | 51 +++++++++-----
 llvm/test/lit.cfg.py                          |  3 +-
 4 files changed, 125 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index e1dcaa85a5780..d2dbe293e028a 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -54,6 +54,7 @@ using namespace llvm::SCEVPatternMatch;
 STATISTIC(NumPeeled, "Number of loops peeled");
 STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
 
+namespace llvm {
 static cl::opt<unsigned> UnrollPeelCount(
     "unroll-peel-count", cl::Hidden,
     cl::desc("Set the unroll peeling count, for testing purposes"));
@@ -87,6 +88,9 @@ static cl::opt<bool> EnablePeelingForIV(
 
 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
 
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 // Check whether we are capable of peeling this loop.
 bool llvm::canPeel(const Loop *L) {
   // Make sure the loop is in simplified form
@@ -1190,7 +1194,24 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
       IRBuilder<> B(PreHeaderBR);
       Value *Cond =
           B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
-      B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+      auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+      SmallVector<uint32_t> Weights;
+      auto *OrigLatchBr = Latch->getTerminator();
+      auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
+                              extractBranchWeights(*OrigLatchBr, Weights);
+      if (HasBranchWeights) {
+        // The guard skips the loop, running only the peeled iteration, with
+        // the probability with which the original latch exits after any
+        // iteration. That maintains the original loop body frequency: past
+        // the guard, the probability of reaching iteration i of the new loop
+        // is that of reaching iteration i+1 of the original loop, and the
+        // peeled iteration is reached with probability 1, like iteration 0 of
+        // the original loop. The guard's first successor enters the loop, so
+        // put the latch's "continue" weight first.
+        if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
+          std::swap(Weights[0], Weights[1]);
+        setBranchWeights(*BI, Weights, /*IsExpected=*/false);
+      }
       PreHeaderBR->eraseFromParent();
 
       // PreHeader now dominates InsertTop.
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll
new file mode 100644
index 0000000000000..43e2cd8dcd89c
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration.ll
@@ -0,0 +1,66 @@
+; Disable this test under profcheck: the first RUN line passes -profcheck-disable-metadata-fixes, which would cause profcheck to fail.
+; REQUIRES: !profcheck
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s -profcheck-disable-metadata-fixes 2>&1 | FileCheck %s --check-prefixes=COMMON,BAD
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GOOD
+
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
+entry:
+  %sub = add i32 %end, -1
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+  %c = icmp eq i32 %iv, %sub
+  br i1 %c, label %then, label %loop.latch, !prof !1
+
+then:
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, %end
+  br i1 %ec, label %exit, label %loop.header, !prof !2
+
+exit:
+  ret i32 0
+}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 50}
+
+; COMMON:        block-frequency-info: test_expansion_cost_2
+; COMMON-NEXT:   entry: float = 1.0
+; COMMON-NEXT:   loop.header: float = 51.0
+; COMMON-NEXT:   then: float = 20.4
+; COMMON-NEXT:   loop.latch: float = 51.0
+; COMMON-NEXT:   exit: float = 1.0
+
+; COMMON:       block-frequency-info: test_expansion_cost_2
+; GOOD-NEXT:    entry: float = 1.0
+; GOOD-NEXT:    entry.split: float = 0.98039
+; GOOD-NEXT:    loop.header: float = 50.0
+; GOOD-NEXT:    then: float = 20.0
+; GOOD-NEXT:    loop.latch: float = 50.0
+; GOOD-NEXT:    exit.peel.begin.loopexit: float = 0.98039
+; GOOD-NEXT:    exit.peel.begin: float = 1.0
+; GOOD-NEXT:    loop.header.peel: float = 1.0
+; GOOD-NEXT:    then.peel: float = 0.4
+; GOOD-NEXT:    loop.latch.peel: float = 1.0
+; GOOD-NEXT:    exit.peel.next: float = 1.0
+; GOOD-NEXT:    loop.header.peel.next: float = 1.0
+; GOOD-NEXT:    exit: float = 1.0
+
+; BAD-NEXT:  entry: float = 1.0
+; BAD-NEXT:  entry.split: float = 0.625
+; BAD-NEXT:  loop.header: float = 31.875
+; BAD-NEXT:  then: float = 12.75
+; BAD-NEXT:  loop.latch: float = 31.875
+; BAD-NEXT:  exit.peel.begin.loopexit: float = 0.625
+; BAD-NEXT:  exit.peel.begin: float = 1.0
+; BAD-NEXT:  loop.header.peel: float = 1.0
+; BAD-NEXT:  then.peel: float = 0.4
+; BAD-NEXT:  loop.latch.peel: float = 1.0
+; BAD-NEXT:  exit.peel.next: float = 1.0
+; BAD-NEXT:  loop.header.peel.next: float = 1.0
+; BAD-NEXT:  exit: float = 1.0
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
index f3910f9bfc399..bc06625de0c76 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
@@ -1,46 +1,50 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=2 -S %s | FileCheck --check-prefix=BUDGET2 %s
 ; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=3 -S %s | FileCheck --check-prefix=BUDGET3 %s
 
-define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
+; Note that BUDGET3 will expose a conditional branch going to the same label.
+; This is expected, and the profile information associated will be dropped when
+; the branch is simplified to unconditional.
+
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
 ; BUDGET2-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
 ; BUDGET2-NEXT:  [[ENTRY:.*]]:
 ; BUDGET2-NEXT:    [[SUB:%.*]] = add i32 [[END]], -1
 ; BUDGET2-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; BUDGET2:       [[LOOP_HEADER]]:
 ; BUDGET2-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
 ; BUDGET2-NEXT:    [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET2-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET2-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF1:![0-9]+]]
 ; BUDGET2:       [[THEN]]:
 ; BUDGET2-NEXT:    br label %[[LOOP_LATCH]]
 ; BUDGET2:       [[LOOP_LATCH]]:
 ; BUDGET2-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
 ; BUDGET2-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[END]]
-; BUDGET2-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; BUDGET2-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF2:![0-9]+]], !llvm.loop [[LOOP3:![0-9]+]]
 ; BUDGET2:       [[EXIT]]:
 ; BUDGET2-NEXT:    ret i32 0
 ;
 ; BUDGET3-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
 ; BUDGET3-NEXT:  [[ENTRY:.*]]:
 ; BUDGET3-NEXT:    [[SUB:%.*]] = add i32 [[END]], -1
 ; BUDGET3-NEXT:    [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
 ; BUDGET3-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-; BUDGET3-NEXT:    br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
+; BUDGET3-NEXT:    br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]], !prof [[PROF1:![0-9]+]]
 ; BUDGET3:       [[ENTRY_SPLIT]]:
 ; BUDGET3-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; BUDGET3:       [[LOOP_HEADER]]:
 ; BUDGET3-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
 ; BUDGET3-NEXT:    [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET3-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET3-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF2:![0-9]+]]
 ; BUDGET3:       [[THEN]]:
 ; BUDGET3-NEXT:    br label %[[LOOP_LATCH]]
 ; BUDGET3:       [[LOOP_LATCH]]:
 ; BUDGET3-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
 ; BUDGET3-NEXT:    [[TMP2:%.*]] = sub i32 [[END]], 1
 ; BUDGET3-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
-; BUDGET3-NEXT:    br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; BUDGET3-NEXT:    br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
 ; BUDGET3:       [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
 ; BUDGET3-NEXT:    [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
 ; BUDGET3-NEXT:    br label %[[EXIT_PEEL_BEGIN]]
@@ -49,13 +53,13 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
 ; BUDGET3-NEXT:    br label %[[LOOP_HEADER_PEEL:.*]]
 ; BUDGET3:       [[LOOP_HEADER_PEEL]]:
 ; BUDGET3-NEXT:    [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
-; BUDGET3-NEXT:    br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
+; BUDGET3-NEXT:    br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]], !prof [[PROF2]]
 ; BUDGET3:       [[THEN_PEEL]]:
 ; BUDGET3-NEXT:    br label %[[LOOP_LATCH_PEEL]]
 ; BUDGET3:       [[LOOP_LATCH_PEEL]]:
 ; BUDGET3-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
 ; BUDGET3-NEXT:    [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
-; BUDGET3-NEXT:    br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; BUDGET3-NEXT:    br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]], !prof [[PROF3]]
 ; BUDGET3:       [[EXIT_PEEL_NEXT]]:
 ; BUDGET3-NEXT:    br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
 ; BUDGET3:       [[LOOP_HEADER_PEEL_NEXT]]:
@@ -70,7 +74,7 @@ entry:
 loop.header:
   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
   %c = icmp eq i32 %iv, %sub
-  br i1 %c, label %then, label %loop.latch
+  br i1 %c, label %then, label %loop.latch, !prof !1
 
 then:
   br label %loop.latch
@@ -78,12 +82,29 @@ then:
 loop.latch:
   %iv.next = add nsw i32 %iv, 1
   %ec = icmp eq i32 %iv.next, %end
-  br i1 %ec, label %exit, label %loop.header
+  br i1 %ec, label %exit, label %loop.header, !prof !2, !llvm.loop !3
 
 exit:
   ret i32 0
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 10}
+!3 = distinct !{!3, !4}
+!4 = !{!"llvm.loop.estimated_trip_count", i32 42}
+;.
+; BUDGET2: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET2: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET2: [[PROF2]] = !{!"branch_weights", i32 1, i32 10}
+; BUDGET2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+; BUDGET2: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 42}
 ;.
-; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
-; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET3: [[PROF1]] = !{!"branch_weights", i32 10, i32 1}
+; BUDGET3: [[PROF2]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET3: [[PROF3]] = !{!"branch_weights", i32 1, i32 10}
+; BUDGET3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
+; BUDGET3: [[META5]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[META6]] = !{!"llvm.loop.estimated_trip_count", i32 41}
 ;.
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 94cf8bc358514..1dfa280e6b82a 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -49,6 +49,7 @@
 config.excludes = ["Inputs", "CMakeLists.txt", "README.txt", "LICENSE.txt"]
 
 if config.enable_profcheck:
+    config.available_features.add("profcheck")
     # Exclude llvm-reduce tests for profcheck because we substitute the FileCheck
     # binary with a no-op command for profcheck, but llvm-reduce tests have RUN
     # commands of the form llvm-reduce --test FileCheck, which explode if we
@@ -69,8 +70,6 @@
     # profiling doesn't work quite well on GPU, excluding
     config.excludes.append("AMDGPU")
 
-    config.available_features.add("profcheck")
-
 # test_source_root: The root path where tests are located.
 config.test_source_root = os.path.dirname(__file__)
 

From 4ff4cc1456fa2727c37a016562684dbc80766b94 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Fri, 7 Nov 2025 15:21:38 -0800
Subject: [PATCH 2/2] [LTT][profcheck] Set branch weights for complex
 llvm.type.test lowering

---
 llvm/lib/Transforms/IPO/LowerTypeTests.cpp    | 49 ++++++++++++++-----
 llvm/test/Other/new-pm-O0-defaults.ll         |  1 +
 .../test/Transforms/LowerTypeTests/section.ll | 23 ++++++++-
 3 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 94663ff928a0b..31b5487ce6ec6 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -54,6 +55,7 @@
 #include "llvm/IR/ModuleSummaryIndexYAML.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/ReplaceConstant.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
@@ -95,6 +97,7 @@ STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
 STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
 STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");
 
+namespace llvm {
 static cl::opt<bool> AvoidReuse(
     "lowertypetests-avoid-reuse",
     cl::desc("Try to avoid reuse of byte array addresses using aliases"),
@@ -131,6 +134,9 @@ static cl::opt<DropTestKind>
                                           "Drop all type test sequences")),
                     cl::Hidden, cl::init(DropTestKind::None));
 
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
   if (Offset < ByteOffset)
     return false;
@@ -423,8 +429,10 @@ struct ScopedSaveAliaseesAndUsed {
 class LowerTypeTestsModule {
   Module &M;
 
-  ModuleSummaryIndex *ExportSummary;
-  const ModuleSummaryIndex *ImportSummary;
+  FunctionAnalysisManager &FAM;
+
+  ModuleSummaryIndex *const ExportSummary;
+  const ModuleSummaryIndex *const ImportSummary;
   // Set when the client has invoked this to simply drop all type test assume
   // sequences.
   DropTestKind DropTypeTests;
@@ -507,9 +515,10 @@ class LowerTypeTestsModule {
   void allocateByteArrays();
   Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
                           Value *BitOffset);
-  void lowerTypeTestCalls(
-      ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
-      const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
+  void
+  lowerTypeTestCalls(ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
+                     const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout,
+                     uint64_t *TotalCallCount = nullptr);
   Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
                            const TypeIdLowering &TIL);
 
@@ -803,6 +812,8 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
       }
 
   IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
+  setExplicitlyUnknownBranchWeightsIfProfiled(*InitialBB->getTerminator(),
+                                              DEBUG_TYPE);
 
   // Now that we know that the offset is in range and aligned, load the
   // appropriate bit from the bitset.
@@ -1181,7 +1192,8 @@ buildBitSets(ArrayRef<Metadata *> TypeIds,
 
 void LowerTypeTestsModule::lowerTypeTestCalls(
     ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
-    const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
+    const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout,
+    uint64_t *TotalCallCount) {
   // For each type identifier in this disjoint set...
   for (const auto &[TypeId, BSI] : buildBitSets(TypeIds, GlobalLayout)) {
     ByteArrayInfo *BAI = nullptr;
@@ -1227,6 +1239,18 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
       ++NumTypeTestCallsLowered;
       Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
       if (Lowered) {
+        if (TotalCallCount) {
+          auto *CIF = CI->getFunction();
+          if (auto EC = CIF->getEntryCount())
+            if (EC->getCount()) {
+              auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*CIF);
+              *TotalCallCount +=
+                  EC->getCount() *
+                  static_cast<double>(
+                      BFI.getBlockFreq(CI->getParent()).getFrequency()) /
+                  BFI.getEntryFreq().getFrequency();
+            }
+        }
         CI->replaceAllUsesWith(Lowered);
         CI->eraseFromParent();
       }
@@ -1702,10 +1726,13 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
   ArrayType *JumpTableEntryType = ArrayType::get(Int8Ty, EntrySize);
   ArrayType *JumpTableType =
       ArrayType::get(JumpTableEntryType, Functions.size());
-  auto JumpTable = ConstantExpr::getPointerCast(
+  auto *JumpTable = ConstantExpr::getPointerCast(
       JumpTableFn, PointerType::getUnqual(M.getContext()));
 
-  lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
+  uint64_t Count = 0;
+  lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout, &Count);
+  if (!ProfcheckDisableMetadataFixes && Count)
+    JumpTableFn->setEntryCount(Count);
 
   // Build aliases pointing to offsets into the jump table, and replace
   // references to the original functions with references to the aliases.
@@ -1870,7 +1897,9 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
 LowerTypeTestsModule::LowerTypeTestsModule(
     Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
     const ModuleSummaryIndex *ImportSummary, DropTestKind DropTypeTests)
-    : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+    : M(M),
+      FAM(AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
+      ExportSummary(ExportSummary), ImportSummary(ImportSummary),
       DropTypeTests(ClDropTypeTests > DropTypeTests ? ClDropTypeTests
                                                     : DropTypeTests) {
   assert(!(ExportSummary && ImportSummary));
@@ -1879,8 +1908,6 @@ LowerTypeTestsModule::LowerTypeTestsModule(
   if (Arch == Triple::arm)
     CanUseArmJumpTable = true;
   if (Arch == Triple::arm || Arch == Triple::thumb) {
-    auto &FAM =
-        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
     for (Function &F : M) {
       // Skip declarations since we should not query the TTI for them.
       if (F.isDeclaration())
diff --git a/llvm/test/Other/new-pm-O0-defaults.ll b/llvm/test/Other/new-pm-O0-defaults.ll
index 278a89261691a..4e2b5d4d62a78 100644
--- a/llvm/test/Other/new-pm-O0-defaults.ll
+++ b/llvm/test/Other/new-pm-O0-defaults.ll
@@ -44,6 +44,7 @@
 ; CHECK-PRE-LINK: Running pass: CanonicalizeAliasesPass
 ; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass
 ; CHECK-THINLTO: Running pass: LowerTypeTestsPass
+; CHECK-THINLTO: Running analysis: InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> on [module]
 ; CHECK-THINLTO-NEXT: Running pass: CoroConditionalWrapper
 ; CHECK-THINLTO-NEXT: Running pass: EliminateAvailableExternallyPass
 ; CHECK-THINLTO-NEXT: Running pass: GlobalDCEPass
diff --git a/llvm/test/Transforms/LowerTypeTests/section.ll b/llvm/test/Transforms/LowerTypeTests/section.ll
index bd91389c60ef0..1b0efd5bdd01d 100644
--- a/llvm/test/Transforms/LowerTypeTests/section.ll
+++ b/llvm/test/Transforms/LowerTypeTests/section.ll
@@ -13,14 +13,33 @@ entry:
   ret void
 }
 
-define i1 @g() {
+define i1 @g() !prof !1 {
 entry:
   %0 = call i1 @llvm.type.test(ptr @f, metadata !"_ZTSFvE")
   ret i1 %0
 }
 
-; CHECK: define private void @[[JT]]() #{{.*}} align {{.*}} {
+define i1 @h(i1 %c) !prof !2 {
+entry:
+  br i1 %c, label %yes, label %common, !prof !3
+
+yes:
+  %0 = call i1 @llvm.type.test(ptr @f, metadata !"_ZTSFvE")
+  ret i1 %0
+
+common:
+  ret i1 0
+}
+
+; CHECK: define private void @[[JT]]() #{{.*}} align {{.*}} !prof !4 {
 
 declare i1 @llvm.type.test(ptr, metadata) nounwind readnone
 
 !0 = !{i64 0, !"_ZTSFvE"}
+!1 = !{!"function_entry_count", i32 20}
+!2 = !{!"function_entry_count", i32 40}
+!3 = !{!"branch_weights", i32 3, i32 5}
+; The entry count for the jumptable function is 20 + 40 * (3/8) = 20 + 15 = 35,
+; where 20 is the entry count of g, 40 that of h, and 3/8 is the frequency of
+; the llvm.type.test in h relative to h's entry basic block.
+; CHECK: !4 = !{!"function_entry_count", i64 35}
\ No newline at end of file