From 6c4c36e401b60d718630e83a6b57f685128d9cc6 Mon Sep 17 00:00:00 2001
From: Xinhao Yuan <xinhaoyuan@google.com>
Date: Wed, 5 Nov 2025 12:59:11 -0800
Subject: [PATCH] Add corpus_weight_method flag to select corpus weighting

PiperOrigin-RevId: 828592996
---
 centipede/centipede.cc        | 15 ++++++++++++++-
 centipede/centipede.h         |  1 +
 centipede/centipede_flags.inc |  3 +++
 centipede/corpus.cc           | 31 ++++++++++++++++++++++---------
 centipede/corpus.h            | 18 ++++++++++++++----
 centipede/corpus_test.cc      | 12 ++++++++----
 centipede/feature_set.cc      |  5 +++--
 centipede/feature_set.h       |  8 ++++----
 centipede/feature_set_test.cc |  8 ++++----
 9 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/centipede/centipede.cc b/centipede/centipede.cc
index bb3793f4f..ae6ec826a 100644
--- a/centipede/centipede.cc
+++ b/centipede/centipede.cc
@@ -133,6 +133,18 @@ Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
   FUZZTEST_CHECK(env_.seed) << "env_.seed must not be zero";
   if (!env_.input_filter.empty() && env_.fork_server)
     input_filter_cmd_.StartForkServer(TemporaryLocalDirPath(), "input_filter");
+  if (env_.corpus_weight_method == Corpus::kWeightMethodNameForUniform) {
+    corpus_weight_method_ = Corpus::WeightMethod::Uniform;
+  } else if (env_.corpus_weight_method == Corpus::kWeightMethodNameForRecency) {
+    corpus_weight_method_ = Corpus::WeightMethod::Recency;
+  } else if (env_.corpus_weight_method == Corpus::kWeightMethodNameForRarity) {
+    corpus_weight_method_ = Corpus::WeightMethod::Rarity;
+  } else {  // Unrecognized name: warn and fall back to rarity.
+    FUZZTEST_LOG(WARNING) << "Unknown corpus weight method "
+                          << env_.corpus_weight_method << " - fall back to "
+                          << Corpus::kWeightMethodNameForRarity;
+    corpus_weight_method_ = Corpus::WeightMethod::Rarity;
+  }
 }
 
 void Centipede::CorpusToFiles(const Environment &env, std::string_view dir) {
@@ -474,7 +486,8 @@ bool Centipede::RunBatch(
       }
     }
   }
-  corpus_.UpdateWeights(fs_, coverage_frontier_, env_.exec_time_weight_scaling);
+  corpus_.UpdateWeights(fs_, coverage_frontier_, corpus_weight_method_,
+                        env_.exec_time_weight_scaling);
   return batch_gained_new_coverage;
 }
 
diff --git a/centipede/centipede.h b/centipede/centipede.h
index 24416c66d..8f8cf6658 100644
--- a/centipede/centipede.h
+++ b/centipede/centipede.h
@@ -185,6 +185,7 @@ class Centipede {
 
   FeatureSet fs_;
   Corpus corpus_;
+  Corpus::WeightMethod corpus_weight_method_;  // Parsed from env_ in the ctor.
   CoverageFrontier coverage_frontier_;
   size_t num_runs_ = 0;  // counts executed inputs
 
diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc
index 572e56d93..7f0410b1e 100644
--- a/centipede/centipede_flags.inc
+++ b/centipede/centipede_flags.inc
@@ -192,6 +192,9 @@ CENTIPEDE_FLAG(
     bool, use_corpus_weights, true,
     "If true, use weighted distribution when choosing the corpus element "
     "to mutate. This flag is mostly for Centipede developers.")
+CENTIPEDE_FLAG(std::string, corpus_weight_method, "rarity",
+               "The weight method to use on corpus. Available options are "
+               "`uniform`, `recency`, and `rarity` (fallback).")
 CENTIPEDE_FLAG(
     bool, exec_time_weight_scaling, true,
     "If true, scale the corpus weight by the execution time of each input.")
diff --git a/centipede/corpus.cc b/centipede/corpus.cc
index 4363b2464..a68a883b8 100644
--- a/centipede/corpus.cc
+++ b/centipede/corpus.cc
@@ -45,13 +45,12 @@ namespace fuzztest::internal {
 //                                  Corpus
 //------------------------------------------------------------------------------
 
-// Returns the weight of `fv` computed using `fs` and `coverage_frontier`.
-static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
-                            const CoverageFrontier &coverage_frontier) {
-  size_t weight = fs.ComputeWeight(fv);
+// Returns the frontier weight factor of `fv` based on `coverage_frontier`.
+static size_t ComputeFrontierWeight(const FeatureVec& fv,
+                                    const CoverageFrontier& coverage_frontier) {
   // The following is checking for the cases where PCTable is not present. In
   // such cases, we cannot use any ControlFlow related features.
-  if (coverage_frontier.MaxPcIndex() == 0) return weight;
+  if (coverage_frontier.MaxPcIndex() == 0) return 1;  // Neutral multiplier.
   size_t frontier_weights_sum = 0;
   for (const auto feature : fv) {
     if (!feature_domains::kPCs.Contains(feature)) continue;
@@ -63,7 +62,7 @@ static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
       frontier_weights_sum += coverage_frontier.FrontierWeight(pc_index);
     }
   }
-  return weight * (frontier_weights_sum + 1);  // Multiply by at least 1.
+  return frontier_weights_sum + 1;  // Never below 1: used as a multiplier.
 }
 
 std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {
@@ -79,14 +78,27 @@ std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {
 
 void Corpus::UpdateWeights(const FeatureSet& fs,
                            const CoverageFrontier& coverage_frontier,
-                           bool scale_by_exec_time) {
+                           WeightMethod method, bool scale_by_exec_time) {
   std::vector<double> weights;
   weights.resize(records_.size());
   for (size_t i = 0, n = records_.size(); i < n; ++i) {
     auto& record = records_[i];
     const size_t unseen = fs.PruneFeaturesAndCountUnseen(record.features);
     FUZZTEST_CHECK_EQ(unseen, 0);
-    weights[i] = fs.ComputeWeight(record.features);
+    switch (method) {  // Pick the base weight; frontier factor applied below.
+      case WeightMethod::Uniform:
+        weights[i] = 1;  // Same base weight for every record.
+        break;
+      case WeightMethod::Recency:
+        weights[i] = i + 1;  // Base weight grows with the record index.
+        break;
+      case WeightMethod::Rarity:
+        weights[i] = fs.ComputeRarityWeight(record.features);
+        break;
+      default:
+        FUZZTEST_LOG(FATAL) << "Unknown corpus weight method";
+    }
+    weights[i] *= ComputeFrontierWeight(record.features, coverage_frontier);
   }
   if (scale_by_exec_time) {
     double total_exec_time_usec = 0;
@@ -199,7 +211,8 @@ void Corpus::Add(const ByteArray& data, const FeatureVec& fv,
       << "Got request to add empty element to corpus: ignoring";
   FUZZTEST_CHECK_EQ(records_.size(), weighted_distribution_.size());
   records_.push_back({data, fv, metadata, stats});
-  weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier));
+  // Zero is a placeholder; the weight will be updated by `UpdateWeights`.
+  weighted_distribution_.AddWeight(0);
 }
 
 const CorpusRecord& Corpus::WeightedRandom(absl::BitGenRef rng) const {
diff --git a/centipede/corpus.h b/centipede/corpus.h
index 07164663e..b1ef10104 100644
--- a/centipede/corpus.h
+++ b/centipede/corpus.h
@@ -98,6 +98,16 @@ struct CorpusRecord {
 // Allows to prune (forget) inputs that become uninteresting.
 class Corpus {
  public:
+  enum class WeightMethod {  // How `UpdateWeights` picks the base weight.
+    Uniform,  // Same base weight for every record.
+    Recency,  // Base weight grows with the record index.
+    Rarity,   // Base weight from `FeatureSet::ComputeRarityWeight`.
+  };
+
+  static constexpr std::string_view kWeightMethodNameForUniform = "uniform";
+  static constexpr std::string_view kWeightMethodNameForRecency = "recency";
+  static constexpr std::string_view kWeightMethodNameForRarity = "rarity";
+
   Corpus() = default;
 
   Corpus(const Corpus &) = default;
@@ -120,12 +130,12 @@ class Corpus {
   // Returns the number of removed elements.
   size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier,
                size_t max_corpus_size, Rng &rng);
-  // Updates the corpus weights according to `fs` and `coverage_frontier`. If
-  // `scale_by_exec_time` is set, scales the weights by the corpus execution
-  // time relative to the average.
+  // Updates the corpus weights: a base weight per `method` (from `fs` for
+  // rarity) times the `coverage_frontier` weight. If `scale_by_exec_time` is
+  // set, scales the weights by corpus execution time relative to the average.
   void UpdateWeights(const FeatureSet& fs,
                      const CoverageFrontier& coverage_frontier,
-                     bool scale_by_exec_time);
+                     WeightMethod method, bool scale_by_exec_time);
 
   // Accessors.
 
diff --git a/centipede/corpus_test.cc b/centipede/corpus_test.cc
index a75ddb0be..58a2b8293 100644
--- a/centipede/corpus_test.cc
+++ b/centipede/corpus_test.cc
@@ -114,7 +114,8 @@ TEST(Corpus, Prune) {
   Add({{2}, {30, 40}});
   Add({{3}, {40, 50}});
   Add({{4}, {10, 20}});
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+  corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
+                       /*scale_by_exec_time=*/false);
 
   // Prune. Features 20 and 40 are frequent => input {0} will be removed.
   EXPECT_EQ(corpus.NumActive(), 5);
@@ -124,7 +125,8 @@ TEST(Corpus, Prune) {
   VerifyActiveInputs({{1}, {2}, {3}, {4}});
 
   Add({{5}, {30, 60}});
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+  corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
+                       /*scale_by_exec_time=*/false);
 
   EXPECT_EQ(corpus.NumTotal(), 6);
   // Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed.
@@ -181,14 +183,16 @@ TEST(Corpus, ScalesWeightsWithExecTime) {
   };
 
   // The weights should be equal without exec time scaling.
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+  corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
+                       /*scale_by_exec_time=*/false);
   ComputeFreq();
   EXPECT_NEAR(freq[0], kNumIter / 3, 100);
   EXPECT_NEAR(freq[1], kNumIter / 3, 100);
   EXPECT_NEAR(freq[2], kNumIter / 3, 100);
 
   // The weights should favor {0} over {1} over {2} with exec time scaling.
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/true);
+  corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
+                       /*scale_by_exec_time=*/true);
   ComputeFreq();
   EXPECT_GT(freq[0], freq[1] + 100);
   EXPECT_GT(freq[1], freq[2] + 100);
diff --git a/centipede/feature_set.cc b/centipede/feature_set.cc
index 6806b2f94..2daf4cec5 100644
--- a/centipede/feature_set.cc
+++ b/centipede/feature_set.cc
@@ -139,8 +139,9 @@ void FeatureSet::MergeFeatures(const FeatureVec& features) {
 }
 
 __attribute__((noinline))  // to see it in profile.
-uint64_t
-FeatureSet::ComputeWeight(const FeatureVec &features) const {
+double FeatureSet::ComputeRarityWeight(const FeatureVec& features) const {
+  // Accumulate in uint64_t to preserve the previous integer arithmetic, even
+  // though the function now returns double. Maybe switch to double later.
   uint64_t weight = 0;
   for (auto feature : features) {
     // The less frequent is the feature, the more valuable it is.
diff --git a/centipede/feature_set.h b/centipede/feature_set.h
index 7e85dfef4..beaa1c51b 100644
--- a/centipede/feature_set.h
+++ b/centipede/feature_set.h
@@ -94,10 +94,10 @@ class FeatureSet {
     return frequencies_[feature];
   }
 
-  // Computes combined weight of `features`.
-  // The less frequent the feature is, the bigger its weight.
-  // The weight of a FeatureVec is a sum of individual feature weights.
-  uint64_t ComputeWeight(const FeatureVec &features) const;
+  // Computes combined weight of `features` based on the feature rarity that
+  // scales linearly. The less frequent the feature is, the bigger its
+  // weight. The weight of a FeatureVec is a sum of individual feature weights.
+  double ComputeRarityWeight(const FeatureVec& features) const;
 
   // Returns a debug string representing the state of *this.
   std::string DebugString() const;
diff --git a/centipede/feature_set_test.cc b/centipede/feature_set_test.cc
index 9a81d2d73..eb954397d 100644
--- a/centipede/feature_set_test.cc
+++ b/centipede/feature_set_test.cc
@@ -27,8 +27,8 @@ namespace {
 TEST(FeatureSet, ComputeWeight) {
   FeatureSet feature_set(10, {});
 
-  auto W = [&](const FeatureVec &features) -> uint64_t {
-    return feature_set.ComputeWeight(features);
+  auto W = [&](const FeatureVec& features) -> uint64_t {
+    return feature_set.ComputeRarityWeight(features);  // double -> uint64_t
   };
 
   feature_set.MergeFeatures({1, 2, 3});
@@ -60,8 +60,8 @@ TEST(FeatureSet, ComputeWeightWithDifferentDomains) {
                              /* three features from domain #3 */ f3, f3 + 1,
                              f3 + 2});
 
-  auto weight = [&](const FeatureVec &features) -> uint64_t {
-    return feature_set.ComputeWeight(features);
+  auto weight = [&](const FeatureVec& features) -> uint64_t {
+    return feature_set.ComputeRarityWeight(features);  // double -> uint64_t
   };
 
   // Test that features from a less frequent domain have more weight.