From 6c4c36e401b60d718630e83a6b57f685128d9cc6 Mon Sep 17 00:00:00 2001 From: Xinhao Yuan Date: Wed, 5 Nov 2025 12:59:11 -0800 Subject: [PATCH] No public description PiperOrigin-RevId: 828592996 --- centipede/centipede.cc | 15 ++++++++++++++- centipede/centipede.h | 1 + centipede/centipede_flags.inc | 3 +++ centipede/corpus.cc | 31 ++++++++++++++++++++++--------- centipede/corpus.h | 18 ++++++++++++++---- centipede/corpus_test.cc | 12 ++++++++---- centipede/feature_set.cc | 5 +++-- centipede/feature_set.h | 8 ++++---- centipede/feature_set_test.cc | 8 ++++---- 9 files changed, 73 insertions(+), 28 deletions(-) diff --git a/centipede/centipede.cc b/centipede/centipede.cc index bb3793f4f..ae6ec826a 100644 --- a/centipede/centipede.cc +++ b/centipede/centipede.cc @@ -133,6 +133,18 @@ Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks, FUZZTEST_CHECK(env_.seed) << "env_.seed must not be zero"; if (!env_.input_filter.empty() && env_.fork_server) input_filter_cmd_.StartForkServer(TemporaryLocalDirPath(), "input_filter"); + if (env_.corpus_weight_method == Corpus::kWeightMethodNameForUniform) { + corpus_weight_method_ = Corpus::WeightMethod::Uniform; + } else if (env_.corpus_weight_method == Corpus::kWeightMethodNameForRecency) { + corpus_weight_method_ = Corpus::WeightMethod::Recency; + } else if (env_.corpus_weight_method == Corpus::kWeightMethodNameForRarity) { + corpus_weight_method_ = Corpus::WeightMethod::Rarity; + } else { + FUZZTEST_LOG(WARNING) << "Unknown corpus weight method " + << env_.corpus_weight_method << " - fall back to " + << Corpus::kWeightMethodNameForRarity; + corpus_weight_method_ = Corpus::WeightMethod::Rarity; + } } void Centipede::CorpusToFiles(const Environment &env, std::string_view dir) { @@ -474,7 +486,8 @@ bool Centipede::RunBatch( } } } - corpus_.UpdateWeights(fs_, coverage_frontier_, env_.exec_time_weight_scaling); + corpus_.UpdateWeights(fs_, coverage_frontier_, corpus_weight_method_, + env_.exec_time_weight_scaling); return batch_gained_new_coverage; } diff --git a/centipede/centipede.h b/centipede/centipede.h index 24416c66d..8f8cf6658 100644 --- a/centipede/centipede.h +++ b/centipede/centipede.h @@ -185,6 +185,7 @@ class Centipede { FeatureSet fs_; Corpus corpus_; + Corpus::WeightMethod corpus_weight_method_; CoverageFrontier coverage_frontier_; size_t num_runs_ = 0; // counts executed inputs diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc index 572e56d93..7f0410b1e 100644 --- a/centipede/centipede_flags.inc +++ b/centipede/centipede_flags.inc @@ -192,6 +192,9 @@ CENTIPEDE_FLAG( bool, use_corpus_weights, true, "If true, use weighted distribution when choosing the corpus element " "to mutate. This flag is mostly for Centipede developers.") +CENTIPEDE_FLAG(std::string, corpus_weight_method, "rarity", + "The weight method to use on corpus. Available options are " + "`uniform`, `recency`, and `rarity` (fallback).") CENTIPEDE_FLAG( bool, exec_time_weight_scaling, true, "If true, scale the corpus weight by the execution time of each input.") diff --git a/centipede/corpus.cc b/centipede/corpus.cc index 4363b2464..a68a883b8 100644 --- a/centipede/corpus.cc +++ b/centipede/corpus.cc @@ -45,13 +45,12 @@ namespace fuzztest::internal { // Corpus //------------------------------------------------------------------------------ -// Returns the weight of `fv` computed using `fs` and `coverage_frontier`. -static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs, - const CoverageFrontier &coverage_frontier) { - size_t weight = fs.ComputeWeight(fv); +// Returns the weight of `fv` computed using `coverage_frontier`. +static size_t ComputeFrontierWeight(const FeatureVec& fv, + const CoverageFrontier& coverage_frontier) { // The following is checking for the cases where PCTable is not present. In // such cases, we cannot use any ControlFlow related features. - if (coverage_frontier.MaxPcIndex() == 0) return weight; + if (coverage_frontier.MaxPcIndex() == 0) return 1; size_t frontier_weights_sum = 0; for (const auto feature : fv) { if (!feature_domains::kPCs.Contains(feature)) continue; @@ -63,7 +62,7 @@ static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs, frontier_weights_sum += coverage_frontier.FrontierWeight(pc_index); } } - return weight * (frontier_weights_sum + 1); // Multiply by at least 1. + return frontier_weights_sum + 1; // Multiply by at least 1. } std::pair Corpus::MaxAndAvgSize() const { @@ -79,14 +78,27 @@ std::pair Corpus::MaxAndAvgSize() const { void Corpus::UpdateWeights(const FeatureSet& fs, const CoverageFrontier& coverage_frontier, - bool scale_by_exec_time) { + WeightMethod method, bool scale_by_exec_time) { std::vector weights; weights.resize(records_.size()); for (size_t i = 0, n = records_.size(); i < n; ++i) { auto& record = records_[i]; const size_t unseen = fs.PruneFeaturesAndCountUnseen(record.features); FUZZTEST_CHECK_EQ(unseen, 0); - weights[i] = fs.ComputeWeight(record.features); + switch (method) { + case WeightMethod::Uniform: + weights[i] = 1; + break; + case WeightMethod::Recency: + weights[i] = i + 1; + break; + case WeightMethod::Rarity: + weights[i] = fs.ComputeRarityWeight(record.features); + break; + default: + FUZZTEST_LOG(FATAL) << "Unknown corpus weight method"; + } + weights[i] *= ComputeFrontierWeight(record.features, coverage_frontier); } if (scale_by_exec_time) { double total_exec_time_usec = 0; @@ -199,7 +211,8 @@ void Corpus::Add(const ByteArray& data, const FeatureVec& fv, << "Got request to add empty element to corpus: ignoring"; FUZZTEST_CHECK_EQ(records_.size(), weighted_distribution_.size()); records_.push_back({data, fv, metadata, stats}); - weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier)); + // Will be updated by `UpdateWeights`. + weighted_distribution_.AddWeight(0); } const CorpusRecord& Corpus::WeightedRandom(absl::BitGenRef rng) const { diff --git a/centipede/corpus.h b/centipede/corpus.h index 07164663e..b1ef10104 100644 --- a/centipede/corpus.h +++ b/centipede/corpus.h @@ -98,6 +98,16 @@ struct CorpusRecord { // Allows to prune (forget) inputs that become uninteresting. class Corpus { public: + enum class WeightMethod { + Uniform, + Recency, + Rarity, + }; + + static constexpr std::string_view kWeightMethodNameForUniform = "uniform"; + static constexpr std::string_view kWeightMethodNameForRecency = "recency"; + static constexpr std::string_view kWeightMethodNameForRarity = "rarity"; + Corpus() = default; Corpus(const Corpus &) = default; @@ -120,12 +130,12 @@ class Corpus { // Returns the number of removed elements. size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier, size_t max_corpus_size, Rng &rng); - // Updates the corpus weights according to `fs` and `coverage_frontier`. If - // `scale_by_exec_time` is set, scales the weights by the corpus execution - // time relative to the average. + // Updates the corpus weights according to `fs` and `coverage_frontier` using + // the weight `method`. If `scale_by_exec_time` is set, scales the weights by + // the corpus execution time relative to the average. void UpdateWeights(const FeatureSet& fs, const CoverageFrontier& coverage_frontier, - bool scale_by_exec_time); + WeightMethod method, bool scale_by_exec_time); // Accessors. diff --git a/centipede/corpus_test.cc b/centipede/corpus_test.cc index a75ddb0be..58a2b8293 100644 --- a/centipede/corpus_test.cc +++ b/centipede/corpus_test.cc @@ -114,7 +114,8 @@ TEST(Corpus, Prune) { Add({{2}, {30, 40}}); Add({{3}, {40, 50}}); Add({{4}, {10, 20}}); - corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false); + corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity, + /*scale_by_exec_time=*/false); // Prune. Features 20 and 40 are frequent => input {0} will be removed. EXPECT_EQ(corpus.NumActive(), 5); @@ -124,7 +125,8 @@ TEST(Corpus, Prune) { VerifyActiveInputs({{1}, {2}, {3}, {4}}); Add({{5}, {30, 60}}); - corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false); + corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity, + /*scale_by_exec_time=*/false); EXPECT_EQ(corpus.NumTotal(), 6); // Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed. @@ -181,14 +183,16 @@ TEST(Corpus, ScalesWeightsWithExecTime) { }; // The weights should be equal without exec time scaling. - corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false); + corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity, + /*scale_by_exec_time=*/false); ComputeFreq(); EXPECT_NEAR(freq[0], kNumIter / 3, 100); EXPECT_NEAR(freq[1], kNumIter / 3, 100); EXPECT_NEAR(freq[2], kNumIter / 3, 100); // The weights should favor {0} over {1} over {2} with exec time scaling. - corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/true); + corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity, + /*scale_by_exec_time=*/true); ComputeFreq(); EXPECT_GT(freq[0], freq[1] + 100); EXPECT_GT(freq[1], freq[2] + 100); diff --git a/centipede/feature_set.cc b/centipede/feature_set.cc index 6806b2f94..2daf4cec5 100644 --- a/centipede/feature_set.cc +++ b/centipede/feature_set.cc @@ -139,8 +139,9 @@ void FeatureSet::MergeFeatures(const FeatureVec& features) { } __attribute__((noinline)) // to see it in profile. -uint64_t -FeatureSet::ComputeWeight(const FeatureVec &features) const { +double FeatureSet::ComputeRarityWeight(const FeatureVec& features) const { + // Use uint64_t to keep the previous behavior. Maybe we want to switch it to + // double later. uint64_t weight = 0; for (auto feature : features) { // The less frequent is the feature, the more valuable it is. diff --git a/centipede/feature_set.h b/centipede/feature_set.h index 7e85dfef4..beaa1c51b 100644 --- a/centipede/feature_set.h +++ b/centipede/feature_set.h @@ -94,10 +94,10 @@ class FeatureSet { return frequencies_[feature]; } - // Computes combined weight of `features`. - // The less frequent the feature is, the bigger its weight. - // The weight of a FeatureVec is a sum of individual feature weights. - uint64_t ComputeWeight(const FeatureVec &features) const; + // Computes combined weight of `features` based on the feature rarity that + // scales linearly. The less frequent the feature is, the bigger its + // weight. The weight of a FeatureVec is a sum of individual feature weights. + double ComputeRarityWeight(const FeatureVec& features) const; // Returns a debug string representing the state of *this. std::string DebugString() const; diff --git a/centipede/feature_set_test.cc b/centipede/feature_set_test.cc index 9a81d2d73..eb954397d 100644 --- a/centipede/feature_set_test.cc +++ b/centipede/feature_set_test.cc @@ -27,8 +27,8 @@ namespace { TEST(FeatureSet, ComputeWeight) { FeatureSet feature_set(10, {}); - auto W = [&](const FeatureVec &features) -> uint64_t { - return feature_set.ComputeWeight(features); + auto W = [&](const FeatureVec& features) -> uint64_t { + return feature_set.ComputeRarityWeight(features); }; feature_set.MergeFeatures({1, 2, 3}); @@ -60,8 +60,8 @@ TEST(FeatureSet, ComputeWeightWithDifferentDomains) { /* three features from domain #3 */ f3, f3 + 1, f3 + 2}); - auto weight = [&](const FeatureVec &features) -> uint64_t { - return feature_set.ComputeWeight(features); + auto weight = [&](const FeatureVec& features) -> uint64_t { + return feature_set.ComputeRarityWeight(features); }; // Test that features from a less frequent domain have more weight.