
Commit e3f6658

xinhaoyuan authored and copybara-github committed
Add various weight computation methods.
PiperOrigin-RevId: 828592996
1 parent 1c3e6d1 commit e3f6658

9 files changed: +146 -28 lines changed

centipede/centipede.cc

Lines changed: 14 additions & 1 deletion
@@ -76,6 +76,7 @@
 #include "./centipede/centipede_callbacks.h"
 #include "./centipede/command.h"
 #include "./centipede/control_flow.h"
+#include "./centipede/corpus.h"
 #include "./centipede/corpus_io.h"
 #include "./centipede/coverage.h"
 #include "./centipede/environment.h"
@@ -133,6 +134,17 @@ Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
   FUZZTEST_CHECK(env_.seed) << "env_.seed must not be zero";
   if (!env_.input_filter.empty() && env_.fork_server)
     input_filter_cmd_.StartForkServer(TemporaryLocalDirPath(), "input_filter");
+  if (env_.corpus_weight_method == Corpus::kWeightMethodNameForUniform) {
+    corpus_weight_method_ = Corpus::WeightMethod::Uniform;
+  } else if (env_.corpus_weight_method == Corpus::kWeightMethodNameForRecency) {
+    corpus_weight_method_ = Corpus::WeightMethod::Recency;
+  } else if (env_.corpus_weight_method ==
+             Corpus::kWeightMethodNameForFeatureRarity) {
+    corpus_weight_method_ = Corpus::WeightMethod::FeatureRarity;
+  } else {
+    FUZZTEST_LOG(FATAL) << "Unknown corpus weight method "
+                        << env_.corpus_weight_method;
+  }
 }

 void Centipede::CorpusToFiles(const Environment &env, std::string_view dir) {
@@ -474,7 +486,8 @@ bool Centipede::RunBatch(
       }
     }
   }
-  corpus_.UpdateWeights(fs_, coverage_frontier_, env_.exec_time_weight_scaling);
+  corpus_.UpdateWeights(fs_, coverage_frontier_, corpus_weight_method_,
+                        env_.exec_time_weight_scaling);
   return batch_gained_new_coverage;
 }

centipede/centipede.h

Lines changed: 1 addition & 0 deletions
@@ -185,6 +185,7 @@ class Centipede {

   FeatureSet fs_;
   Corpus corpus_;
+  Corpus::WeightMethod corpus_weight_method_;
   CoverageFrontier coverage_frontier_;
   size_t num_runs_ = 0;  // counts executed inputs

centipede/centipede_flags.inc

Lines changed: 3 additions & 0 deletions
@@ -192,6 +192,9 @@ CENTIPEDE_FLAG(
     bool, use_corpus_weights, true,
     "If true, use weighted distribution when choosing the corpus element "
    "to mutate. This flag is mostly for Centipede developers.")
+CENTIPEDE_FLAG(std::string, corpus_weight_method, "feature_rarity",
+               "The weight method to use on corpus. Available options are "
+               "`uniform`, `recency`, and `feature_rarity` (default).")
 CENTIPEDE_FLAG(
     bool, exec_time_weight_scaling, true,
     "If true, scale the corpus weight by the execution time of each input.")

centipede/corpus.cc

Lines changed: 26 additions & 9 deletions
@@ -45,13 +45,12 @@ namespace fuzztest::internal {
 // Corpus
 //------------------------------------------------------------------------------

-// Returns the weight of `fv` computed using `fs` and `coverage_frontier`.
-static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
-                            const CoverageFrontier &coverage_frontier) {
-  size_t weight = fs.ComputeWeight(fv);
+// Returns the weight of `fv` computed using `coverage_frontier`.
+static size_t ComputeFrontierWeight(const FeatureVec& fv,
+                                    const CoverageFrontier& coverage_frontier) {
   // The following is checking for the cases where PCTable is not present. In
   // such cases, we cannot use any ControlFlow related features.
-  if (coverage_frontier.MaxPcIndex() == 0) return weight;
+  if (coverage_frontier.MaxPcIndex() == 0) return 1;
   size_t frontier_weights_sum = 0;
   for (const auto feature : fv) {
     if (!feature_domains::kPCs.Contains(feature)) continue;
@@ -63,7 +62,7 @@ static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
       frontier_weights_sum += coverage_frontier.FrontierWeight(pc_index);
     }
   }
-  return weight * (frontier_weights_sum + 1);  // Multiply by at least 1.
+  return frontier_weights_sum + 1;  // Multiply by at least 1.
 }

 std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {
@@ -79,14 +78,31 @@ std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {

 void Corpus::UpdateWeights(const FeatureSet& fs,
                            const CoverageFrontier& coverage_frontier,
-                           bool scale_by_exec_time) {
+                           WeightMethod method, bool scale_by_exec_time) {
   std::vector<double> weights;
   weights.resize(records_.size());
   for (size_t i = 0, n = records_.size(); i < n; ++i) {
     auto& record = records_[i];
     const size_t unseen = fs.PruneFeaturesAndCountUnseen(record.features);
     FUZZTEST_CHECK_EQ(unseen, 0);
-    weights[i] = fs.ComputeWeight(record.features);
+    if (record.features.empty()) {
+      weights[i] = 0;
+      continue;
+    }
+    switch (method) {
+      case WeightMethod::Uniform:
+        weights[i] = 1;
+        break;
+      case WeightMethod::Recency:
+        weights[i] = i + 1;
+        break;
+      case WeightMethod::FeatureRarity:
+        weights[i] = fs.ComputeRarityWeight(record.features);
+        break;
+      default:
+        FUZZTEST_LOG(FATAL) << "Unknown corpus weight method";
+    }
+    weights[i] *= ComputeFrontierWeight(record.features, coverage_frontier);
   }
   if (scale_by_exec_time) {
     double total_exec_time_usec = 0;
@@ -206,7 +222,8 @@ void Corpus::Add(const ByteArray& data, const FeatureVec& fv,
       << "Got request to add empty element to corpus: ignoring";
   FUZZTEST_CHECK_EQ(records_.size(), weighted_distribution_.size());
   records_.push_back({data, fv, metadata, stats});
-  weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier));
+  // Will be updated by `UpdateWeights`.
+  weighted_distribution_.AddWeight(0);
 }

 const CorpusRecord& Corpus::WeightedRandom(absl::BitGenRef rng) const {
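
To make the three methods concrete, here is a minimal standalone sketch (plain C++, not Centipede code) that computes the per-record base weights for the same three records used in the new corpus test. The rarity column uses a simple 1/frequency stand-in rather than `FeatureSet::ComputeRarityWeight`'s actual formula, and in the real implementation each base weight is further multiplied by `ComputeFrontierWeight` and optionally scaled by execution time.

// Standalone illustration (not Centipede code): base weights that the three
// WeightMethod options would assign to the records added in the new test.
// The rarity value is a simplified 1/frequency stand-in, not the real
// FeatureSet::ComputeRarityWeight formula.
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Feature vectors per record, in insertion order.
  const std::vector<std::vector<int>> records = {{30, 20}, {10, 20}, {10}};
  std::map<int, int> freq;  // corpus-wide feature frequencies
  for (const auto& fv : records)
    for (int f : fv) ++freq[f];
  for (size_t i = 0; i < records.size(); ++i) {
    const double uniform = 1;                            // WeightMethod::Uniform
    const double recency = static_cast<double>(i + 1);   // WeightMethod::Recency
    double rarity = 0;                                   // FeatureRarity stand-in
    for (int f : records[i]) rarity += 1.0 / freq[f];
    std::printf("record %zu: uniform=%.0f recency=%.0f rarity=%.2f\n", i,
                uniform, recency, rarity);
  }
  return 0;
}

Its output (rarity 1.50, 1.00, 0.50 for records 0, 1, 2) matches the ordering asserted by the new `WeightMethodsWorkAsExpected` test below: recency favors the most recently added record, while feature rarity favors the record whose features are least common.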

centipede/corpus.h

Lines changed: 15 additions & 4 deletions
@@ -98,6 +98,17 @@ struct CorpusRecord {
 // Allows to prune (forget) inputs that become uninteresting.
 class Corpus {
  public:
+  enum class WeightMethod {
+    Uniform,
+    Recency,
+    FeatureRarity,
+  };
+
+  static constexpr std::string_view kWeightMethodNameForUniform = "uniform";
+  static constexpr std::string_view kWeightMethodNameForRecency = "recency";
+  static constexpr std::string_view kWeightMethodNameForFeatureRarity =
+      "feature_rarity";
+
   Corpus() = default;

   Corpus(const Corpus &) = default;
@@ -120,12 +131,12 @@ class Corpus {
   // Returns the number of removed elements.
   size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier,
                size_t max_corpus_size, Rng &rng);
-  // Updates the corpus weights according to `fs` and `coverage_frontier`. If
-  // `scale_by_exec_time` is set, scales the weights by the corpus execution
-  // time relative to the average.
+  // Updates the corpus weights according to `fs` and `coverage_frontier` using
+  // the weight `method`. If `scale_by_exec_time` is set, scales the weights by
+  // the corpus execution time relative to the average.
   void UpdateWeights(const FeatureSet& fs,
                      const CoverageFrontier& coverage_frontier,
-                     bool scale_by_exec_time);
+                     WeightMethod method, bool scale_by_exec_time);

   // Accessors.

centipede/corpus_test.cc

Lines changed: 76 additions & 4 deletions
@@ -114,7 +114,9 @@ TEST(Corpus, Prune) {
   Add({{2}, {30, 40}});
   Add({{3}, {40, 50}});
   Add({{4}, {10, 20}});
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+  corpus.UpdateWeights(fs, coverage_frontier,
+                       Corpus::WeightMethod::FeatureRarity,
+                       /*scale_by_exec_time=*/false);

   // Prune. Features 20 and 40 are frequent => input {0} will be removed.
   EXPECT_EQ(corpus.NumActive(), 5);
@@ -124,7 +126,9 @@ TEST(Corpus, Prune) {
   VerifyActiveInputs({{1}, {2}, {3}, {4}});

   Add({{5}, {30, 60}});
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+  corpus.UpdateWeights(fs, coverage_frontier,
+                       Corpus::WeightMethod::FeatureRarity,
+                       /*scale_by_exec_time=*/false);

   EXPECT_EQ(corpus.NumTotal(), 6);
   // Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed.
@@ -145,6 +149,64 @@ TEST(Corpus, Prune) {
   EXPECT_EQ(corpus.NumTotal(), 6);
 }

+TEST(Corpus, WeightMethodsWorkAsExpected) {
+  PCTable pc_table(100);
+  CFTable cf_table(100);
+  BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}};
+  CoverageFrontier coverage_frontier(bin_info);
+  FeatureSet fs(3, {});
+  Corpus corpus;
+
+  auto Add = [&](const CorpusRecord& record) {
+    fs.MergeFeatures(record.features);
+    corpus.Add(record.data, record.features, /*metadata=*/{}, /*stats=*/{}, fs,
+               coverage_frontier);
+  };
+
+  Add({/*data=*/{0}, /*features=*/{30, 20}});
+  Add({/*data=*/{1}, /*features=*/{10, 20}});
+  Add({/*data=*/{2}, /*features=*/{10}});
+
+  constexpr int kNumIter = 10000;
+  std::vector<uint64_t> freq;
+
+  Rng rng;
+  auto ComputeFreq = [&]() {
+    freq.clear();
+    freq.resize(corpus.NumActive());
+    for (int i = 0; i < kNumIter; i++) {
+      const auto& record = corpus.WeightedRandom(rng);
+      const auto id = record.data[0];
+      ASSERT_LT(id, freq.size());
+      freq[id]++;
+    }
+  };
+
+  // The weights should be equal with the uniform method
+  corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Uniform,
+                       /*scale_by_exec_time=*/false);
+  ComputeFreq();
+  EXPECT_NEAR(freq[0], kNumIter / 3, 100);
+  EXPECT_NEAR(freq[1], kNumIter / 3, 100);
+  EXPECT_NEAR(freq[2], kNumIter / 3, 100);
+
+  // The weights should favor {2} over {1} over {0} with the recency method.
+  corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Recency,
+                       /*scale_by_exec_time=*/false);
+  ComputeFreq();
+  EXPECT_GT(freq[2], freq[1] + 100);
+  EXPECT_GT(freq[1], freq[0] + 100);
+
+  // The weights should favor {0} over {1} over {2} with the feature rarity
+  // method.
+  corpus.UpdateWeights(fs, coverage_frontier,
+                       Corpus::WeightMethod::FeatureRarity,
+                       /*scale_by_exec_time=*/false);
+  ComputeFreq();
+  EXPECT_GT(freq[0], freq[1] + 100);
+  EXPECT_GT(freq[1], freq[2] + 100);
+}
+
 TEST(Corpus, ScalesWeightsWithExecTime) {
   PCTable pc_table(100);
   CFTable cf_table(100);
@@ -181,14 +243,18 @@ TEST(Corpus, ScalesWeightsWithExecTime) {
   };

   // The weights should be equal without exec time scaling.
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+  corpus.UpdateWeights(fs, coverage_frontier,
+                       Corpus::WeightMethod::FeatureRarity,
+                       /*scale_by_exec_time=*/false);
   ComputeFreq();
   EXPECT_NEAR(freq[0], kNumIter / 3, 100);
   EXPECT_NEAR(freq[1], kNumIter / 3, 100);
   EXPECT_NEAR(freq[2], kNumIter / 3, 100);

   // The weights should favor {0} over {1} over {2} with exec time scaling.
-  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/true);
+  corpus.UpdateWeights(fs, coverage_frontier,
+                       Corpus::WeightMethod::FeatureRarity,
+                       /*scale_by_exec_time=*/true);
   ComputeFreq();
   EXPECT_GT(freq[0], freq[1] + 100);
   EXPECT_GT(freq[1], freq[2] + 100);
@@ -208,6 +274,9 @@ TEST(Corpus, PruneCorpusWithAllEmptyFeatureInputs) {
              coverage_frontier);
   corpus.Add(/*data=*/{2}, /*fv=*/{}, /*metadata=*/{}, /*stats=*/{}, fs,
              coverage_frontier);
+  corpus.UpdateWeights(fs, coverage_frontier,
+                       Corpus::WeightMethod::FeatureRarity,
+                       /*scale_by_exec_time=*/false);
   // Should not crash.
   corpus.Prune(fs, coverage_frontier, max_corpus_size, rng);
 }
@@ -231,6 +300,9 @@ TEST(Corpus, PruneRegressionTest1) {

   Add({{1}, {10, 20}});
   Add({{2}, {10}});
+  corpus.UpdateWeights(fs, coverage_frontier,
+                       Corpus::WeightMethod::FeatureRarity,
+                       /*scale_by_exec_time=*/false);
   corpus.Prune(fs, coverage_frontier, max_corpus_size, rng);
 }

centipede/feature_set.cc

Lines changed: 3 additions & 2 deletions
@@ -139,8 +139,9 @@ void FeatureSet::MergeFeatures(const FeatureVec& features) {
 }

 __attribute__((noinline))  // to see it in profile.
-uint64_t
-FeatureSet::ComputeWeight(const FeatureVec &features) const {
+double FeatureSet::ComputeRarityWeight(const FeatureVec& features) const {
+  // Use uint64_t to keep the previous behavior. Maybe we want to switch it to
+  // double later.
   uint64_t weight = 0;
   for (auto feature : features) {
     // The less frequent is the feature, the more valuable it is.

centipede/feature_set.h

Lines changed: 4 additions & 4 deletions
@@ -94,10 +94,10 @@ class FeatureSet {
     return frequencies_[feature];
   }

-  // Computes combined weight of `features`.
-  // The less frequent the feature is, the bigger its weight.
-  // The weight of a FeatureVec is a sum of individual feature weights.
-  uint64_t ComputeWeight(const FeatureVec &features) const;
+  // Computes combined weight of `features` based on the feature rarity that
+  // scales linearly. The less frequent the feature is, the bigger its
+  // weight. The weight of a FeatureVec is a sum of individual feature weights.
+  double ComputeRarityWeight(const FeatureVec& features) const;

   // Returns a debug string representing the state of *this.
   std::string DebugString() const;
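
An illustrative reading of that comment, assuming each feature's contribution is inversely proportional to how often it has been seen (the exact proportionality is the library's, not shown in this diff): if feature A has been observed once and feature B ten times, a record containing only {A} weighs roughly ten times as much as one containing only {B}, and a record containing {A, B} simply gets the sum of the two contributions.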

centipede/feature_set_test.cc

Lines changed: 4 additions & 4 deletions
@@ -27,8 +27,8 @@ namespace {
 TEST(FeatureSet, ComputeWeight) {
   FeatureSet feature_set(10, {});

-  auto W = [&](const FeatureVec &features) -> uint64_t {
-    return feature_set.ComputeWeight(features);
+  auto W = [&](const FeatureVec& features) -> uint64_t {
+    return feature_set.ComputeRarityWeight(features);
   };

   feature_set.MergeFeatures({1, 2, 3});
@@ -60,8 +60,8 @@ TEST(FeatureSet, ComputeWeightWithDifferentDomains) {
                             /* three features from domain #3 */ f3, f3 + 1,
                             f3 + 2});

-  auto weight = [&](const FeatureVec &features) -> uint64_t {
-    return feature_set.ComputeWeight(features);
+  auto weight = [&](const FeatureVec& features) -> uint64_t {
+    return feature_set.ComputeRarityWeight(features);
   };

   // Test that features from a less frequent domain have more weight.
