Skip to content

Commit 8c0b3cc

Browse files
xinhaoyuan authored and copybara-github committed
No public description
PiperOrigin-RevId: 828592996
1 parent 775126c commit 8c0b3cc

File tree

9 files changed

+96
-32
lines changed

9 files changed

+96
-32
lines changed

centipede/centipede.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,18 @@ Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
133133
FUZZTEST_CHECK(env_.seed) << "env_.seed must not be zero";
134134
if (!env_.input_filter.empty() && env_.fork_server)
135135
input_filter_cmd_.StartForkServer(TemporaryLocalDirPath(), "input_filter");
136+
if (env_.corpus_weight_method == Corpus::kWeightMethodNameForUniform) {
137+
corpus_weight_method_ = Corpus::WeightMethod::Uniform;
138+
} else if (env_.corpus_weight_method == Corpus::kWeightMethodNameForRecency) {
139+
corpus_weight_method_ = Corpus::WeightMethod::Recency;
140+
} else if (env_.corpus_weight_method == Corpus::kWeightMethodNameForRarity) {
141+
corpus_weight_method_ = Corpus::WeightMethod::Rarity;
142+
} else {
143+
FUZZTEST_LOG(WARNING) << "Unknown corpus weight method "
144+
<< env_.corpus_weight_method << " - fall back to "
145+
<< Corpus::kWeightMethodNameForRarity;
146+
corpus_weight_method_ = Corpus::WeightMethod::Rarity;
147+
}
136148
}
137149

138150
void Centipede::CorpusToFiles(const Environment &env, std::string_view dir) {
@@ -474,7 +486,8 @@ bool Centipede::RunBatch(
474486
}
475487
}
476488
}
477-
corpus_.UpdateWeights(fs_, coverage_frontier_, env_.exec_time_weight_scaling);
489+
corpus_.UpdateWeights(fs_, coverage_frontier_, corpus_weight_method_,
490+
env_.exec_time_weight_scaling);
478491
return batch_gained_new_coverage;
479492
}
480493

centipede/centipede.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ class Centipede {
185185

186186
FeatureSet fs_;
187187
Corpus corpus_;
188+
Corpus::WeightMethod corpus_weight_method_;
188189
CoverageFrontier coverage_frontier_;
189190
size_t num_runs_ = 0; // counts executed inputs
190191

centipede/centipede_flags.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ CENTIPEDE_FLAG(
192192
bool, use_corpus_weights, true,
193193
"If true, use weighted distribution when choosing the corpus element "
194194
"to mutate. This flag is mostly for Centipede developers.")
195+
CENTIPEDE_FLAG(std::string, corpus_weight_method, "rarity",
196+
"The weight method to use on corpus. Available options are "
197+
"`uniform`, `recency`, and `rarity` (fallback).")
195198
CENTIPEDE_FLAG(
196199
bool, exec_time_weight_scaling, true,
197200
"If true, scale the corpus weight by the execution time of each input.")

centipede/corpus.cc

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,15 @@ namespace fuzztest::internal {
4545
// Corpus
4646
//------------------------------------------------------------------------------
4747

48-
// Returns the weight of `fv` computed using `fs` and `coverage_frontier`.
49-
static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
50-
const CoverageFrontier &coverage_frontier) {
51-
size_t weight = fs.ComputeWeight(fv);
48+
// Returns the weight of `fv` computed using `coverage_frontier`.
49+
static size_t ComputeFrontierWeight(const FeatureVec& fv,
50+
const CoverageFrontier& coverage_frontier) {
5251
// The following is checking for the cases where PCTable is not present. In
5352
// such cases, we cannot use any ControlFlow related features.
54-
if (coverage_frontier.MaxPcIndex() == 0) return weight;
53+
if (coverage_frontier.MaxPcIndex() == 0) return 1;
5554
size_t frontier_weights_sum = 0;
56-
for (const auto feature : fv) {
55+
for (size_t i = 0; i < fv.size(); ++i) {
56+
const auto feature = fv[i];
5757
if (!feature_domains::kPCs.Contains(feature)) continue;
5858
const auto pc_index = ConvertPCFeatureToPcIndex(feature);
5959
// Avoid checking frontier for out-of-bounds indices.
@@ -63,7 +63,7 @@ static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
6363
frontier_weights_sum += coverage_frontier.FrontierWeight(pc_index);
6464
}
6565
}
66-
return weight * (frontier_weights_sum + 1); // Multiply by at least 1.
66+
return frontier_weights_sum + 1; // Multiply by at least 1.
6767
}
6868

6969
std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {
@@ -79,14 +79,27 @@ std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {
7979

8080
void Corpus::UpdateWeights(const FeatureSet& fs,
8181
const CoverageFrontier& coverage_frontier,
82-
bool scale_by_exec_time) {
82+
WeightMethod method, bool scale_by_exec_time) {
8383
std::vector<double> weights;
8484
weights.resize(records_.size());
8585
for (size_t i = 0, n = records_.size(); i < n; ++i) {
8686
auto& record = records_[i];
8787
const size_t unseen = fs.PruneFeaturesAndCountUnseen(record.features);
8888
FUZZTEST_CHECK_EQ(unseen, 0);
89-
weights[i] = fs.ComputeWeight(record.features);
89+
switch (method) {
90+
case WeightMethod::Uniform:
91+
weights[i] = 1;
92+
break;
93+
case WeightMethod::Recency:
94+
weights[i] = i;
95+
break;
96+
case WeightMethod::Rarity:
97+
weights[i] = fs.ComputeRarityWeight(record.features);
98+
break;
99+
default:
100+
FUZZTEST_LOG(FATAL) << "Unknown corpus weight method";
101+
}
102+
weights[i] *= ComputeFrontierWeight(record.features, coverage_frontier);
90103
}
91104
if (scale_by_exec_time) {
92105
double total_exec_time_usec = 0;
@@ -199,7 +212,8 @@ void Corpus::Add(const ByteArray& data, const FeatureVec& fv,
199212
<< "Got request to add empty element to corpus: ignoring";
200213
FUZZTEST_CHECK_EQ(records_.size(), weighted_distribution_.size());
201214
records_.push_back({data, fv, metadata, stats});
202-
weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier));
215+
// Will be updated by `UpdateWeights`.
216+
weighted_distribution_.AddWeight(0);
203217
}
204218

205219
const CorpusRecord& Corpus::WeightedRandom(absl::BitGenRef rng) const {

centipede/corpus.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,16 @@ struct CorpusRecord {
9898
// Allows to prune (forget) inputs that become uninteresting.
9999
class Corpus {
100100
public:
101+
enum class WeightMethod {
102+
Uniform,
103+
Recency,
104+
Rarity,
105+
};
106+
107+
static constexpr std::string_view kWeightMethodNameForUniform = "uniform";
108+
static constexpr std::string_view kWeightMethodNameForRecency = "recency";
109+
static constexpr std::string_view kWeightMethodNameForRarity = "rarity";
110+
101111
Corpus() = default;
102112

103113
Corpus(const Corpus &) = default;
@@ -120,12 +130,12 @@ class Corpus {
120130
// Returns the number of removed elements.
121131
size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier,
122132
size_t max_corpus_size, Rng &rng);
123-
// Updates the corpus weights according to `fs` and `coverage_frontier`. If
124-
// `scale_by_exec_time` is set, scales the weights by the corpus execution
125-
// time relative to the average.
133+
// Updates the corpus weights according to `fs` and `coverage_frontier` using
134+
// the weight `method`. If `scale_by_exec_time` is set, scales the weights by
135+
// the corpus execution time relative to the average.
126136
void UpdateWeights(const FeatureSet& fs,
127137
const CoverageFrontier& coverage_frontier,
128-
bool scale_by_exec_time);
138+
WeightMethod method, bool scale_by_exec_time);
129139

130140
// Accessors.
131141

centipede/corpus_test.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ TEST(Corpus, Prune) {
114114
Add({{2}, {30, 40}});
115115
Add({{3}, {40, 50}});
116116
Add({{4}, {10, 20}});
117-
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
117+
corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
118+
/*scale_by_exec_time=*/false);
118119

119120
// Prune. Features 20 and 40 are frequent => input {0} will be removed.
120121
EXPECT_EQ(corpus.NumActive(), 5);
@@ -124,7 +125,8 @@ TEST(Corpus, Prune) {
124125
VerifyActiveInputs({{1}, {2}, {3}, {4}});
125126

126127
Add({{5}, {30, 60}});
127-
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
128+
corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
129+
/*scale_by_exec_time=*/false);
128130

129131
EXPECT_EQ(corpus.NumTotal(), 6);
130132
// Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed.
@@ -181,14 +183,16 @@ TEST(Corpus, ScalesWeightsWithExecTime) {
181183
};
182184

183185
// The weights should be equal without exec time scaling.
184-
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
186+
corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
187+
/*scale_by_exec_time=*/false);
185188
ComputeFreq();
186189
EXPECT_NEAR(freq[0], kNumIter / 3, 100);
187190
EXPECT_NEAR(freq[1], kNumIter / 3, 100);
188191
EXPECT_NEAR(freq[2], kNumIter / 3, 100);
189192

190193
// The weights should favor {0} over {1} over {2} with exec time scaling.
191-
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/true);
194+
corpus.UpdateWeights(fs, coverage_frontier, Corpus::WeightMethod::Rarity,
195+
/*scale_by_exec_time=*/true);
192196
ComputeFreq();
193197
EXPECT_GT(freq[0], freq[1] + 100);
194198
EXPECT_GT(freq[1], freq[2] + 100);

centipede/feature_set.cc

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ void FeatureSet::PruneDiscardedDomains(FeatureVec &features) const {
110110
}
111111

112112
void FeatureSet::MergeFeatures(const FeatureVec& features) {
113+
++num_executions_;
113114
for (auto feature : features) {
114115
bool unseen = false;
115116
if (feature_domains::IsComparisonScoreFeature(feature)) {
@@ -129,18 +130,24 @@ void FeatureSet::MergeFeatures(const FeatureVec& features) {
129130
++num_features_;
130131
++features_per_domain_[feature_domains::Domain::FeatureToDomainId(
131132
feature)];
133+
rare_features_.insert(feature);
132134
} else if (unseen) {
133135
freq = 0;
136+
rare_features_.insert(feature);
134137
}
135138
if (freq < FrequencyThreshold(feature)) {
136139
++freq;
140+
if (freq >= FrequencyThreshold(feature)) {
141+
rare_features_.erase(feature);
142+
}
137143
}
138144
}
139145
}
140146

141147
__attribute__((noinline)) // to see it in profile.
142-
uint64_t
143-
FeatureSet::ComputeWeight(const FeatureVec &features) const {
148+
double FeatureSet::ComputeRarityWeight(const FeatureVec& features) const {
149+
// Use uint64_t to keep the previous behavior. Maybe we want to switch it to
150+
// double later.
144151
uint64_t weight = 0;
145152
for (auto feature : features) {
146153
// The less frequent is the feature, the more valuable it is.
@@ -178,6 +185,7 @@ std::ostream &operator<<(std::ostream &out, const FeatureSet &fs) {
178185
out << " " << name << ": " << value;
179186
};
180187
out << "ft: " << fs.size();
188+
out << " rare: " << fs.rare_features().size();
181189
LogIfNotZero(fs.CountFeatures(feature_domains::kPCs), "cov");
182190
LogIfNotZero(fs.CountFeatures(feature_domains::k8bitCounters), "cnt");
183191
LogIfNotZero(fs.CountFeatures(feature_domains::kDataFlow), "df");

centipede/feature_set.h

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ class FeatureSet {
6161
// How many different features are in the set.
6262
size_t size() const { return num_features_; }
6363

64+
size_t NumExecutions() const { return num_executions_; };
65+
66+
const std::unordered_set<feature_t>& rare_features() const {
67+
return rare_features_;
68+
}
69+
6470
// Returns features that originate from CFG counters, converted to PCIndexVec.
6571
PCIndexVec ToCoveragePCs() const;
6672

@@ -83,21 +89,22 @@ class FeatureSet {
8389
}
8490

8591
// Returns the frequency associated with `feature`.
86-
size_t Frequency(feature_t feature) const {
92+
size_t Frequency(feature_t feature, bool skip_counter_match = false) const {
8793
if (feature_domains::IsComparisonScoreFeature(feature)) {
88-
if ((feature & feature_domains::kCMPScoreBitmask) !=
89-
cmp_scores_[feature_domains::CMPScoreFeatureIndex(feature)]) {
94+
if (!skip_counter_match &&
95+
(feature & feature_domains::kCMPScoreBitmask) !=
96+
cmp_scores_[feature_domains::CMPScoreFeatureIndex(feature)]) {
9097
return 0;
9198
}
9299
feature &= ~feature_domains::kCMPScoreBitmask;
93100
}
94101
return frequencies_[feature];
95102
}
96103

97-
// Computes combined weight of `features`.
98-
// The less frequent the feature is, the bigger its weight.
99-
// The weight of a FeatureVec is a sum of individual feature weights.
100-
uint64_t ComputeWeight(const FeatureVec &features) const;
104+
// Computes combined weight of `features` based on the feature rarity that
105+
// scales linearly. The less frequent the feature is, the bigger its
106+
// weight. The weight of a FeatureVec is a sum of individual feature weights.
107+
double ComputeRarityWeight(const FeatureVec& features) const;
101108

102109
// Returns a debug string representing the state of *this.
103110
std::string DebugString() const;
@@ -135,6 +142,8 @@ class FeatureSet {
135142
// Unused parts of MmapNoReserveArray don't actually reserve memory.
136143
MmapNoReserveArray<kSize> frequencies_;
137144

145+
std::unordered_set<feature_t> rare_features_;
146+
138147
static constexpr size_t kScoresSize =
139148
(feature_domains::kCMPScoreDomains.back().end() -
140149
feature_domains::kCMPScoreDomains.front().begin()) >>
@@ -146,6 +155,8 @@ class FeatureSet {
146155
// Counts all unique features added to this.
147156
size_t num_features_ = 0;
148157

158+
size_t num_executions_ = 0;
159+
149160
// Counts features in each domain.
150161
size_t features_per_domain_[feature_domains::kNumDomains] = {};
151162

centipede/feature_set_test.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ namespace {
2727
TEST(FeatureSet, ComputeWeight) {
2828
FeatureSet feature_set(10, {});
2929

30-
auto W = [&](const FeatureVec &features) -> uint64_t {
31-
return feature_set.ComputeWeight(features);
30+
auto W = [&](const FeatureVec& features) -> uint64_t {
31+
return feature_set.ComputeRarityWeight(features);
3232
};
3333

3434
feature_set.MergeFeatures({1, 2, 3});
@@ -60,8 +60,8 @@ TEST(FeatureSet, ComputeWeightWithDifferentDomains) {
6060
/* three features from domain #3 */ f3, f3 + 1,
6161
f3 + 2});
6262

63-
auto weight = [&](const FeatureVec &features) -> uint64_t {
64-
return feature_set.ComputeWeight(features);
63+
auto weight = [&](const FeatureVec& features) -> uint64_t {
64+
return feature_set.ComputeRarityWeight(features);
6565
};
6666

6767
// Test that features from a less frequent domain have more weight.

0 commit comments

Comments
 (0)