Skip to content

Commit 0a5cae5

Browse files
xinhaoyuan authored and copybara-github committed
Record the origin for mutation for further tracking.
This is to enable further corpus management and scheduling methods based on the mutation stats.

For now, add the new `Mutant` type in :mutation_input. Will rename it to :mutation_data later.

PiperOrigin-RevId: 821630139
1 parent ba63065 commit 0a5cae5

28 files changed

+451
-216
lines changed

centipede/BUILD

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ cc_library(
439439
# used in centipede_runner.
440440
":feature",
441441
":execution_metadata",
442+
":mutation_input",
442443
":shared_memory_blob_sequence",
443444
"@com_google_fuzztest//common:defs",
444445
],
@@ -944,6 +945,7 @@ cc_library(
944945
hdrs = ["dispatcher.h"],
945946
deps = [
946947
":execution_metadata",
948+
":mutation_input",
947949
":runner_request",
948950
":runner_result",
949951
":shared_memory_blob_sequence",
@@ -1495,6 +1497,7 @@ cc_test(
14951497
deps = [
14961498
":execution_metadata",
14971499
":feature",
1500+
":mutation_input",
14981501
":runner_result",
14991502
":shared_memory_blob_sequence",
15001503
"@com_google_fuzztest//common:defs",
@@ -1634,6 +1637,7 @@ cc_test(
16341637
":feature",
16351638
":feature_set",
16361639
":pc_info",
1640+
":runner_result",
16371641
":util",
16381642
"@com_google_fuzztest//common:defs",
16391643
"@com_google_fuzztest//common:test_util",

centipede/byte_array_mutator.cc

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -321,27 +321,29 @@ void ByteArrayMutator::CrossOver(ByteArray &data, const ByteArray &other) {
321321
// TODO(kcc): add tests with different values of knobs.
322322
const KnobId knob_mutate_or_crossover = Knobs::NewId("mutate_or_crossover");
323323

324-
std::vector<ByteArray> ByteArrayMutator::MutateMany(
325-
const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
324+
std::vector<Mutant> ByteArrayMutator::MutateMany(
325+
const std::vector<MutationInputRef>& inputs, size_t num_mutants) {
326326
if (inputs.empty()) abort();
327327
// TODO(xinhaoyuan): Consider metadata in other inputs instead of always the
328328
// first one.
329329
SetMetadata(inputs[0].metadata != nullptr ? *inputs[0].metadata
330330
: ExecutionMetadata());
331331
size_t num_inputs = inputs.size();
332-
std::vector<ByteArray> mutants;
332+
std::vector<Mutant> mutants;
333333
mutants.reserve(num_mutants);
334334
for (size_t i = 0; i < num_mutants; ++i) {
335-
auto mutant = inputs[rng_() % num_inputs].data;
336-
if (mutant.size() <= max_len_ &&
335+
Mutant mutant;
336+
mutant.origin = rng_() % num_inputs;
337+
mutant.data = inputs[mutant.origin].data;
338+
if (mutant.data.size() <= max_len_ &&
337339
knobs_.GenerateBool(knob_mutate_or_crossover, rng_())) {
338340
// Do crossover only if the mutant is not over the max_len_.
339341
// Perform crossover with some other input. It may be the same input.
340342
const auto &other_input = inputs[rng_() % num_inputs].data;
341-
CrossOver(mutant, other_input);
343+
CrossOver(mutant.data, other_input);
342344
} else {
343345
// Perform mutation.
344-
Mutate(mutant);
346+
Mutate(mutant.data);
345347
}
346348
mutants.push_back(std::move(mutant));
347349
}

centipede/byte_array_mutator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ class ByteArrayMutator {
108108
}
109109

110110
// Takes non-empty `inputs` and produces `num_mutants` mutants.
111-
std::vector<ByteArray> MutateMany(const std::vector<MutationInputRef> &inputs,
112-
size_t num_mutants);
111+
std::vector<Mutant> MutateMany(const std::vector<MutationInputRef>& inputs,
112+
size_t num_mutants);
113113

114114
using CrossOverFn = void (ByteArrayMutator::*)(ByteArray &,
115115
const ByteArray &);

centipede/byte_array_mutator_test.cc

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -928,12 +928,12 @@ TEST(ByteArrayMutator, MutateManyWithAlignedInputs) {
928928
{0, 1, 2, 3, 4, 5, 6, 7},
929929
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
930930
};
931-
const std::vector<ByteArray> mutants =
931+
const std::vector<Mutant> mutants =
932932
mutator.MutateMany(GetMutationInputRefsFromDataInputs(aligned_inputs),
933933
kNumMutantsToGenerate);
934934
EXPECT_EQ(mutants.size(), kNumMutantsToGenerate);
935-
for (const ByteArray &mutant : mutants) {
936-
EXPECT_EQ(mutant.size() % kSizeAlignment, 0);
935+
for (const Mutant& mutant : mutants) {
936+
EXPECT_EQ(mutant.data.size() % kSizeAlignment, 0);
937937
}
938938
}
939939

@@ -958,13 +958,13 @@ TEST(ByteArrayMutator, MutateManyWithUnalignedInputs) {
958958
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
959959
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
960960
};
961-
const std::vector<ByteArray> mutants =
961+
const std::vector<Mutant> mutants =
962962
mutator.MutateMany(GetMutationInputRefsFromDataInputs(unaligned_inputs),
963963
kNumMutantsToGenerate);
964964
EXPECT_EQ(mutants.size(), kNumMutantsToGenerate);
965-
for (const ByteArray &mutant : mutants) {
966-
if (mutant.size() % kSizeAlignment != 0) {
967-
EXPECT_LE(mutant.size(), 11);
965+
for (const Mutant& mutant : mutants) {
966+
if (mutant.data.size() % kSizeAlignment != 0) {
967+
EXPECT_LE(mutant.data.size(), 11);
968968
}
969969
}
970970
}
@@ -982,12 +982,12 @@ TEST(ByteArrayMutator, MutateManyWithMaxLen) {
982982
{0, 1, 2},
983983
{0, 1, 2, 3},
984984
};
985-
const std::vector<ByteArray> mutants = mutator.MutateMany(
985+
const std::vector<Mutant> mutants = mutator.MutateMany(
986986
GetMutationInputRefsFromDataInputs(inputs), kNumMutantsToGenerate);
987987
EXPECT_EQ(mutants.size(), kNumMutantsToGenerate);
988988

989-
for (const ByteArray &mutant : mutants) {
990-
EXPECT_LE(mutant.size(), kMaxLen);
989+
for (const Mutant& mutant : mutants) {
990+
EXPECT_LE(mutant.data.size(), kMaxLen);
991991
}
992992
}
993993

@@ -1001,16 +1001,16 @@ TEST(ByteArrayMutator, MutateManyWithMaxLenWithStartingLargeInput) {
10011001
const std::vector<ByteArray> large_input = {
10021002
{0, 1, 2, 3, 4, 5, 6, 7}, {0}, {0, 1}, {0, 1, 2}, {0, 1, 2, 3},
10031003
};
1004-
const std::vector<ByteArray> mutants = mutator.MutateMany(
1004+
const std::vector<Mutant> mutants = mutator.MutateMany(
10051005
GetMutationInputRefsFromDataInputs(large_input), kNumMutantsToGenerate);
10061006
EXPECT_EQ(mutants.size(), kNumMutantsToGenerate);
10071007

1008-
for (const ByteArray &mutant : mutants) {
1009-
if (mutant.size() > kMaxLen) {
1008+
for (const Mutant& mutant : mutants) {
1009+
if (mutant.data.size() > kMaxLen) {
10101010
// The only mutant larger than max length should be the same large input
10111011
// that mutation originally started with. All other mutants should be
10121012
// within the maximum length specified.
1013-
EXPECT_EQ(mutant, large_input[0]);
1013+
EXPECT_EQ(mutant.data, large_input[0]);
10141014
}
10151015
}
10161016
}

centipede/centipede.cc

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -416,13 +416,18 @@ size_t Centipede::AddPcPairFeatures(FeatureVec &fv) {
416416
}
417417

418418
bool Centipede::RunBatch(
419-
const std::vector<ByteArray> &input_vec,
420-
BlobFileWriter *absl_nullable corpus_file,
421-
BlobFileWriter *absl_nullable features_file,
422-
BlobFileWriter *absl_nullable unconditional_features_file) {
419+
const std::vector<ByteArray>& input_vec,
420+
const std::vector<size_t>& mutant_origins,
421+
BlobFileWriter* absl_nullable corpus_file,
422+
BlobFileWriter* absl_nullable features_file,
423+
BlobFileWriter* absl_nullable unconditional_features_file) {
423424
BatchResult batch_result;
424425
bool success = ExecuteAndReportCrash(env_.binary, input_vec, batch_result);
425426
FUZZTEST_CHECK_EQ(input_vec.size(), batch_result.results().size());
427+
FUZZTEST_CHECK(mutant_origins.empty() ||
428+
mutant_origins.size() >= input_vec.size())
429+
<< "Got " << mutant_origins.size() << " with " << input_vec.size()
430+
<< " input";
426431

427432
for (const auto &extra_binary : env_.extra_binaries) {
428433
if (ShouldStop()) break;
@@ -474,6 +479,7 @@ bool Centipede::RunBatch(
474479
}
475480
}
476481
}
482+
corpus_.UpdateWeights(fs_, coverage_frontier_, env_.exec_time_weight_scaling);
477483
return batch_gained_new_coverage;
478484
}
479485

@@ -563,7 +569,7 @@ void Centipede::Rerun(std::vector<ByteArray> &to_rerun) {
563569
size_t batch_size = std::min(to_rerun.size(), env_.batch_size);
564570
std::vector<ByteArray> batch(to_rerun.end() - batch_size, to_rerun.end());
565571
to_rerun.resize(to_rerun.size() - batch_size);
566-
if (RunBatch(batch, nullptr, nullptr, features_file.get())) {
572+
if (RunBatch(batch, {}, nullptr, nullptr, features_file.get())) {
567573
UpdateAndMaybeLogStats("rerun-old", 1);
568574
}
569575
}
@@ -757,7 +763,7 @@ void Centipede::LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file,
757763
seed_inputs.push_back({0});
758764
}
759765

760-
RunBatch(seed_inputs, corpus_file, features_file,
766+
RunBatch(seed_inputs, {}, corpus_file, features_file,
761767
/*unconditional_features_file=*/nullptr);
762768
FUZZTEST_LOG(INFO) << "Number of input seeds available: "
763769
<< num_seeds_available
@@ -838,21 +844,39 @@ void Centipede::FuzzingLoop() {
838844
auto remaining_runs = env_.num_runs - new_runs;
839845
auto batch_size = std::min(env_.batch_size, remaining_runs);
840846
std::vector<MutationInputRef> mutation_inputs;
847+
std::vector<size_t> mutate_batch_origins;
841848
mutation_inputs.reserve(env_.mutate_batch_size);
849+
mutate_batch_origins.reserve(env_.mutate_batch_size);
842850
for (size_t i = 0; i < env_.mutate_batch_size; i++) {
843-
const auto& corpus_record = env_.use_corpus_weights
844-
? corpus_.WeightedRandom(rng_)
845-
: corpus_.UniformRandom(rng_);
851+
const size_t origin = env_.use_corpus_weights
852+
? corpus_.WeightedRandom(rng_)
853+
: corpus_.UniformRandom(rng_);
854+
mutate_batch_origins.push_back(origin);
855+
const auto& corpus_record = corpus_.Records()[origin];
846856
mutation_inputs.push_back(
847857
MutationInputRef{corpus_record.data, &corpus_record.metadata});
848858
}
849859

850-
const std::vector<ByteArray> mutants =
860+
const std::vector<Mutant> mutants =
851861
user_callbacks_.Mutate(mutation_inputs, batch_size);
852862
if (ShouldStop()) break;
853863

864+
std::vector<ByteArray> next_batch;
865+
next_batch.reserve(mutants.size());
866+
std::vector<size_t> mutant_origins;
867+
mutant_origins.reserve(mutants.size());
868+
for (auto& mutant : mutants) {
869+
next_batch.push_back(std::move(mutant.data));
870+
if (mutant.origin == Mutant::kOriginNone) {
871+
mutant_origins.push_back(Mutant::kOriginNone);
872+
} else {
873+
mutant_origins.push_back(mutate_batch_origins[mutant.origin]);
874+
}
875+
}
876+
854877
bool gained_new_coverage =
855-
RunBatch(mutants, corpus_file.get(), features_file.get(), nullptr);
878+
RunBatch(next_batch, mutant_origins, corpus_file.get(),
879+
features_file.get(), nullptr);
856880
new_runs += mutants.size();
857881

858882
if (gained_new_coverage) {

centipede/centipede.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,11 @@ class Centipede {
8484
// * its features are written to `features_file` (if that's non-null).
8585
// Returns true if new features were observed.
8686
// Post-condition: `batch_result.results.size()` == `input_vec.size()`.
87-
bool RunBatch(const std::vector<ByteArray> &input_vec,
88-
BlobFileWriter *absl_nullable corpus_file,
89-
BlobFileWriter *absl_nullable features_file,
90-
BlobFileWriter *absl_nullable unconditional_features_file);
87+
bool RunBatch(const std::vector<ByteArray>& input_vec,
88+
const std::vector<size_t>& mutant_origins,
89+
BlobFileWriter* absl_nullable corpus_file,
90+
BlobFileWriter* absl_nullable features_file,
91+
BlobFileWriter* absl_nullable unconditional_features_file);
9192
// Loads seed inputs from the user callbacks, executes them, and stores them
9293
// with the corresponding features into `corpus_file` and `features_file`.
9394
void LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file,

centipede/centipede_callbacks.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ class CentipedeCallbacks {
7272
BatchResult &batch_result) = 0;
7373

7474
// Takes non-empty `inputs` and returns at most `num_mutants` mutated inputs.
75-
virtual std::vector<ByteArray> Mutate(
76-
const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
75+
virtual std::vector<Mutant> Mutate(
76+
const std::vector<MutationInputRef>& inputs, size_t num_mutants) {
7777
return env_.use_legacy_default_mutator
7878
? byte_array_mutator_.MutateMany(inputs, num_mutants)
7979
: fuzztest_mutator_.MutateMany(inputs, num_mutants);

centipede/centipede_default_callbacks.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ CentipedeDefaultCallbacks::GetSerializedTargetConfig() {
7272
"Failed to get serialized configuration from the target binary.");
7373
}
7474

75-
std::vector<ByteArray> CentipedeDefaultCallbacks::Mutate(
76-
const std::vector<MutationInputRef> &inputs, size_t num_mutants) {
75+
std::vector<Mutant> CentipedeDefaultCallbacks::Mutate(
76+
const std::vector<MutationInputRef>& inputs, size_t num_mutants) {
7777
if (num_mutants == 0) return {};
7878
// Try to use the custom mutator if it hasn't been disabled.
7979
if (custom_mutator_is_usable_.value_or(true)) {

centipede/centipede_default_callbacks.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ class CentipedeDefaultCallbacks : public CentipedeCallbacks {
4242
absl::StatusOr<std::string> GetSerializedTargetConfig() override;
4343
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
4444
BatchResult &batch_result) override;
45-
std::vector<ByteArray> Mutate(const std::vector<MutationInputRef> &inputs,
46-
size_t num_mutants) override;
45+
std::vector<Mutant> Mutate(const std::vector<MutationInputRef>& inputs,
46+
size_t num_mutants) override;
4747

4848
private:
4949
std::optional<bool> custom_mutator_is_usable_ = std::nullopt;

centipede/centipede_flags.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ CENTIPEDE_FLAG(
192192
bool, use_corpus_weights, true,
193193
"If true, use weighted distribution when choosing the corpus element "
194194
"to mutate. This flag is mostly for Centipede developers.")
195+
CENTIPEDE_FLAG(
196+
bool, exec_time_weight_scaling, true,
197+
"If true, scale the corpus weight by the execution time of each input.")
195198
CENTIPEDE_FLAG(
196199
bool, use_coverage_frontier, false,
197200
"If true, use coverage frontier when choosing the corpus element to "

0 commit comments

Comments
 (0)