Skip to content

Commit cedf172

Browse files
xinhaoyuan authored and copybara-github committed
Clean up the crash minimizer interface and rewrite the implementation.
This is for minimizing crashes during corpus database update, which needs crash signature matching. PiperOrigin-RevId: 828549802
1 parent 775126c commit cedf172

File tree

6 files changed

+233
-92
lines changed

6 files changed

+233
-92
lines changed

centipede/BUILD

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ cc_library(
365365
hdrs = ["minimize_crash.h"],
366366
deps = [
367367
":centipede_callbacks",
368+
":crash_deduplication",
368369
":environment",
369370
":mutation_input",
370371
":runner_result",
@@ -373,10 +374,15 @@ cc_library(
373374
":util",
374375
":workdir",
375376
"@abseil-cpp//absl/base:core_headers",
377+
"@abseil-cpp//absl/container:flat_hash_set",
378+
"@abseil-cpp//absl/status",
379+
"@abseil-cpp//absl/status:statusor",
380+
"@abseil-cpp//absl/strings",
376381
"@abseil-cpp//absl/synchronization",
377382
"@com_google_fuzztest//common:defs",
378383
"@com_google_fuzztest//common:hash",
379384
"@com_google_fuzztest//common:logging",
385+
"@com_google_fuzztest//common:remote_file",
380386
],
381387
)
382388

@@ -1452,6 +1458,8 @@ cc_test(
14521458
":util",
14531459
":workdir",
14541460
"@abseil-cpp//absl/base:nullability",
1461+
"@abseil-cpp//absl/container:flat_hash_set",
1462+
"@abseil-cpp//absl/random",
14551463
"@com_google_fuzztest//common:defs",
14561464
"@com_google_fuzztest//common:test_util",
14571465
"@googletest//:gtest_main",

centipede/centipede_flags.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,9 @@ CENTIPEDE_FLAG(
387387
"--first_shard_index padded on the left with zeros. "
388388
" --num_runs and --num_threads apply. "
389389
" Assumes local workdir.")
390+
CENTIPEDE_FLAG(bool, minimize_crash_with_signature, false,
391+
"If set, minimizes crash that produce the same failure "
392+
"signautre as the original input.")
390393
CENTIPEDE_FLAG(
391394
bool, batch_triage_suspect_only, false,
392395
"If set, triage the crash on only the suspected input in a crashing "

centipede/centipede_interface.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,17 @@ int CentipedeMain(const Environment& env,
875875
if (!env.minimize_crash_file_path.empty()) {
876876
ByteArray crashy_input;
877877
ReadFromLocalFile(env.minimize_crash_file_path, crashy_input);
878-
return MinimizeCrash(crashy_input, env, callbacks_factory);
878+
const auto status =
879+
MinimizeCrash(
880+
crashy_input, env, callbacks_factory,
881+
/*crash_signature=*/nullptr,
882+
/*output_dir=*/WorkDir{env}.CrashReproducerDirPaths().MyShard())
883+
.status();
884+
if (!status.ok()) {
885+
FUZZTEST_LOG(ERROR) << "Failed to minimize crash file: " << status;
886+
return EXIT_FAILURE;
887+
}
888+
return EXIT_SUCCESS;
879889
}
880890

881891
// Just export the corpus from a local dir and exit.

centipede/minimize_crash.cc

Lines changed: 115 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,22 @@
1818
#include <cstddef>
1919
#include <cstdlib>
2020
#include <filesystem> // NOLINT
21+
#include <memory>
22+
#include <optional>
2123
#include <string>
2224
#include <string_view>
25+
#include <system_error> // NOLINT
26+
#include <utility>
2327
#include <vector>
2428

2529
#include "absl/base/thread_annotations.h"
30+
#include "absl/container/flat_hash_set.h"
31+
#include "absl/status/status.h"
32+
#include "absl/status/statusor.h"
33+
#include "absl/strings/str_cat.h"
2634
#include "absl/synchronization/mutex.h"
2735
#include "./centipede/centipede_callbacks.h"
36+
#include "./centipede/crash_deduplication.h"
2837
#include "./centipede/environment.h"
2938
#include "./centipede/mutation_input.h"
3039
#include "./centipede/runner_result.h"
@@ -35,59 +44,61 @@
3544
#include "./common/defs.h"
3645
#include "./common/hash.h"
3746
#include "./common/logging.h" // IWYU pragma: keep
47+
#include "./common/remote_file.h"
3848

3949
namespace fuzztest::internal {
4050

41-
// Work queue for the minimizer.
51+
// The minimizer state shared by all worker threads.
4252
// Thread-safe.
43-
struct MinimizerWorkQueue {
53+
struct MinimizerState {
4454
public:
4555
// Creates the state, which retains at most `capacity` crashers.
46-
// `crash_dir_path` is the directory path where new crashers are written.
4756
// `crasher` is the initial crashy input.
48-
MinimizerWorkQueue(const std::string_view crash_dir_path,
49-
const ByteArray crasher)
50-
: crash_dir_path_(crash_dir_path), crashers_{ByteArray(crasher)} {
51-
std::filesystem::create_directory(crash_dir_path_);
52-
}
57+
MinimizerState(size_t capacity, ByteArray crasher)
58+
: capacity_(capacity), crashers_{std::move(crasher)} {}
5359

5460
// Returns a copy of all crashers currently held.
55-
std::vector<ByteArray> GetRecentCrashers(size_t max_num_crashers) {
61+
std::vector<ByteArray> GetCurrentCrashers() {
5662
absl::MutexLock lock(&mutex_);
57-
size_t num_crashers_to_return =
58-
std::min(crashers_.size(), max_num_crashers);
59-
return {crashers_.end() - num_crashers_to_return, crashers_.end()};
63+
return {crashers_.begin(), crashers_.end()};
6064
}
6165

62-
// Adds `crasher` to the queue, writes it to `crash_dir_path_/Hash(crasher)`.
63-
// The crasher must be smaller than the original one.
64-
void AddCrasher(ByteArray crasher) {
66+
void AddCrasher(ByteArray new_crasher, CrashDetails details) {
6567
absl::MutexLock lock(&mutex_);
66-
FUZZTEST_CHECK_LT(crasher.size(), crashers_.front().size());
67-
crashers_.emplace_back(crasher);
68-
// Write the crasher to disk.
69-
auto hash = Hash(crasher);
70-
auto dir = crash_dir_path_;
71-
std::string file_path = dir.append(hash);
72-
WriteToLocalFile(file_path, crasher);
68+
if (crashers_.contains(new_crasher)) {
69+
return;
70+
}
71+
if (min_crasher_.empty() || new_crasher.size() < min_crasher_.size()) {
72+
min_crasher_ = new_crasher;
73+
min_crasher_details_ = std::move(details);
74+
}
75+
crashers_.insert(std::move(new_crasher));
76+
while (crashers_.size() > capacity_) {
77+
crashers_.erase(std::max_element(
78+
crashers_.begin(), crashers_.end(),
79+
[](const auto& a, const auto& b) { return a.size() < b.size(); }));
80+
}
7381
}
7482

75-
// Returns true if new smaller crashes were found.
76-
bool SmallerCrashesFound() const {
83+
std::optional<std::pair<ByteArray, CrashDetails>> GetMinCrasherAndDetails() {
7784
absl::MutexLock lock(&mutex_);
78-
return crashers_.size() > 1;
85+
if (min_crasher_.empty()) return std::nullopt;
86+
return std::make_pair(min_crasher_, min_crasher_details_);
7987
}
8088

8189
private:
8290
mutable absl::Mutex mutex_;
83-
const std::filesystem::path crash_dir_path_;
84-
std::vector<ByteArray> crashers_ ABSL_GUARDED_BY(mutex_);
91+
size_t capacity_ ABSL_GUARDED_BY(mutex_);
92+
absl::flat_hash_set<ByteArray> crashers_ ABSL_GUARDED_BY(mutex_);
93+
ByteArray min_crasher_ ABSL_GUARDED_BY(mutex_);
94+
CrashDetails min_crasher_details_ ABSL_GUARDED_BY(mutex_);
8595
};
8696

8797
// Performs a minimization loop in one thread.
88-
static void MinimizeCrash(const Environment &env,
89-
CentipedeCallbacksFactory &callbacks_factory,
90-
MinimizerWorkQueue &queue) {
98+
static void MinimizeCrash(const Environment& env,
99+
CentipedeCallbacksFactory& callbacks_factory,
100+
const std::string* crash_signature,
101+
MinimizerState& state) {
91102
ScopedCentipedeCallbacks scoped_callback(callbacks_factory, env);
92103
auto callbacks = scoped_callback.callbacks();
93104
BatchResult batch_result;
@@ -97,72 +108,115 @@ static void MinimizeCrash(const Environment &env,
97108
FUZZTEST_LOG_EVERY_POW_2(INFO)
98109
<< "[" << i << "] Minimizing... Interrupt to stop";
99110
if (ShouldStop()) break;
111+
100112
// Get all currently retained crashers. We don't want just
101113
// the most recent crasher to avoid being stuck in local minimum.
102-
constexpr size_t kMaxNumCrashersToGet = 20;
103-
const auto recent_crashers = queue.GetRecentCrashers(kMaxNumCrashersToGet);
104-
FUZZTEST_CHECK(!recent_crashers.empty());
114+
const auto crashers = state.GetCurrentCrashers();
115+
FUZZTEST_CHECK(!crashers.empty());
105116
// Compute the minimal known crasher size.
106-
size_t min_known_size = recent_crashers.front().size();
107-
for (const auto &crasher : recent_crashers) {
117+
size_t min_known_size = crashers.front().size();
118+
for (const auto& crasher : crashers) {
108119
min_known_size = std::min(min_known_size, crasher.size());
109120
}
110121

122+
std::vector<ByteArray> smaller_mutants;
111123
// Create several mutants that are smaller than the current smallest one.
112124
//
113125
// Currently, we do this by calling the vanilla mutator and
114126
// discarding all inputs that are too large.
115-
// TODO(kcc): modify the Mutate() interface such that max_len can be passed.
116127
//
128+
// TODO(xinhaoyuan): modify the Mutate() interface such that size hint can
129+
// be passed.
117130
const std::vector<ByteArray> mutants = callbacks->Mutate(
118-
GetMutationInputRefsFromDataInputs(recent_crashers), env.batch_size);
119-
std::vector<ByteArray> smaller_mutants;
120-
for (const auto &m : mutants) {
131+
GetMutationInputRefsFromDataInputs(crashers), env.batch_size);
132+
for (const auto& m : mutants) {
121133
if (m.size() < min_known_size) smaller_mutants.push_back(m);
122134
}
123135

124-
// Execute all mutants. If a new crasher is found, add it to `queue`.
125-
if (!callbacks->Execute(env.binary, smaller_mutants, batch_result)) {
126-
size_t crash_inputs_idx = batch_result.num_outputs_read();
127-
FUZZTEST_CHECK_LT(crash_inputs_idx, smaller_mutants.size());
128-
const auto &new_crasher = smaller_mutants[crash_inputs_idx];
129-
FUZZTEST_LOG(INFO) << "Crasher: size: " << new_crasher.size() << ": "
130-
<< AsPrintableString(new_crasher, /*max_len=*/40);
131-
queue.AddCrasher(new_crasher);
136+
if (smaller_mutants.empty()) {
137+
continue;
132138
}
139+
140+
// Try smaller mutants first to minimize the size of the new crasher.
141+
std::sort(smaller_mutants.begin(), smaller_mutants.end(),
142+
[](const auto& a, const auto& b) { return a.size() < b.size(); });
143+
144+
// Execute all mutants. If a new crasher is found, add it to `state`.
145+
if (callbacks->Execute(env.binary, smaller_mutants, batch_result)) {
146+
continue;
147+
}
148+
149+
if (crash_signature != nullptr &&
150+
batch_result.failure_signature() != *crash_signature) {
151+
continue;
152+
}
153+
154+
size_t crash_inputs_idx = batch_result.num_outputs_read();
155+
FUZZTEST_CHECK_LT(crash_inputs_idx, smaller_mutants.size());
156+
const auto& new_crasher = smaller_mutants[crash_inputs_idx];
157+
FUZZTEST_LOG(INFO) << "Crasher: size: " << new_crasher.size() << ": "
158+
<< AsPrintableString(new_crasher, /*max_len=*/40);
159+
state.AddCrasher(new_crasher,
160+
{/*input_signature=*/Hash(new_crasher),
161+
batch_result.failure_description(), /*input_path=*/""});
133162
}
134163
}
135164

136-
int MinimizeCrash(ByteSpan crashy_input, const Environment &env,
137-
CentipedeCallbacksFactory &callbacks_factory) {
165+
absl::StatusOr<CrashDetails> MinimizeCrash(
166+
ByteSpan crashy_input, const Environment& env,
167+
CentipedeCallbacksFactory& callbacks_factory,
168+
const std::string* crash_signature, std::string_view output_dir) {
138169
ScopedCentipedeCallbacks scoped_callback(callbacks_factory, env);
139170
auto callbacks = scoped_callback.callbacks();
140171

141-
FUZZTEST_LOG(INFO) << "MinimizeCrash: trying the original crashy input";
142-
143-
BatchResult batch_result;
172+
std::unique_ptr<std::string> owned_crash_signature;
144173
ByteArray original_crashy_input(crashy_input.begin(), crashy_input.end());
145-
if (callbacks->Execute(env.binary, {original_crashy_input}, batch_result)) {
146-
FUZZTEST_LOG(INFO) << "The original crashy input did not crash; exiting";
147-
return EXIT_FAILURE;
174+
if (crash_signature == nullptr) {
175+
BatchResult batch_result;
176+
if (callbacks->Execute(env.binary, {original_crashy_input}, batch_result)) {
177+
return absl::NotFoundError("The original crashy input did not crash");
178+
}
179+
if (env.minimize_crash_with_signature) {
180+
owned_crash_signature =
181+
std::make_unique<std::string>(batch_result.failure_signature());
182+
crash_signature = owned_crash_signature.get();
183+
}
148184
}
149185

150186
FUZZTEST_LOG(INFO) << "Starting the crash minimization loop in "
151-
<< env.num_threads << "threads";
187+
<< env.num_threads << " threads";
152188

153-
MinimizerWorkQueue queue(WorkDir{env}.CrashReproducerDirPaths().MyShard(),
154-
original_crashy_input);
189+
// Keep up to 20 intermediate crashers (chosen empirically; may be adjusted).
190+
MinimizerState state(/*capacity=*/20, original_crashy_input);
155191

156192
{
157193
ThreadPool threads{static_cast<int>(env.num_threads)};
158194
for (size_t i = 0; i < env.num_threads; ++i) {
159-
threads.Schedule([&env, &callbacks_factory, &queue]() {
160-
MinimizeCrash(env, callbacks_factory, queue);
195+
threads.Schedule([&env, &callbacks_factory, crash_signature, &state]() {
196+
MinimizeCrash(env, callbacks_factory, crash_signature, state);
161197
});
162198
}
163199
} // The threads join here.
164200

165-
return queue.SmallerCrashesFound() ? EXIT_SUCCESS : EXIT_FAILURE;
201+
auto crasher_and_details = state.GetMinCrasherAndDetails();
202+
if (!crasher_and_details.has_value()) {
203+
return absl::NotFoundError("no minimized crash found");
204+
}
205+
206+
auto [crasher, details] = *std::move(crasher_and_details);
207+
const auto output_dir_path = std::filesystem::path{output_dir};
208+
std::error_code ec;
209+
std::filesystem::create_directories(output_dir_path, ec);
210+
if (ec) {
211+
return absl::InternalError(absl::StrCat("failed to create directory path ",
212+
output_dir, ": ", ec.message()));
213+
}
214+
details.input_path = output_dir_path / details.input_signature;
215+
const auto status = RemoteFileSetContents(details.input_path, crasher);
216+
if (!status.ok()) {
217+
return status;
218+
}
219+
return details;
166220
}
167221

168222
} // namespace fuzztest::internal

centipede/minimize_crash.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,25 @@
1515
#ifndef THIRD_PARTY_CENTIPEDE_MINIMIZE_CRASH_H_
1616
#define THIRD_PARTY_CENTIPEDE_MINIMIZE_CRASH_H_
1717

18+
#include "absl/status/statusor.h"
1819
#include "./centipede/centipede_callbacks.h"
20+
#include "./centipede/crash_deduplication.h"
1921
#include "./centipede/environment.h"
2022
#include "./common/defs.h"
2123

2224
namespace fuzztest::internal {
2325

2426
// Tries to minimize `crashy_input`.
2527
// Uses `callbacks_factory` to create `env.num_threads` workers.
26-
// Returns EXIT_SUCCESS if at least one smaller crasher was found,
27-
// EXIT_FAILURE otherwise.
28-
// Also returns EXIT_FAILURE if the original input didn't crash.
29-
// Stores the newly found crashy inputs in
30-
// `WorkDir{env}.CrashReproducerDirPath()`.
31-
int MinimizeCrash(ByteSpan crashy_input, const Environment &env,
32-
CentipedeCallbacksFactory &callbacks_factory);
28+
// If `crash_signature` is non-null, only new crashes with that signature
29+
// are kept. If it is null, `crashy_input` is rerun first, and its signature
30+
// is used for matching only if `env.minimize_crash_with_signature` is set.
31+
// Returns the details of the smallest crash found, with its contents stored
32+
// in `output_dir`. Otherwise an error status is returned.
33+
absl::StatusOr<CrashDetails> MinimizeCrash(
34+
ByteSpan crashy_input, const Environment& env,
35+
CentipedeCallbacksFactory& callbacks_factory,
36+
const std::string* crash_signature, std::string_view output_dir);
3337

3438
} // namespace fuzztest::internal
3539

0 commit comments

Comments
 (0)