diff --git a/centipede/BUILD b/centipede/BUILD index d5899a10..ed5e00cc 100644 --- a/centipede/BUILD +++ b/centipede/BUILD @@ -365,6 +365,7 @@ cc_library( hdrs = ["minimize_crash.h"], deps = [ ":centipede_callbacks", + ":crash_deduplication", ":environment", ":mutation_input", ":runner_result", @@ -373,10 +374,15 @@ cc_library( ":util", ":workdir", "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/container:flat_hash_set", + "@abseil-cpp//absl/status", + "@abseil-cpp//absl/status:statusor", + "@abseil-cpp//absl/strings", "@abseil-cpp//absl/synchronization", "@com_google_fuzztest//common:defs", "@com_google_fuzztest//common:hash", "@com_google_fuzztest//common:logging", + "@com_google_fuzztest//common:remote_file", ], ) @@ -1452,6 +1458,8 @@ cc_test( ":util", ":workdir", "@abseil-cpp//absl/base:nullability", + "@abseil-cpp//absl/container:flat_hash_set", + "@abseil-cpp//absl/random", "@com_google_fuzztest//common:defs", "@com_google_fuzztest//common:test_util", "@googletest//:gtest_main", diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc index 572e56d9..bb23ede1 100644 --- a/centipede/centipede_flags.inc +++ b/centipede/centipede_flags.inc @@ -387,6 +387,9 @@ CENTIPEDE_FLAG( "--first_shard_index padded on the left with zeros. " " --num_runs and --num_threads apply. 
" " Assumes local workdir.") +CENTIPEDE_FLAG(bool, minimize_crash_with_signature, false, + "If set, only accepts minimized crashes that produce the same " + "failure signature as the original input.") CENTIPEDE_FLAG( bool, batch_triage_suspect_only, false, "If set, triage the crash on only the suspected input in a crashing " diff --git a/centipede/centipede_interface.cc b/centipede/centipede_interface.cc index 90b505ab..694f705a 100644 --- a/centipede/centipede_interface.cc +++ b/centipede/centipede_interface.cc @@ -875,7 +875,17 @@ int CentipedeMain(const Environment& env, if (!env.minimize_crash_file_path.empty()) { ByteArray crashy_input; ReadFromLocalFile(env.minimize_crash_file_path, crashy_input); - return MinimizeCrash(crashy_input, env, callbacks_factory); + const auto status = + MinimizeCrash( + crashy_input, env, callbacks_factory, + /*crash_signature=*/nullptr, + /*output_dir=*/WorkDir{env}.CrashReproducerDirPaths().MyShard()) + .status(); + if (!status.ok()) { + FUZZTEST_LOG(ERROR) << "Failed to minimize crash file: " << status; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; } // Just export the corpus from a local dir and exit. 
diff --git a/centipede/minimize_crash.cc b/centipede/minimize_crash.cc index 66d417a2..530ecdd1 100644 --- a/centipede/minimize_crash.cc +++ b/centipede/minimize_crash.cc @@ -18,13 +18,22 @@ #include #include #include // NOLINT +#include +#include #include #include +#include // NOLINT +#include #include #include "absl/base/thread_annotations.h" +#include "absl/container/flat_hash_set.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" #include "absl/synchronization/mutex.h" #include "./centipede/centipede_callbacks.h" +#include "./centipede/crash_deduplication.h" #include "./centipede/environment.h" #include "./centipede/mutation_input.h" #include "./centipede/runner_result.h" @@ -35,59 +44,61 @@ #include "./common/defs.h" #include "./common/hash.h" #include "./common/logging.h" // IWYU pragma: keep +#include "./common/remote_file.h" namespace fuzztest::internal { -// Work queue for the minimizer. +// The minimizer state shared by all worker threads. // Thread-safe. -struct MinimizerWorkQueue { +struct MinimizerState { public: // Creates the queue. - // `crash_dir_path` is the directory path where new crashers are written. // `crasher` is the initial crashy input. - MinimizerWorkQueue(const std::string_view crash_dir_path, - const ByteArray crasher) - : crash_dir_path_(crash_dir_path), crashers_{ByteArray(crasher)} { - std::filesystem::create_directory(crash_dir_path_); - } + MinimizerState(size_t capacity, ByteArray crasher) + : capacity_(capacity), crashers_{std::move(crasher)} {} // Returns up to `max_num_crashers` most recently added crashers. 
- std::vector GetRecentCrashers(size_t max_num_crashers) { + std::vector GetCurrentCrashers() { absl::MutexLock lock(&mutex_); - size_t num_crashers_to_return = - std::min(crashers_.size(), max_num_crashers); - return {crashers_.end() - num_crashers_to_return, crashers_.end()}; + return {crashers_.begin(), crashers_.end()}; } - // Adds `crasher` to the queue, writes it to `crash_dir_path_/Hash(crasher)`. - // The crasher must be smaller than the original one. - void AddCrasher(ByteArray crasher) { + void AddCrasher(ByteArray new_crasher, CrashDetails details) { absl::MutexLock lock(&mutex_); - FUZZTEST_CHECK_LT(crasher.size(), crashers_.front().size()); - crashers_.emplace_back(crasher); - // Write the crasher to disk. - auto hash = Hash(crasher); - auto dir = crash_dir_path_; - std::string file_path = dir.append(hash); - WriteToLocalFile(file_path, crasher); + if (crashers_.contains(new_crasher)) { + return; + } + if (min_crasher_.empty() || new_crasher.size() < min_crasher_.size()) { + min_crasher_ = new_crasher; + min_crasher_details_ = std::move(details); + } + crashers_.insert(std::move(new_crasher)); + while (crashers_.size() > capacity_) { + crashers_.erase(std::max_element( + crashers_.begin(), crashers_.end(), + [](const auto& a, const auto& b) { return a.size() < b.size(); })); + } } - // Returns true if new smaller crashes were found. 
- bool SmallerCrashesFound() const { + std::optional> GetMinCrasherAndDetails() { absl::MutexLock lock(&mutex_); - return crashers_.size() > 1; + if (min_crasher_.empty()) return std::nullopt; + return std::make_pair(min_crasher_, min_crasher_details_); } private: mutable absl::Mutex mutex_; - const std::filesystem::path crash_dir_path_; - std::vector crashers_ ABSL_GUARDED_BY(mutex_); + size_t capacity_ ABSL_GUARDED_BY(mutex_); + absl::flat_hash_set crashers_ ABSL_GUARDED_BY(mutex_); + ByteArray min_crasher_ ABSL_GUARDED_BY(mutex_); + CrashDetails min_crasher_details_ ABSL_GUARDED_BY(mutex_); }; // Performs a minimization loop in one thread. -static void MinimizeCrash(const Environment &env, - CentipedeCallbacksFactory &callbacks_factory, - MinimizerWorkQueue &queue) { +static void MinimizeCrash(const Environment& env, + CentipedeCallbacksFactory& callbacks_factory, + const std::string* crash_signature, + MinimizerState& state) { ScopedCentipedeCallbacks scoped_callback(callbacks_factory, env); auto callbacks = scoped_callback.callbacks(); BatchResult batch_result; @@ -97,72 +108,115 @@ static void MinimizeCrash(const Environment &env, FUZZTEST_LOG_EVERY_POW_2(INFO) << "[" << i << "] Minimizing... Interrupt to stop"; if (ShouldStop()) break; + // Get up to kMaxNumCrashersToGet most recent crashers. We don't want just // the most recent crasher to avoid being stuck in local minimum. - constexpr size_t kMaxNumCrashersToGet = 20; - const auto recent_crashers = queue.GetRecentCrashers(kMaxNumCrashersToGet); - FUZZTEST_CHECK(!recent_crashers.empty()); + const auto crashers = state.GetCurrentCrashers(); + FUZZTEST_CHECK(!crashers.empty()); // Compute the minimal known crasher size. 
- size_t min_known_size = recent_crashers.front().size(); - for (const auto &crasher : recent_crashers) { + size_t min_known_size = crashers.front().size(); + for (const auto& crasher : crashers) { min_known_size = std::min(min_known_size, crasher.size()); } + std::vector smaller_mutants; // Create several mutants that are smaller than the current smallest one. // // Currently, we do this by calling the vanilla mutator and // discarding all inputs that are too large. - // TODO(kcc): modify the Mutate() interface such that max_len can be passed. // + // TODO(xinhaoyuan): modify the Mutate() interface such that size hint can + // be passed. const std::vector mutants = callbacks->Mutate( - GetMutationInputRefsFromDataInputs(recent_crashers), env.batch_size); - std::vector smaller_mutants; - for (const auto &m : mutants) { + GetMutationInputRefsFromDataInputs(crashers), env.batch_size); + for (const auto& m : mutants) { if (m.size() < min_known_size) smaller_mutants.push_back(m); } - // Execute all mutants. If a new crasher is found, add it to `queue`. - if (!callbacks->Execute(env.binary, smaller_mutants, batch_result)) { - size_t crash_inputs_idx = batch_result.num_outputs_read(); - FUZZTEST_CHECK_LT(crash_inputs_idx, smaller_mutants.size()); - const auto &new_crasher = smaller_mutants[crash_inputs_idx]; - FUZZTEST_LOG(INFO) << "Crasher: size: " << new_crasher.size() << ": " - << AsPrintableString(new_crasher, /*max_len=*/40); - queue.AddCrasher(new_crasher); + if (smaller_mutants.empty()) { + continue; } + + // Try smaller mutants first to minimize the size of the new crasher. + std::sort(smaller_mutants.begin(), smaller_mutants.end(), + [](const auto& a, const auto& b) { return a.size() < b.size(); }); + + // Execute all mutants. If a new crasher is found, add it to `state`. 
+ if (callbacks->Execute(env.binary, smaller_mutants, batch_result)) { + continue; + } + + if (crash_signature != nullptr && + batch_result.failure_signature() != *crash_signature) { + continue; + } + + size_t crash_inputs_idx = batch_result.num_outputs_read(); + FUZZTEST_CHECK_LT(crash_inputs_idx, smaller_mutants.size()); + const auto& new_crasher = smaller_mutants[crash_inputs_idx]; + FUZZTEST_LOG(INFO) << "Crasher: size: " << new_crasher.size() << ": " + << AsPrintableString(new_crasher, /*max_len=*/40); + state.AddCrasher(new_crasher, + {/*input_signature=*/Hash(new_crasher), + batch_result.failure_description(), /*input_path=*/""}); } } -int MinimizeCrash(ByteSpan crashy_input, const Environment &env, - CentipedeCallbacksFactory &callbacks_factory) { +absl::StatusOr MinimizeCrash( + ByteSpan crashy_input, const Environment& env, + CentipedeCallbacksFactory& callbacks_factory, + const std::string* crash_signature, std::string_view output_dir) { ScopedCentipedeCallbacks scoped_callback(callbacks_factory, env); auto callbacks = scoped_callback.callbacks(); - FUZZTEST_LOG(INFO) << "MinimizeCrash: trying the original crashy input"; - - BatchResult batch_result; + std::unique_ptr owned_crash_signature; ByteArray original_crashy_input(crashy_input.begin(), crashy_input.end()); - if (callbacks->Execute(env.binary, {original_crashy_input}, batch_result)) { - FUZZTEST_LOG(INFO) << "The original crashy input did not crash; exiting"; - return EXIT_FAILURE; + if (crash_signature == nullptr) { + BatchResult batch_result; + if (callbacks->Execute(env.binary, {original_crashy_input}, batch_result)) { + return absl::NotFoundError("The original crashy input did not crash"); + } + if (env.minimize_crash_with_signature) { + owned_crash_signature = + std::make_unique(batch_result.failure_signature()); + crash_signature = owned_crash_signature.get(); + } } FUZZTEST_LOG(INFO) << "Starting the crash minimization loop in " - << env.num_threads << "threads"; + << env.num_threads << " 
threads"; - MinimizerWorkQueue queue(WorkDir{env}.CrashReproducerDirPaths().MyShard(), - original_crashy_input); + // Minimize with 20 intermediate crashers empirically - may be adjusted later. + MinimizerState state(/*capacity=*/20, original_crashy_input); { ThreadPool threads{static_cast(env.num_threads)}; for (size_t i = 0; i < env.num_threads; ++i) { - threads.Schedule([&env, &callbacks_factory, &queue]() { - MinimizeCrash(env, callbacks_factory, queue); + threads.Schedule([&env, &callbacks_factory, crash_signature, &state]() { + MinimizeCrash(env, callbacks_factory, crash_signature, state); }); } } // The threads join here. - return queue.SmallerCrashesFound() ? EXIT_SUCCESS : EXIT_FAILURE; + auto crasher_and_details = state.GetMinCrasherAndDetails(); + if (!crasher_and_details.has_value()) { + return absl::NotFoundError("no minimized crash found"); + } + + auto [crasher, details] = *std::move(crasher_and_details); + const auto output_dir_path = std::filesystem::path{output_dir}; + std::error_code ec; + std::filesystem::create_directories(output_dir_path, ec); + if (ec) { + return absl::InternalError(absl::StrCat("failed to create directory path ", + output_dir, ": ", ec.message())); + } + details.input_path = output_dir_path / details.input_signature; + const auto status = RemoteFileSetContents(details.input_path, crasher); + if (!status.ok()) { + return status; + } + return details; } } // namespace fuzztest::internal diff --git a/centipede/minimize_crash.h b/centipede/minimize_crash.h index 5677565d..90cbbd2b 100644 --- a/centipede/minimize_crash.h +++ b/centipede/minimize_crash.h @@ -15,7 +15,9 @@ #ifndef THIRD_PARTY_CENTIPEDE_MINIMIZE_CRASH_H_ #define THIRD_PARTY_CENTIPEDE_MINIMIZE_CRASH_H_ +#include "absl/status/statusor.h" #include "./centipede/centipede_callbacks.h" +#include "./centipede/crash_deduplication.h" #include "./centipede/environment.h" #include "./common/defs.h" @@ -23,13 +25,15 @@ namespace fuzztest::internal { // Tries to minimize 
`crashy_input`. // Uses `callbacks_factory` to create `env.num_threads` workers. -// Returns EXIT_SUCCESS if at least one smaller crasher was found, -// EXIT_FAILURE otherwise. -// Also returns EXIT_FAILURE if the original input didn't crash. -// Stores the newly found crashy inputs in -// `WorkDir{env}.CrashReproducerDirPath()`. -int MinimizeCrash(ByteSpan crashy_input, const Environment &env, - CentipedeCallbacksFactory &callbacks_factory); +// If `crash_signature` is non-null, only crashes with that failure signature +// are accepted. If it is null, `crashy_input` is rerun first (it must crash), +// and its signature is matched only if `env.minimize_crash_with_signature` is +// set. On success, stores the smallest crasher found in `output_dir` and +// returns its details. Otherwise an error status is returned. +absl::StatusOr MinimizeCrash( + ByteSpan crashy_input, const Environment& env, + CentipedeCallbacksFactory& callbacks_factory, + const std::string* crash_signature, std::string_view output_dir); } // namespace fuzztest::internal diff --git a/centipede/minimize_crash_test.cc b/centipede/minimize_crash_test.cc index 30a145a9..44f349d6 100644 --- a/centipede/minimize_crash_test.cc +++ b/centipede/minimize_crash_test.cc @@ -23,6 +23,8 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/nullability.h" +#include "absl/container/flat_hash_set.h" +#include "absl/random/random.h" #include "./centipede/centipede_callbacks.h" #include "./centipede/environment.h" #include "./centipede/runner_result.h" @@ -34,6 +36,9 @@ namespace fuzztest::internal { namespace { +using ::testing::HasSubstr; +using ::testing::UnorderedElementsAre; + // A mock for CentipedeCallbacks. class MinimizerMock : public CentipedeCallbacks { public: @@ -46,6 +51,9 @@ class MinimizerMock : public CentipedeCallbacks { for (auto &input : inputs) { if (FuzzMe(input)) { batch_result.exit_code() = EXIT_FAILURE; + // Set signature differently to test signature matching behavior. 
+ batch_result.failure_signature() = + input[0] == 'f' ? "first type" : "second type"; return false; } ++batch_result.num_outputs_read(); @@ -54,15 +62,15 @@ class MinimizerMock : public CentipedeCallbacks { } private: - // Returns true on inputs that look like 'f???u???z', false otherwise. - // The minimal input on which this function returns true is 'fuz'. + // Returns true on inputs that look like '[fz]+', false otherwise. + // The minimal input on which this function returns true is 'f' or 'z', with + // different crash signatures. bool FuzzMe(ByteSpan data) { if (data.empty()) return false; - if (data.front() == 'f' && data[data.size() / 2] == 'u' && - data.back() == 'z') { - return true; + for (const auto c : data) { + if (c != 'f' && c != 'z') return false; } - return false; + return true; } }; @@ -81,32 +89,86 @@ TEST(MinimizeTest, MinimizeTest) { env.workdir = tmp_dir.path(); env.num_runs = 100000; const WorkDir wd{env}; + const auto output_dir = wd.CrashReproducerDirPaths().MyShard(); MinimizerMockFactory factory; // Test with a non-crashy input. - EXPECT_EQ(MinimizeCrash({1, 2, 3}, env, factory), EXIT_FAILURE); + const auto non_crashy_minimize_result = MinimizeCrash( + {1, 2, 3}, env, factory, /*crash_signature=*/nullptr, output_dir); + EXPECT_FALSE(non_crashy_minimize_result.ok()); + EXPECT_THAT(non_crashy_minimize_result.status().message(), + HasSubstr("did not crash")); - ByteArray expected_minimized = {'f', 'u', 'z'}; + const ByteArray expected_minimized = {'f'}; + const ByteArray expected_minimized_alt = {'z'}; // Test with a crashy input that can't be minimized further. 
- EXPECT_EQ(MinimizeCrash(expected_minimized, env, factory), EXIT_FAILURE); + const auto already_minimum_minimize_result = + MinimizeCrash(expected_minimized, env, factory, + /*crash_signature=*/nullptr, output_dir); + EXPECT_FALSE(already_minimum_minimize_result.ok()); + EXPECT_THAT(already_minimum_minimize_result.status().message(), + HasSubstr("no minimized crash found")); // Test the actual minimization. - ByteArray original_crasher = {'f', '.', '.', '.', '.', '.', '.', '.', - '.', '.', '.', 'u', '.', '.', '.', '.', - '.', '.', '.', '.', '.', '.', 'z'}; - EXPECT_EQ(MinimizeCrash(original_crasher, env, factory), EXIT_SUCCESS); - // Collect the new crashers from the crasher dir. - std::vector crashers; - for (auto const &dir_entry : std::filesystem::directory_iterator{ - wd.CrashReproducerDirPaths().MyShard()}) { - ByteArray crasher; - const std::string &path = dir_entry.path(); - ReadFromLocalFile(path, crasher); - EXPECT_LT(crasher.size(), original_crasher.size()); - crashers.push_back(crasher); + ByteArray original_crasher = {'f', 'f', 'f', 'f', 'f', 'f', + 'z', 'z', 'z', 'z', 'z', 'z'}; + + // This is inherently flaky but with 30 trials the failure rate should be + // small enough (1/2^30). + constexpr size_t kNumTrials = 30; + absl::BitGen rng; + absl::flat_hash_set minimized_crashers; + for (size_t i = 0; i < kNumTrials; ++i) { + env.seed = rng(); + EXPECT_OK(MinimizeCrash(original_crasher, env, factory, + /*crash_signature=*/nullptr, output_dir) + .status()); + // Collect the new crashers from the crasher dir. 
+ for (auto const& dir_entry : + std::filesystem::directory_iterator{output_dir}) { + ByteArray crasher; + const std::string& path = dir_entry.path(); + ReadFromLocalFile(path, crasher); + EXPECT_LT(crasher.size(), original_crasher.size()); + minimized_crashers.insert(crasher); + } + } + EXPECT_THAT(minimized_crashers, + UnorderedElementsAre(expected_minimized, expected_minimized_alt)); +} + +TEST(MinimizeTest, MinimizesTestWithSignature) { + TempDir tmp_dir{test_info_->name()}; + Environment env; + env.workdir = tmp_dir.path(); + env.num_runs = 100000; + env.minimize_crash_with_signature = true; + const WorkDir wd{env}; + const auto output_dir = wd.CrashReproducerDirPaths().MyShard(); + MinimizerMockFactory factory; + + ByteArray original_crasher = {'f', 'f', 'f', 'f', 'f', 'f', + 'z', 'z', 'z', 'z', 'z', 'z'}; + constexpr size_t kNumTrials = 30; + absl::BitGen rng; + absl::flat_hash_set minimized_crashers; + for (size_t i = 0; i < kNumTrials; ++i) { + env.seed = rng(); + EXPECT_OK(MinimizeCrash(original_crasher, env, factory, + /*crash_signature=*/nullptr, output_dir) + .status()); + // Collect the new crashers from the crasher dir. + for (auto const& dir_entry : + std::filesystem::directory_iterator{output_dir}) { + ByteArray crasher; + const std::string& path = dir_entry.path(); + ReadFromLocalFile(path, crasher); + EXPECT_LT(crasher.size(), original_crasher.size()); + minimized_crashers.insert(crasher); + } } - EXPECT_THAT(crashers, testing::Contains(expected_minimized)); + EXPECT_THAT(minimized_crashers, UnorderedElementsAre(ByteArray{'f'})); } } // namespace