1818#include < cstddef>
1919#include < cstdlib>
2020#include < filesystem> // NOLINT
21+ #include < memory>
22+ #include < optional>
2123#include < string>
2224#include < string_view>
25+ #include < system_error> // NOLINT
26+ #include < utility>
2327#include < vector>
2428
2529#include " absl/base/thread_annotations.h"
30+ #include " absl/container/flat_hash_set.h"
31+ #include " absl/status/status.h"
32+ #include " absl/status/statusor.h"
33+ #include " absl/strings/str_cat.h"
2634#include " absl/synchronization/mutex.h"
2735#include " ./centipede/centipede_callbacks.h"
36+ #include " ./centipede/crash_deduplication.h"
2837#include " ./centipede/environment.h"
2938#include " ./centipede/mutation_input.h"
3039#include " ./centipede/runner_result.h"
3544#include " ./common/defs.h"
3645#include " ./common/hash.h"
3746#include " ./common/logging.h" // IWYU pragma: keep
47+ #include " ./common/remote_file.h"
3848
3949namespace fuzztest ::internal {
4050
41- // Work queue for the minimizer .
51+ // The minimizer state shared by all worker threads .
4252// Thread-safe.
43- struct MinimizerWorkQueue {
53+ struct MinimizerState {
4454 public:
4555 // Creates the queue.
46- // `crash_dir_path` is the directory path where new crashers are written.
4756 // `crasher` is the initial crashy input.
48- MinimizerWorkQueue (const std::string_view crash_dir_path,
49- const ByteArray crasher)
50- : crash_dir_path_(crash_dir_path), crashers_{ByteArray (crasher)} {
51- std::filesystem::create_directory (crash_dir_path_);
52- }
57+ MinimizerState (size_t capacity, ByteArray crasher)
58+ : capacity_(capacity), crashers_{std::move (crasher)} {}
5359
5460 // Returns up to `max_num_crashers` most recently added crashers.
55- std::vector<ByteArray> GetRecentCrashers ( size_t max_num_crashers ) {
61+ std::vector<ByteArray> GetCurrentCrashers ( ) {
5662 absl::MutexLock lock (&mutex_);
57- size_t num_crashers_to_return =
58- std::min (crashers_.size (), max_num_crashers);
59- return {crashers_.end () - num_crashers_to_return, crashers_.end ()};
63+ return {crashers_.begin (), crashers_.end ()};
6064 }
6165
62- // Adds `crasher` to the queue, writes it to `crash_dir_path_/Hash(crasher)`.
63- // The crasher must be smaller than the original one.
64- void AddCrasher (ByteArray crasher) {
66+ void AddCrasher (ByteArray new_crasher, CrashDetails details) {
6567 absl::MutexLock lock (&mutex_);
66- FUZZTEST_CHECK_LT (crasher.size (), crashers_.front ().size ());
67- crashers_.emplace_back (crasher);
68- // Write the crasher to disk.
69- auto hash = Hash (crasher);
70- auto dir = crash_dir_path_;
71- std::string file_path = dir.append (hash);
72- WriteToLocalFile (file_path, crasher);
68+ if (crashers_.contains (new_crasher)) {
69+ return ;
70+ }
71+ if (min_crasher_.empty () || new_crasher.size () < min_crasher_.size ()) {
72+ min_crasher_ = new_crasher;
73+ min_crasher_details_ = std::move (details);
74+ }
75+ crashers_.insert (std::move (new_crasher));
76+ while (crashers_.size () > capacity_) {
77+ crashers_.erase (std::max_element (
78+ crashers_.begin (), crashers_.end (),
79+ [](const auto & a, const auto & b) { return a.size () < b.size (); }));
80+ }
7381 }
7482
75- // Returns true if new smaller crashes were found.
76- bool SmallerCrashesFound () const {
83+ std::optional<std::pair<ByteArray, CrashDetails>> GetMinCrasherAndDetails () {
7784 absl::MutexLock lock (&mutex_);
78- return crashers_.size () > 1 ;
85+ if (min_crasher_.empty ()) return std::nullopt ;
86+ return std::make_pair (min_crasher_, min_crasher_details_);
7987 }
8088
8189 private:
8290 mutable absl::Mutex mutex_;
83- const std::filesystem::path crash_dir_path_;
84- std::vector<ByteArray> crashers_ ABSL_GUARDED_BY (mutex_);
91+ size_t capacity_ ABSL_GUARDED_BY (mutex_);
92+ absl::flat_hash_set<ByteArray> crashers_ ABSL_GUARDED_BY (mutex_);
93+ ByteArray min_crasher_ ABSL_GUARDED_BY (mutex_);
94+ CrashDetails min_crasher_details_ ABSL_GUARDED_BY (mutex_);
8595};
8696
8797// Performs a minimization loop in one thread.
88- static void MinimizeCrash (const Environment &env,
89- CentipedeCallbacksFactory &callbacks_factory,
90- MinimizerWorkQueue &queue) {
98+ static void MinimizeCrash (const Environment& env,
99+ CentipedeCallbacksFactory& callbacks_factory,
100+ const std::string* crash_signature,
101+ MinimizerState& state) {
91102 ScopedCentipedeCallbacks scoped_callback (callbacks_factory, env);
92103 auto callbacks = scoped_callback.callbacks ();
93104 BatchResult batch_result;
@@ -97,72 +108,115 @@ static void MinimizeCrash(const Environment &env,
97108 FUZZTEST_LOG_EVERY_POW_2 (INFO)
98109 << " [" << i << " ] Minimizing... Interrupt to stop" ;
99110 if (ShouldStop ()) break ;
111+
100112 // Get up to kMaxNumCrashersToGet most recent crashers. We don't want just
101113 // the most recent crasher to avoid being stuck in local minimum.
102- constexpr size_t kMaxNumCrashersToGet = 20 ;
103- const auto recent_crashers = queue.GetRecentCrashers (kMaxNumCrashersToGet );
104- FUZZTEST_CHECK (!recent_crashers.empty ());
114+ const auto crashers = state.GetCurrentCrashers ();
115+ FUZZTEST_CHECK (!crashers.empty ());
105116 // Compute the minimal known crasher size.
106- size_t min_known_size = recent_crashers .front ().size ();
107- for (const auto & crasher : recent_crashers ) {
117+ size_t min_known_size = crashers .front ().size ();
118+ for (const auto & crasher : crashers ) {
108119 min_known_size = std::min (min_known_size, crasher.size ());
109120 }
110121
122+ std::vector<ByteArray> smaller_mutants;
111123 // Create several mutants that are smaller than the current smallest one.
112124 //
113125 // Currently, we do this by calling the vanilla mutator and
114126 // discarding all inputs that are too large.
115- // TODO(kcc): modify the Mutate() interface such that max_len can be passed.
116127 //
128+ // TODO(xinhaoyuan): modify the Mutate() interface such that size hint can
129+ // be passed.
117130 const std::vector<ByteArray> mutants = callbacks->Mutate (
118- GetMutationInputRefsFromDataInputs (recent_crashers), env.batch_size );
119- std::vector<ByteArray> smaller_mutants;
120- for (const auto &m : mutants) {
131+ GetMutationInputRefsFromDataInputs (crashers), env.batch_size );
132+ for (const auto & m : mutants) {
121133 if (m.size () < min_known_size) smaller_mutants.push_back (m);
122134 }
123135
124- // Execute all mutants. If a new crasher is found, add it to `queue`.
125- if (!callbacks->Execute (env.binary , smaller_mutants, batch_result)) {
126- size_t crash_inputs_idx = batch_result.num_outputs_read ();
127- FUZZTEST_CHECK_LT (crash_inputs_idx, smaller_mutants.size ());
128- const auto &new_crasher = smaller_mutants[crash_inputs_idx];
129- FUZZTEST_LOG (INFO) << " Crasher: size: " << new_crasher.size () << " : "
130- << AsPrintableString (new_crasher, /* max_len=*/ 40 );
131- queue.AddCrasher (new_crasher);
136+ if (smaller_mutants.empty ()) {
137+ continue ;
132138 }
139+
140+ // Try smaller mutants first to minimize the size of the new crasher.
141+ std::sort (smaller_mutants.begin (), smaller_mutants.end (),
142+ [](const auto & a, const auto & b) { return a.size () < b.size (); });
143+
144+ // Execute all mutants. If a new crasher is found, add it to `state`.
145+ if (callbacks->Execute (env.binary , smaller_mutants, batch_result)) {
146+ continue ;
147+ }
148+
149+ if (crash_signature != nullptr &&
150+ batch_result.failure_signature () != *crash_signature) {
151+ continue ;
152+ }
153+
154+ size_t crash_inputs_idx = batch_result.num_outputs_read ();
155+ FUZZTEST_CHECK_LT (crash_inputs_idx, smaller_mutants.size ());
156+ const auto & new_crasher = smaller_mutants[crash_inputs_idx];
157+ FUZZTEST_LOG (INFO) << " Crasher: size: " << new_crasher.size () << " : "
158+ << AsPrintableString (new_crasher, /* max_len=*/ 40 );
159+ state.AddCrasher (new_crasher,
160+ {/* input_signature=*/ Hash (new_crasher),
161+ batch_result.failure_description (), /* input_path=*/ " " });
133162 }
134163}
135164
136- int MinimizeCrash (ByteSpan crashy_input, const Environment &env,
137- CentipedeCallbacksFactory &callbacks_factory) {
165+ absl::StatusOr<CrashDetails> MinimizeCrash (
166+ ByteSpan crashy_input, const Environment& env,
167+ CentipedeCallbacksFactory& callbacks_factory,
168+ const std::string* crash_signature, std::string_view output_dir) {
138169 ScopedCentipedeCallbacks scoped_callback (callbacks_factory, env);
139170 auto callbacks = scoped_callback.callbacks ();
140171
141- FUZZTEST_LOG (INFO) << " MinimizeCrash: trying the original crashy input" ;
142-
143- BatchResult batch_result;
172+ std::unique_ptr<std::string> owned_crash_signature;
144173 ByteArray original_crashy_input (crashy_input.begin (), crashy_input.end ());
145- if (callbacks->Execute (env.binary , {original_crashy_input}, batch_result)) {
146- FUZZTEST_LOG (INFO) << " The original crashy input did not crash; exiting" ;
147- return EXIT_FAILURE;
174+ if (crash_signature == nullptr ) {
175+ BatchResult batch_result;
176+ if (callbacks->Execute (env.binary , {original_crashy_input}, batch_result)) {
177+ return absl::NotFoundError (" The original crashy input did not crash" );
178+ }
179+ if (env.minimize_crash_with_signature ) {
180+ owned_crash_signature =
181+ std::make_unique<std::string>(batch_result.failure_signature ());
182+ crash_signature = owned_crash_signature.get ();
183+ }
148184 }
149185
150186 FUZZTEST_LOG (INFO) << " Starting the crash minimization loop in "
151- << env.num_threads << " threads" ;
187+ << env.num_threads << " threads" ;
152188
153- MinimizerWorkQueue queue (WorkDir{env}. CrashReproducerDirPaths (). MyShard (),
154- original_crashy_input);
189+ // Minimize with 20 intermediate crashers empirically - may be adjusted later.
190+ MinimizerState state ( /* capacity= */ 20 , original_crashy_input);
155191
156192 {
157193 ThreadPool threads{static_cast <int >(env.num_threads )};
158194 for (size_t i = 0 ; i < env.num_threads ; ++i) {
159- threads.Schedule ([&env, &callbacks_factory, &queue ]() {
160- MinimizeCrash (env, callbacks_factory, queue );
195+ threads.Schedule ([&env, &callbacks_factory, crash_signature, &state ]() {
196+ MinimizeCrash (env, callbacks_factory, crash_signature, state );
161197 });
162198 }
163199 } // The threads join here.
164200
165- return queue.SmallerCrashesFound () ? EXIT_SUCCESS : EXIT_FAILURE;
201+ auto crasher_and_details = state.GetMinCrasherAndDetails ();
202+ if (!crasher_and_details.has_value ()) {
203+ return absl::NotFoundError (" no minimized crash found" );
204+ }
205+
206+ auto [crasher, details] = *std::move (crasher_and_details);
207+ const auto output_dir_path = std::filesystem::path{output_dir};
208+ std::error_code ec;
209+ std::filesystem::create_directories (output_dir_path, ec);
210+ if (ec) {
211+ return absl::InternalError (absl::StrCat (" failed to create directory path " ,
212+ output_dir, " : " , ec.message ()));
213+ }
214+ details.input_path = output_dir_path / details.input_signature ;
215+ const auto status = RemoteFileSetContents (details.input_path , crasher);
216+ if (!status.ok ()) {
217+ return status;
218+ }
219+ return details;
166220}
167221
168222} // namespace fuzztest::internal
0 commit comments