3232from clusterfuzz ._internal .metrics import logs
3333from clusterfuzz ._internal .system import environment
3434from clusterfuzz ._internal .system import new_process
35+ from clusterfuzz ._internal .system import shell
3536from clusterfuzz .fuzz import engine
3637from clusterfuzz .stacktraces import constants as stacktraces_constants
3738
@@ -46,6 +47,17 @@ class CentipedeError(Exception):
4647 """Base exception class."""
4748
4849
class CentipedeOptions(engine.FuzzOptions):
  """Centipede engine options.

  Extends the base fuzz options with the two directories the Centipede
  engine needs to carry from preparation through to the fuzzing run.
  """

  def __init__(self, corpus_dir, arguments, strategies, workdir,
               new_corpus_dir):
    """Initializes the options.

    Args:
      corpus_dir: Main corpus directory; forwarded to the base class.
      arguments: Fuzzer arguments; forwarded to the base class.
      strategies: Fuzzing strategies; forwarded to the base class.
      workdir: Working directory used by Centipede for this session.
      new_corpus_dir: Directory to add new units.
    """
    super().__init__(corpus_dir, arguments, strategies)
    # Directory to add new units.
    self.new_corpus_dir = new_corpus_dir
    # Kept on the options object so later steps can reuse the same workdir
    # without re-parsing it out of the argument list.
    self.workdir = workdir
59+
60+
4961def _get_runner (target_path ):
5062 """Gets the Centipede runner."""
5163 centipede_path = pathlib .Path (target_path ).parent / 'centipede'
@@ -198,11 +210,15 @@ def prepare(self, corpus_dir, target_path, build_dir):
198210 # 1. Centipede-readable corpus file;
199211 # 2. Centipede-readable feature file;
200212 # 3. Crash reproducing inputs.
201- workdir = self . _create_temp_dir ('workdir' )
213+ workdir = engine_common . create_temp_fuzzing_dir ('workdir' )
202214 arguments [constants .WORKDIR_FLAGNAME ] = str (workdir )
203215
204- # Directory corpus_dir saves the corpus files required by ClusterFuzz.
205- arguments [constants .CORPUS_DIR_FLAGNAME ] = corpus_dir
216+ # Directory to place new units. While fuzzing, the new corpus
217+ # elements are written to the first dir in the list of corpus directories.
218+ new_corpus_dir = engine_common .create_temp_fuzzing_dir ('new' )
219+ corpus_dirs = [new_corpus_dir , corpus_dir ]
220+ arguments [constants .CORPUS_DIR_FLAGNAME ] = ',' .join (
221+ dir for dir in corpus_dirs )
206222
207223 target_binaries = self ._get_binary_paths (target_path )
208224 if target_binaries .unsanitized is None :
@@ -214,7 +230,8 @@ def prepare(self, corpus_dir, target_path, build_dir):
214230 arguments [constants .EXTRA_BINARIES_FLAGNAME ] = str (
215231 target_binaries .sanitized )
216232
217- return engine .FuzzOptions (corpus_dir , arguments .list (), {})
233+ return CentipedeOptions (corpus_dir , arguments .list (), {}, workdir ,
234+ new_corpus_dir )
218235
219236 def _get_binary_paths (self , target_path ):
220237 """Gets the paths to the main and auxiliary binaries based on |target_path|
@@ -284,11 +301,44 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
284301 runner = _get_runner (target_path )
285302 _set_sanitizer_options (target_path )
286303 timeout = max_time + _CLEAN_EXIT_SECS
304+
305+ old_corpus_len = shell .get_directory_file_count (options .corpus_dir )
306+ logs .info (f'Corpus length before fuzzing: { old_corpus_len } ' )
307+
287308 fuzz_result = runner .run_and_wait (
288309 additional_args = options .arguments , timeout = timeout )
289310 log_lines = fuzz_result .output .splitlines ()
290311 fuzz_result .output = Engine .trim_logs (fuzz_result .output )
291312
313+ workdir = options .workdir
314+
315+ corpus_minimization_failed = False
316+ try :
317+ time_for_minimize = timeout - fuzz_result .time_executed
318+
319+ self .minimize_corpus (
320+ target_path = target_path ,
321+ arguments = [],
322+ # New units, in addition to the main corpus units,
323+ # are placed in new_corpus_dir. Minimize and merge back
324+ # to the main corpus_dir.
325+ input_dirs = [options .new_corpus_dir ],
326+ output_dir = options .corpus_dir ,
327+ reproducers_dir = reproducers_dir ,
328+ max_time = time_for_minimize ,
329+ # Use the same workdir that was used for fuzzing.
330+ # This allows us to skip rerunning the fuzzing inputs.
331+ workdir = workdir )
332+ except Exception as e :
333+ corpus_minimization_failed = True
334+ logs .error (f'corpus minimization failed: { e } ' )
335+
336+ if corpus_minimization_failed :
337+ # If we fail to minimize, fall back to moving the new units
338+ # from the new corpus_dir to the main corpus_dir.
339+ engine_common .move_mergeable_units (options .new_corpus_dir ,
340+ options .corpus_dir )
341+
292342 reproducer_path = _get_reproducer_path (fuzz_result .output , reproducers_dir )
293343 crashes = []
294344 if reproducer_path :
@@ -298,11 +348,7 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
298348 int (fuzz_result .time_executed )))
299349
300350 stats_filename = f'fuzzing-stats-{ os .path .basename (target_path )} .000000.csv'
301- args = fuzzer_options .FuzzerArguments .from_list (options .arguments )
302- assert args is not None
303- assert constants .WORKDIR_FLAGNAME in args
304351
305- workdir = args [constants .WORKDIR_FLAGNAME ]
306352 stats_file = os .path .join (workdir , stats_filename )
307353 stats = _parse_centipede_stats (stats_file )
308354 if not stats :
@@ -321,6 +367,11 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
321367 num_execs_avg = stats .get ('NumExecs_Avg' , 0.0 )
322368 stats ['average_exec_per_sec' ] = num_execs_avg / fuzz_time_secs_avg
323369 stats .update (_parse_centipede_logs (log_lines ))
370+
371+ new_corpus_len = shell .get_directory_file_count (options .corpus_dir )
372+ logs .info (f'Corpus length after fuzzing: { new_corpus_len } ' )
373+ new_units_added = new_corpus_len - old_corpus_len
374+ stats ['new_units_added' ] = new_units_added
324375 return engine .FuzzResult (fuzz_result .output , fuzz_result .command , crashes ,
325376 stats , fuzz_result .time_executed )
326377
@@ -379,14 +430,28 @@ def reproduce(self, target_path, input_path, arguments, max_time): # pylint: di
379430 return engine .ReproduceResult (result .command , result .return_code ,
380431 result .time_executed , result .output )
381432
382- def _create_temp_dir (self , name ):
383- """Creates temporary directory for fuzzing."""
384- new_directory = pathlib .Path (fuzzer_utils .get_temp_dir (), name )
385- engine_common .recreate_directory (new_directory )
386- return new_directory
def _strip_fuzzing_arguments(self, arguments):
  """Remove arguments only needed for fuzzing.

  The passed-in container is modified in place; flags that are not present
  are skipped silently.

  Args:
    arguments: Argument container keyed by flag name. It must support
      membership tests and item deletion by flag name.

  Returns:
    The same |arguments| object, with the fuzzing-only flags removed.
  """
  fuzzing_only_flags = (
      constants.FORK_SERVER_FLAGNAME,
      constants.MAX_LEN_FLAGNAME,
      constants.RUNS_FLAGNAME,
      constants.EXIT_ON_CRASH_FLAGNAME,
      constants.BATCH_SIZE_FLAGNAME,
  )
  for flag_name in fuzzing_only_flags:
    # Guard the deletion: not every flag is necessarily set.
    if flag_name in arguments:
      del arguments[flag_name]

  return arguments
387446
388- def minimize_corpus (self , target_path , arguments , input_dirs , output_dir ,
389- reproducers_dir , max_time ):
447+ def minimize_corpus (self ,
448+ target_path ,
449+ arguments ,
450+ input_dirs ,
451+ output_dir ,
452+ reproducers_dir ,
453+ max_time ,
454+ workdir = None ):
390455 """Runs corpus minimization.
391456 Args:
392457 target_path: Path to the target.
@@ -401,16 +466,29 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
401466 A ReproduceResult object.
402467 """
403468 runner = _get_runner (target_path )
469+ _set_sanitizer_options (target_path )
470+
471+ minimize_arguments = self ._get_arguments (target_path )
472+ self ._strip_fuzzing_arguments (minimize_arguments )
473+ environment .set_value ('ASAN_OPTIONS' , 'detect_odr_violation=0' )
404474
405475 # Step 1: Generate corpus file for Centipede.
406- full_corpus_workdir = self ._create_temp_dir ('full_corpus_workdir' )
476+ # When calling this during a fuzzing session, use the existing workdir.
477+ # This avoids us having to re-run inputs and waste time unnecessarily.
478+ # This saves a lot of time when the input corpus contains thousands
479+ # of files.
480+ full_corpus_workdir = workdir
481+ if not full_corpus_workdir :
482+ full_corpus_workdir = engine_common .create_temp_fuzzing_dir (
483+ 'full_corpus_workdir' )
407484 input_dirs_param = ',' .join (str (dir ) for dir in input_dirs )
408- args = [
485+ args = minimize_arguments . list () + [
409486 f'--workdir={ full_corpus_workdir } ' ,
410487 f'--binary={ target_path } ' ,
411488 f'--corpus_dir={ input_dirs_param } ' ,
412489 '--num_runs=0' ,
413490 ]
491+ logs .info (f'Running Generate Corpus file for Centipede with args: { args } ' )
414492 result = runner .run_and_wait (additional_args = args , timeout = max_time )
415493 max_time -= result .time_executed
416494
@@ -422,11 +500,12 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
422500 raise TimeoutError ('Minimization timed out.' )
423501
424502 # Step 2: Distill.
425- args = [
503+ args = minimize_arguments . list () + [
426504 f'--workdir={ full_corpus_workdir } ' ,
427505 f'--binary={ target_path } ' ,
428- '--distill' ,
506+ '--distill=true ' ,
429507 ]
508+ logs .info (f'Running Corpus Distillation with args: { args } ' )
430509 result = runner .run_and_wait (additional_args = args , timeout = max_time )
431510 max_time -= result .time_executed
432511
@@ -438,17 +517,21 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
438517
439518 # Step 3: Generate corpus files for output_dir.
440519 os .makedirs (output_dir , exist_ok = True )
441- minimized_corpus_workdir = self ._create_temp_dir ('minimized_corpus_workdir' )
520+ minimized_corpus_workdir = engine_common .create_temp_fuzzing_dir (
521+ 'minimized_corpus_workdir' )
522+ logs .info (f'Created a temporary minimized corpus '
523+ f'workdir { minimized_corpus_workdir } ' )
442524 distilled_file = os .path .join (
443525 full_corpus_workdir ,
444526 f'distilled-{ os .path .basename (target_path )} .000000' )
445527 corpus_file = os .path .join (minimized_corpus_workdir , 'corpus.000000' )
446528 shutil .copyfile (distilled_file , corpus_file )
447529
448- args = [
530+ args = minimize_arguments . list () + [
449531 f'--workdir={ minimized_corpus_workdir } ' ,
450532 f'--corpus_to_files={ output_dir } ' ,
451533 ]
534+ logs .info (f'Converting corpus to files with the following args: { args } ' )
452535 result = runner .run_and_wait (additional_args = args , timeout = max_time )
453536
454537 if result .timed_out or max_time < 0 :
@@ -461,11 +544,16 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
461544 # Step 4: Copy reproducers from full_corpus_workdir.
462545 os .makedirs (reproducers_dir , exist_ok = True )
463546 crashes_dir = os .path .join (full_corpus_workdir , 'crashes' )
464- for file in os .listdir (crashes_dir ):
465- crasher_path = os .path .join (crashes_dir , file )
466- shutil .copy (crasher_path , reproducers_dir )
467- shutil .rmtree (full_corpus_workdir )
547+
548+ if os .path .exists (crashes_dir ):
549+ for file in os .listdir (crashes_dir ):
550+ crasher_path = os .path .join (crashes_dir , file )
551+ shutil .copy (crasher_path , reproducers_dir )
552+
468553 shutil .rmtree (minimized_corpus_workdir )
554+ if not workdir :
555+ # Only remove this directory if it was created in this method.
556+ shutil .rmtree (full_corpus_workdir )
469557
470558 return engine .ReproduceResult (result .command , result .return_code ,
471559 result .time_executed , result .output )
@@ -507,7 +595,7 @@ def minimize_testcase(self, target_path, arguments, input_path, output_path,
507595 TimeoutError: If the testcase minimization exceeds max_time.
508596 """
509597 runner = _get_runner (target_path )
510- workdir = self . _create_temp_dir ('workdir' )
598+ workdir = engine_common . create_temp_fuzzing_dir ('workdir' )
511599 args = [
512600 f'--binary={ target_path } ' ,
513601 f'--workdir={ workdir } ' ,
0 commit comments