3232from clusterfuzz ._internal .metrics import logs
3333from clusterfuzz ._internal .system import environment
3434from clusterfuzz ._internal .system import new_process
35+ from clusterfuzz ._internal .system import shell
3536from clusterfuzz .fuzz import engine
3637from clusterfuzz .stacktraces import constants as stacktraces_constants
3738
@@ -46,6 +47,17 @@ class CentipedeError(Exception):
4647 """Base exception class."""
4748
4849
class CentipedeOptions(engine.FuzzOptions):
  """Fuzz options specific to the Centipede engine.

  On top of the generic engine options this keeps track of the two extra
  directories a Centipede fuzzing session is configured with.
  """

  def __init__(self, corpus_dir, arguments, strategies, workdir,
               new_corpus_dir):
    super().__init__(corpus_dir, arguments, strategies)
    # Working directory handed to Centipede for this session.
    self.workdir = workdir
    # Directory to add new units.
    self.new_corpus_dir = new_corpus_dir
59+
60+
4961def _get_runner (target_path ):
5062 """Gets the Centipede runner."""
5163 centipede_path = pathlib .Path (target_path ).parent / 'centipede'
@@ -198,11 +210,13 @@ def prepare(self, corpus_dir, target_path, build_dir):
198210 # 1. Centipede-readable corpus file;
199211 # 2. Centipede-readable feature file;
200212 # 3. Crash reproducing inputs.
201- workdir = self . _create_temp_dir ('workdir' )
213+ workdir = engine_common . create_temp_fuzzing_dir ('workdir' )
202214 arguments [constants .WORKDIR_FLAGNAME ] = str (workdir )
203215
204- # Directory corpus_dir saves the corpus files required by ClusterFuzz.
205- arguments [constants .CORPUS_DIR_FLAGNAME ] = corpus_dir
216+ # Directory to place new units. While fuzzing, the new corpus
217+ # elements are written to the first dir in the list of corpus directories.
218+ new_corpus_dir = engine_common .create_temp_fuzzing_dir ('new' )
219+ arguments [constants .CORPUS_DIR_FLAGNAME ] = f'{ new_corpus_dir } ,{ corpus_dir } '
206220
207221 target_binaries = self ._get_binary_paths (target_path )
208222 if target_binaries .unsanitized is None :
@@ -214,7 +228,8 @@ def prepare(self, corpus_dir, target_path, build_dir):
214228 arguments [constants .EXTRA_BINARIES_FLAGNAME ] = str (
215229 target_binaries .sanitized )
216230
217- return engine .FuzzOptions (corpus_dir , arguments .list (), {})
231+ return CentipedeOptions (corpus_dir , arguments .list (), {}, workdir ,
232+ new_corpus_dir )
218233
219234 def _get_binary_paths (self , target_path ):
220235 """Gets the paths to the main and auxiliary binaries based on |target_path|
@@ -284,11 +299,42 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
284299 runner = _get_runner (target_path )
285300 _set_sanitizer_options (target_path )
286301 timeout = max_time + _CLEAN_EXIT_SECS
302+
303+ old_corpus_len = shell .get_directory_file_count (options .corpus_dir )
304+ logs .info (f'Corpus length before fuzzing: { old_corpus_len } ' )
305+
287306 fuzz_result = runner .run_and_wait (
288307 additional_args = options .arguments , timeout = timeout )
289308 log_lines = fuzz_result .output .splitlines ()
290309 fuzz_result .output = Engine .trim_logs (fuzz_result .output )
291310
311+ workdir = options .workdir
312+
313+ try :
314+ time_for_minimize = timeout - fuzz_result .time_executed
315+
316+ self .minimize_corpus (
317+ target_path = target_path ,
318+ arguments = [],
319+ # New units, in addition to the main corpus units,
320+ # are placed in new_corpus_dir. Minimize and merge back
321+ # to the main corpus_dir.
322+ input_dirs = [options .new_corpus_dir ],
323+ output_dir = options .corpus_dir ,
324+ reproducers_dir = reproducers_dir ,
325+ max_time = time_for_minimize ,
326+ # Use the same workdir that was used for fuzzing.
327+ # This allows us to skip rerunning the fuzzing inputs.
328+ workdir = workdir )
329+ except :
330+ # TODO(alhijazi): Convert to a warning if this becomes a problem
331+ # caused by user code rather than by ClusterFuzz or Centipede.
332+ logs .error ('Corpus minimization failed.' )
333+ # If we fail to minimize, fall back to moving the new units
334+ # from the new corpus_dir to the main corpus_dir.
335+ engine_common .move_mergeable_units (options .new_corpus_dir ,
336+ options .corpus_dir )
337+
292338 reproducer_path = _get_reproducer_path (fuzz_result .output , reproducers_dir )
293339 crashes = []
294340 if reproducer_path :
@@ -298,11 +344,7 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
298344 int (fuzz_result .time_executed )))
299345
300346 stats_filename = f'fuzzing-stats-{ os .path .basename (target_path )} .000000.csv'
301- args = fuzzer_options .FuzzerArguments .from_list (options .arguments )
302- assert args is not None
303- assert constants .WORKDIR_FLAGNAME in args
304347
305- workdir = args [constants .WORKDIR_FLAGNAME ]
306348 stats_file = os .path .join (workdir , stats_filename )
307349 stats = _parse_centipede_stats (stats_file )
308350 if not stats :
@@ -321,6 +363,11 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
321363 num_execs_avg = stats .get ('NumExecs_Avg' , 0.0 )
322364 stats ['average_exec_per_sec' ] = num_execs_avg / fuzz_time_secs_avg
323365 stats .update (_parse_centipede_logs (log_lines ))
366+
367+ new_corpus_len = shell .get_directory_file_count (options .corpus_dir )
368+ logs .info (f'Corpus length after fuzzing: { new_corpus_len } ' )
369+ new_units_added = new_corpus_len - old_corpus_len
370+ stats ['new_units_added' ] = new_units_added
324371 return engine .FuzzResult (fuzz_result .output , fuzz_result .command , crashes ,
325372 stats , fuzz_result .time_executed )
326373
@@ -379,14 +426,28 @@ def reproduce(self, target_path, input_path, arguments, max_time): # pylint: di
379426 return engine .ReproduceResult (result .command , result .return_code ,
380427 result .time_executed , result .output )
381428
382- def _create_temp_dir (self , name ):
383- """Creates temporary directory for fuzzing."""
384- new_directory = pathlib .Path (fuzzer_utils .get_temp_dir (), name )
385- engine_common .recreate_directory (new_directory )
386- return new_directory
def _strip_fuzzing_arguments(self, arguments):
  """Drop the flags that are only needed while fuzzing.

  The flags are deleted from |arguments| in place, and that same object is
  handed back to the caller.
  """
  fuzzing_only_flags = (
      constants.FORK_SERVER_FLAGNAME,
      constants.MAX_LEN_FLAGNAME,
      constants.NUM_RUNS_FLAGNAME,
      constants.EXIT_ON_CRASH_FLAGNAME,
      constants.BATCH_SIZE_FLAGNAME,
  )
  for flag in fuzzing_only_flags:
    # Flags that were never set are simply skipped.
    if flag not in arguments:
      continue
    del arguments[flag]

  return arguments
387442
388- def minimize_corpus (self , target_path , arguments , input_dirs , output_dir ,
389- reproducers_dir , max_time ):
443+ def minimize_corpus (self ,
444+ target_path ,
445+ arguments ,
446+ input_dirs ,
447+ output_dir ,
448+ reproducers_dir ,
449+ max_time ,
450+ workdir = None ):
390451 """Runs corpus minimization.
391452 Args:
392453 target_path: Path to the target.
@@ -401,16 +462,29 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
401462 A FuzzResult object.
402463 """
403464 runner = _get_runner (target_path )
465+ _set_sanitizer_options (target_path )
466+
467+ minimize_arguments = self ._get_arguments (target_path )
468+ self ._strip_fuzzing_arguments (minimize_arguments )
469+ environment .set_value ('ASAN_OPTIONS' , 'detect_odr_violation=0' )
404470
405471 # Step 1: Generate corpus file for Centipede.
406- full_corpus_workdir = self ._create_temp_dir ('full_corpus_workdir' )
472+ # When calling this during a fuzzing session, use the existing workdir.
473+ # This avoids us having to re-run inputs and waste time unnecessarily.
474+ # This saves a lot of time when the input corpus contains thousands
475+ # of files.
476+ full_corpus_workdir = workdir
477+ if not full_corpus_workdir :
478+ full_corpus_workdir = engine_common .create_temp_fuzzing_dir (
479+ 'full_corpus_workdir' )
407480 input_dirs_param = ',' .join (str (dir ) for dir in input_dirs )
408- args = [
481+ args = minimize_arguments . list () + [
409482 f'--workdir={ full_corpus_workdir } ' ,
410483 f'--binary={ target_path } ' ,
411484 f'--corpus_dir={ input_dirs_param } ' ,
412485 '--num_runs=0' ,
413486 ]
487+ logs .info (f'Running Generate Corpus file for Centipede with args: { args } ' )
414488 result = runner .run_and_wait (additional_args = args , timeout = max_time )
415489 max_time -= result .time_executed
416490
@@ -422,11 +496,12 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
422496 raise TimeoutError ('Minimization timed out.' )
423497
424498 # Step 2: Distill.
425- args = [
499+ args = minimize_arguments . list () + [
426500 f'--workdir={ full_corpus_workdir } ' ,
427501 f'--binary={ target_path } ' ,
428- '--distill' ,
502+ '--distill=true ' ,
429503 ]
504+ logs .info (f'Running Corpus Distillation with args: { args } ' )
430505 result = runner .run_and_wait (additional_args = args , timeout = max_time )
431506 max_time -= result .time_executed
432507
@@ -438,17 +513,21 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
438513
439514 # Step 3: Generate corpus files for output_dir.
440515 os .makedirs (output_dir , exist_ok = True )
441- minimized_corpus_workdir = self ._create_temp_dir ('minimized_corpus_workdir' )
516+ minimized_corpus_workdir = engine_common .create_temp_fuzzing_dir (
517+ 'minimized_corpus_workdir' )
518+ logs .info (f'Created a temporary minimized corpus '
519+ f'workdir { minimized_corpus_workdir } ' )
442520 distilled_file = os .path .join (
443521 full_corpus_workdir ,
444522 f'distilled-{ os .path .basename (target_path )} .000000' )
445523 corpus_file = os .path .join (minimized_corpus_workdir , 'corpus.000000' )
446524 shutil .copyfile (distilled_file , corpus_file )
447525
448- args = [
526+ args = minimize_arguments . list () + [
449527 f'--workdir={ minimized_corpus_workdir } ' ,
450528 f'--corpus_to_files={ output_dir } ' ,
451529 ]
530+ logs .info (f'Converting corpus to files with the following args: { args } ' )
452531 result = runner .run_and_wait (additional_args = args , timeout = max_time )
453532
454533 if result .timed_out or max_time < 0 :
@@ -461,11 +540,16 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
461540 # Step 4: Copy reproducers from full_corpus_workdir.
462541 os .makedirs (reproducers_dir , exist_ok = True )
463542 crashes_dir = os .path .join (full_corpus_workdir , 'crashes' )
464- for file in os .listdir (crashes_dir ):
465- crasher_path = os .path .join (crashes_dir , file )
466- shutil .copy (crasher_path , reproducers_dir )
467- shutil .rmtree (full_corpus_workdir )
543+
544+ if os .path .exists (crashes_dir ):
545+ for file in os .listdir (crashes_dir ):
546+ crasher_path = os .path .join (crashes_dir , file )
547+ shutil .copy (crasher_path , reproducers_dir )
548+
468549 shutil .rmtree (minimized_corpus_workdir )
550+ if not workdir :
551+ # Only remove this directory if it was created in this method.
552+ shutil .rmtree (full_corpus_workdir )
469553
470554 return engine .ReproduceResult (result .command , result .return_code ,
471555 result .time_executed , result .output )
@@ -507,7 +591,7 @@ def minimize_testcase(self, target_path, arguments, input_path, output_path,
507591 TimeoutError: If the testcase minimization exceeds max_time.
508592 """
509593 runner = _get_runner (target_path )
510- workdir = self . _create_temp_dir ('workdir' )
594+ workdir = engine_common . create_temp_fuzzing_dir ('workdir' )
511595 args = [
512596 f'--binary={ target_path } ' ,
513597 f'--workdir={ workdir } ' ,
0 commit comments