Skip to content

Commit 2a57ba9

Browse files
tmp
1 parent 9b0fe2f commit 2a57ba9

File tree

1 file changed

+112
-97
lines changed

1 file changed

+112
-97
lines changed

src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py

Lines changed: 112 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -299,23 +299,51 @@ def _cross_pollinate_other_fuzzer_corpuses(self):
299299
'Failed to unpack corpus backup from url %s.' % corpus_backup_url)
300300

301301

302-
class Runner:
303-
"""Runner for libFuzzer."""
304-
302+
class BaseRunner:
303+
"""Base Runner"""
305304
def __init__(self, build_directory, context):
306305
self.build_directory = build_directory
307306
self.context = context
308-
307+
309308
self.target_path = engine_common.find_fuzzer_path(
310-
self.build_directory, self.context.fuzz_target.binary)
309+
self.build_directory, self.context.fuzz_target.binary)
311310
if not self.target_path:
312311
raise CorpusPruningError(
313-
'Failed to get fuzzer path for %s.' % self.context.fuzz_target.binary)
314-
312+
f'Failed to get fuzzer path for {self.context.fuzz_target.binary}')
315313
self.fuzzer_options = options.get_fuzz_target_options(self.target_path)
316314

317-
def get_libfuzzer_flags(self):
318-
"""Get default libFuzzer options."""
315+
def get_fuzzer_flags(self):
316+
return []
317+
318+
def process_sanitizer_options(self):
319+
"""Process sanitizer options overrides."""
320+
if not self.fuzzer_options:
321+
return
322+
323+
# Only need to look at ASan, as that's what we prune with.
324+
overrides = self.fuzzer_options.get_asan_options()
325+
if not overrides:
326+
return
327+
328+
asan_options = environment.get_memory_tool_options('ASAN_OPTIONS')
329+
if not asan_options:
330+
return
331+
asan_options.update(overrides)
332+
environment.set_memory_tool_options('ASAN_OPTIONS', asan_options)
333+
334+
def reproduce(self, input_path, arguments, max_time):
335+
return self.context.engine.reproduce(self.target_path, input_path, arguments, max_time)
336+
337+
def minimize_corpus(self, arguments, input_dirs, output_dir, reproducers_dir, max_time):
338+
return self.context.engine.minimize_corpus(self.target_path, arguments,
339+
input_dirs, output_dir, reproducers_dir, max_time)
340+
341+
342+
class LibFuzzerRunner(BaseRunner):
343+
"""Runner for libFuzzer."""
344+
345+
def get_fuzzer_flags(self):
346+
"""Get default libFuzzer options for pruning."""
319347
rss_limit = RSS_LIMIT
320348
max_len = engine_common.CORPUS_INPUT_SIZE_LIMIT
321349
detect_leaks = 1
@@ -350,22 +378,6 @@ def get_libfuzzer_flags(self):
350378

351379
return arguments.list()
352380

353-
def process_sanitizer_options(self):
354-
"""Process sanitizer options overrides."""
355-
if not self.fuzzer_options:
356-
return
357-
358-
# Only need to look at ASan, as that's what we prune with.
359-
overrides = self.fuzzer_options.get_asan_options()
360-
if not overrides:
361-
return
362-
363-
asan_options = environment.get_memory_tool_options('ASAN_OPTIONS')
364-
if not asan_options:
365-
return
366-
asan_options.update(overrides)
367-
environment.set_memory_tool_options('ASAN_OPTIONS', asan_options)
368-
369381
def reproduce(self, input_path, arguments, max_time):
370382
return self.context.engine.reproduce(self.target_path, input_path,
371383
arguments, max_time)
@@ -377,33 +389,77 @@ def minimize_corpus(self, arguments, input_dirs, output_dir, reproducers_dir,
377389
reproducers_dir, max_time)
378390

379391

380-
class CorpusPruner:
381-
"""Class that handles corpus pruning."""
392+
class GenericRunner(BaseRunner):
393+
"""Runner implementation for Centipede fuzzing engine."""
382394

395+
396+
class CorpusPrunerBase:
397+
"""Base class for corpus pruning that is engine-agnostic."""
383398
def __init__(self, runner):
384399
self.runner = runner
385-
self.context = self.runner.context
400+
self.context = runner.context
401+
402+
def run(self, initial_corpus_path, minimized_corpus_path, bad_units_path):
403+
if not shell.get_directory_file_count(initial_corpus_path):
404+
# Empty corpus, nothing to do.
405+
return None
406+
407+
# Unpack seed corpus if needed.
408+
engine_common.unpack_seed_corpus_if_needed(
409+
self.runner.target_path, initial_corpus_path, force_unpack=True)
386410

411+
environment.reset_current_memory_tool_options(
412+
redzone_size=MIN_REDZONE, leaks=True)
413+
self.runner.process_sanitizer_options()
414+
415+
additional_args = self.runner.get_fuzzer_flags()
416+
logs.info('Running merge...')
417+
try:
418+
result = self.runner.minimize_corpus(
419+
additional_args, [initial_corpus_path], minimized_corpus_path,
420+
bad_units_path, CORPUS_PRUNING_TIMEOUT)
421+
except TimeoutError as e:
422+
raise CorpusPruningError(
423+
'Corpus pruning timed out while minimizing corpus\n' + repr(e))
424+
except engine.Error as e:
425+
raise CorpusPruningError(
426+
'Corpus pruning failed to minimize corpus\n' + repr(e))
427+
428+
symbolized_output = stack_symbolizer.symbolize_stacktrace(result.logs)
429+
430+
if not shell.get_directory_file_count(minimized_corpus_path):
431+
raise CorpusPruningError('Corpus pruning failed to minimize corpus\n' +
432+
symbolized_output)
433+
434+
logs.info('Corpus merge finished successfully.',
435+
output=symbolized_output)
436+
return result.stats
437+
438+
def process_bad_units(self, bad_units_path, quarantine_corpus_path):
439+
return {}
440+
441+
442+
class LibFuzzerPruner(CorpusPrunerBase):
443+
"""
444+
LibFuzzerPruner is a specialized pruner for libFuzzer that handles
445+
quarantining of problematic units and related special cases.
446+
"""
387447
def _run_single_unit(self, unit_path):
388-
"""Run a single unit, and return the result."""
389-
arguments = self.runner.get_libfuzzer_flags()
448+
arguments = self.runner.get_fuzzer_flags() # Expect libFuzzer flags.
390449
return self.runner.reproduce(unit_path, arguments, SINGLE_UNIT_TIMEOUT)
391450

392451
def _quarantine_unit(self, unit_path, quarantine_corpus_path):
393-
"""Moves the given unit to the quarantine, and returns the path to the unit
394-
in the quarantine."""
395-
quarantined_unit_path = os.path.join(quarantine_corpus_path,
396-
os.path.basename(unit_path))
452+
quarantined_unit_path = os.path.join(
453+
quarantine_corpus_path, os.path.basename(unit_path))
397454
shutil.move(unit_path, quarantined_unit_path)
398-
399455
return quarantined_unit_path
400456

401-
def process_bad_units(self, bad_units_path, quarantine_corpus_path
402-
) -> Dict[str, uworker_msg_pb2.CrashInfo]: # pylint: disable=no-member
403-
"""Process bad units found during merge."""
404-
# TODO(ochang): A lot of this function is similar to parts of fuzz_task.
405-
# Ideally fuzz_task can be refactored in a way that lets us share the common
406-
# code.
457+
def process_bad_units(self, bad_units_path, quarantine_corpus_path):
458+
"""
459+
Process bad units by running each test case individually,
460+
quarantining those that timeout, OOM, or crash due to memory sanitizer
461+
errors.
462+
"""
407463
crashes = {}
408464

409465
environment.reset_current_memory_tool_options(redzone_size=DEFAULT_REDZONE)
@@ -413,94 +469,52 @@ def process_bad_units(self, bad_units_path, quarantine_corpus_path
413469
corpus_file_paths = _get_corpus_file_paths(bad_units_path)
414470
num_bad_units = 0
415471

416-
# Run each corpus item individually.
417472
for i, unit_path in enumerate(corpus_file_paths, 1):
418473
if i % 100 == 0:
419474
logs.info('Up to %d' % i)
420475

421476
unit_name = os.path.basename(unit_path)
422477
if unit_name.startswith('timeout-') or unit_name.startswith('oom-'):
423-
# Don't waste time re-running timeout or oom testcases.
478+
# Immediately quarantine timeouts/oom testcases.
424479
self._quarantine_unit(unit_path, quarantine_corpus_path)
425480
num_bad_units += 1
426481
continue
427482

428483
try:
429484
result = self._run_single_unit(unit_path)
430485
except TimeoutError:
431-
# Slow unit. Quarantine it.
432486
self._quarantine_unit(unit_path, quarantine_corpus_path)
433487
num_bad_units += 1
434488
continue
435489

436490
if not crash_analyzer.is_memory_tool_crash(result.output):
437-
# Didn't crash.
438491
continue
439492

440-
# Get memory tool crash information.
441493
state = stack_analyzer.get_crash_data(result.output, symbolize_flag=True)
442494

443-
# Crashed or caused a leak. Quarantine it.
495+
# Quarantine the crashing unit.
444496
unit_path = self._quarantine_unit(unit_path, quarantine_corpus_path)
445497
num_bad_units += 1
446498

447499
if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
448500
continue
449501

450-
# Local de-duplication.
451502
if state.crash_state not in crashes:
452503
security_flag = crash_analyzer.is_security_issue(
453-
state.crash_stacktrace, state.crash_type, state.crash_address)
454-
crashes[state.crash_state] = uworker_msg_pb2.CrashInfo( # pylint: disable=no-member
455-
crash_state=state.crash_state,
456-
crash_type=state.crash_type,
457-
crash_address=state.crash_address,
458-
crash_stacktrace=state.crash_stacktrace,
459-
unit_path=unit_path,
460-
security_flag=security_flag)
461-
462-
logs.info(
463-
f'Found {num_bad_units} bad units, {len(crashes)} unique crashes.')
504+
state.crash_stacktrace, state.crash_type, state.crash_address)
505+
crashes[state.crash_state] = uworker_msg_pb2.CrashInfo(
506+
crash_state=state.crash_state,
507+
crash_type=state.crash_type,
508+
crash_address=state.crash_address,
509+
crash_stacktrace=state.crash_stacktrace,
510+
unit_path=unit_path,
511+
security_flag=security_flag)
512+
logs.info('Found %d bad units, %d unique crashes.' %
513+
(num_bad_units, len(crashes)))
464514
return crashes
465515

466-
def run(self, initial_corpus_path, minimized_corpus_path, bad_units_path):
467-
"""Run corpus pruning. Output result to directory."""
468-
if not shell.get_directory_file_count(initial_corpus_path):
469-
# Empty corpus, nothing to do.
470-
return None
471-
472-
# Set memory tool options and fuzzer arguments.
473-
engine_common.unpack_seed_corpus_if_needed(
474-
self.runner.target_path, initial_corpus_path, force_unpack=True)
475-
476-
environment.reset_current_memory_tool_options(
477-
redzone_size=MIN_REDZONE, leaks=True)
478-
self.runner.process_sanitizer_options()
479-
additional_args = self.runner.get_libfuzzer_flags()
480-
481-
# Execute fuzzer with arguments for corpus pruning.
482-
logs.info('Running merge...')
483-
try:
484-
result = self.runner.minimize_corpus(
485-
additional_args, [initial_corpus_path], minimized_corpus_path,
486-
bad_units_path, CORPUS_PRUNING_TIMEOUT)
487-
except TimeoutError as e:
488-
raise CorpusPruningError(
489-
'Corpus pruning timed out while minimizing corpus\n' + repr(e))
490-
except engine.Error as e:
491-
raise CorpusPruningError('Corpus pruning failed to minimize corpus\n' +
492-
repr(e))
493-
494-
symbolized_output = stack_symbolizer.symbolize_stacktrace(result.logs)
495-
496-
# Sanity check that there are files in minimized corpus after merging.
497-
if not shell.get_directory_file_count(minimized_corpus_path):
498-
raise CorpusPruningError('Corpus pruning failed to minimize corpus\n' +
499-
symbolized_output)
500-
501-
logs.info('Corpus merge finished successfully.', output=symbolized_output)
502-
503-
return result.stats
516+
class GenericPruner(CorpusPrunerBase):
517+
"""Generic pruner."""
504518

505519

506520
class CrossPollinator:
@@ -593,6 +607,7 @@ def _record_cross_pollination_stats(output):
593607
client = big_query.Client(
594608
dataset_id='main', table_id='cross_pollination_statistics')
595609
client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
610+
596611

597612

598613
def do_corpus_pruning(uworker_input, context, revision) -> CorpusPruningResult:

0 commit comments

Comments
 (0)