Skip to content

Commit 98f7d7e

Browse files
authored
[centipede] add support for gathering stats for centipede runs (#4583)
This PR adds support for Centipede stats in CF. This will help us better understand how Centipede fuzzers are performing on ClusterFuzz.
1 parent 14106f9 commit 98f7d7e

File tree

4 files changed

+586
-7
lines changed

4 files changed

+586
-7
lines changed

src/clusterfuzz/_internal/bot/fuzzers/centipede/engine.py

Lines changed: 94 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,15 @@
1414
"""Centipede engine interface."""
1515

1616
from collections import namedtuple
17+
import csv
1718
import os
1819
import pathlib
1920
import re
2021
import shutil
22+
from typing import Dict
23+
from typing import List
24+
from typing import Optional
25+
from typing import Union
2126

2227
from clusterfuzz._internal.bot.fuzzers import dictionary_manager
2328
from clusterfuzz._internal.bot.fuzzers import engine_common
@@ -28,6 +33,7 @@
2833
from clusterfuzz._internal.system import environment
2934
from clusterfuzz._internal.system import new_process
3035
from clusterfuzz.fuzz import engine
36+
from clusterfuzz.stacktraces import constants as stacktraces_constants
3137

3238
_CLEAN_EXIT_SECS = 10
3339

@@ -72,9 +78,79 @@ def _set_sanitizer_options(fuzzer_path):
7278
environment.set_memory_tool_options(sanitizer_options_var, sanitizer_options)
7379

7480

81+
def _parse_centipede_stats(
    stats_file: str) -> Optional[Dict[str, Union[int, float]]]:
  """Parses the Centipede stats file and returns a dictionary with labels
  and their respective values.

  The file is a CSV whose first record holds the column labels and whose
  following records hold periodic, cumulative snapshots of the stats. Only the
  last snapshot is returned.

  Args:
    stats_file: the path to Centipede stats file.

  Returns:
    a dictionary mapping each label to its latest value (int when the field
    is made of digits only, float otherwise), or None when the file holds no
    stats record or could not be read/parsed.
  """

  def to_number(value: str) -> Union[int, float]:
    return int(value) if value.isdigit() else float(value)

  try:
    # `newline=''` is what the csv module expects for files handed to
    # csv.reader.
    with open(stats_file, 'r', newline='') as statsfile:
      # Blank records (e.g. an empty line at the end of the file) carry no
      # stats; dropping them keeps the last real snapshot in `rows[-1]`.
      rows = [row for row in csv.reader(statsfile) if row]

    # If the binary could not run at all, the file will be empty or with only
    # the column description line.
    if len(rows) <= 1:
      return None

    # The format we're parsing looks like this:
    # NumCoveredPcs_Min,NumCoveredPcs_Max,NumCoveredPcs_Avg,NumExecs_Min,[...]
    # 0,0,0,0,[...]
    # 123,1233,43234,5433
    # The stats are periodically dumped, hence there can be multiple lines.
    # The stats are cumulative, so taking the last line will give us the
    # latest numbers.
    # The last field of each record is dropped (presumably because every
    # record ends with a trailing comma -- confirm against Centipede's output).
    desc = rows[0][:-1]
    latest_stats = rows[-1][:-1]
    if len(latest_stats) < len(desc):
      raise ValueError(
          f'expected {len(desc)} values in the last record, '
          f'got {len(latest_stats)}')

    return {
        label: to_number(value) for label, value in zip(desc, latest_stats)
    }
  except Exception as e:
    # Stats are best-effort: never let a malformed or missing file fail the
    # fuzzing session.
    logs.error(f'Failed to parse centipede stats file: {str(e)}')
    return None
117+
118+
119+
def _parse_centipede_logs(log_lines: List[str]) -> Dict[str, int]:
  """Derives failure stats from Centipede's output.

  Every line is tested against the timeout, out-of-memory and crash patterns,
  in that order; the first pattern that matches a line sets the corresponding
  entry to 1 and the remaining patterns are not tried for that line.
  'leak_count' is always reported as 0 by this function.

  Args:
    log_lines: the lines of the Centipede output.

  Returns:
    a dictionary with the 'crash_count', 'timeout_count', 'oom_count' and
    'leak_count' entries.
  """
  # Ordered by precedence: only the first match of a line is recorded.
  matchers = (
      (stacktraces_constants.CENTIPEDE_TIMEOUT_REGEX, 'timeout_count'),
      (stacktraces_constants.OUT_OF_MEMORY_REGEX, 'oom_count'),
      (CRASH_REGEX, 'crash_count'),
  )
  stats = dict.fromkeys(
      ('crash_count', 'timeout_count', 'oom_count', 'leak_count'), 0)
  for log_line in log_lines:
    for pattern, stat_name in matchers:
      if re.search(pattern, log_line):
        stats[stat_name] = 1
        break
  return stats
145+
146+
75147
class Engine(engine.Engine):
76148
"""Centipede engine implementation."""
77149

150+
def __init__(self):
  """Initializes the engine and creates its Centipede working directory."""
  super().__init__()
  # Created once per engine instance and kept on `self` so that the other
  # methods of the engine can refer to the same directory.
  workdir = self._create_temp_dir('workdir')
  self.workdir = workdir
153+
78154
@property
79155
def name(self):
80156
return 'centipede'
@@ -126,8 +202,7 @@ def prepare(self, corpus_dir, target_path, build_dir):
126202
# 1. Centipede-readable corpus file;
127203
# 2. Centipede-readable feature file;
128204
# 3. Crash reproducing inputs.
129-
workdir = self._create_temp_dir('workdir')
130-
arguments[constants.WORKDIR_FLAGNAME] = str(workdir)
205+
arguments[constants.WORKDIR_FLAGNAME] = str(self.workdir)
131206

132207
# Directory corpus_dir saves the corpus files required by ClusterFuzz.
133208
arguments[constants.CORPUS_DIR_FLAGNAME] = corpus_dir
@@ -214,6 +289,7 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
214289
timeout = max_time + _CLEAN_EXIT_SECS
215290
fuzz_result = runner.run_and_wait(
216291
additional_args=options.arguments, timeout=timeout)
292+
log_lines = fuzz_result.output.splitlines()
217293
fuzz_result.output = Engine.trim_logs(fuzz_result.output)
218294

219295
reproducer_path = _get_reproducer_path(fuzz_result.output, reproducers_dir)
@@ -224,8 +300,20 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
224300
str(reproducer_path), fuzz_result.output, [],
225301
int(fuzz_result.time_executed)))
226302

227-
# Stats report is not available in Centipede yet.
228-
stats = None
303+
stats_filename = f'fuzzing-stats-{os.path.basename(target_path)}.000000.csv'
304+
stats_file = os.path.join(self.workdir, stats_filename)
305+
stats = _parse_centipede_stats(stats_file)
306+
if not stats:
307+
stats = {}
308+
actual_duration = int(
309+
stats.get('FuzzTimeSec_Avg', fuzz_result.time_executed or 0.0))
310+
fuzzing_time_percent = 100 * actual_duration / float(max_time)
311+
stats.update({
312+
'expected_duration': int(max_time),
313+
'actual_duration': actual_duration,
314+
'fuzzing_time_percent': fuzzing_time_percent,
315+
})
316+
stats.update(_parse_centipede_logs(log_lines))
229317
return engine.FuzzResult(fuzz_result.output, fuzz_result.command, crashes,
230318
stats, fuzz_result.time_executed)
231319

@@ -412,10 +500,9 @@ def minimize_testcase(self, target_path, arguments, input_path, output_path,
412500
TimeoutError: If the testcase minimization exceeds max_time.
413501
"""
414502
runner = _get_runner(target_path)
415-
workdir = self._create_temp_dir('workdir')
416503
args = [
417504
f'--binary={target_path}',
418-
f'--workdir={workdir}',
505+
f'--workdir={self.workdir}',
419506
f'--minimize_crash={input_path}',
420507
f'--num_runs={constants.NUM_RUNS_PER_MINIMIZATION}',
421508
'--seed=1',
@@ -425,7 +512,7 @@ def minimize_testcase(self, target_path, arguments, input_path, output_path,
425512
logs.warning(
426513
'Testcase minimization timed out.', fuzzer_output=result.output)
427514
raise TimeoutError('Minimization timed out.')
428-
minimum_testcase = self._get_smallest_crasher(workdir)
515+
minimum_testcase = self._get_smallest_crasher(self.workdir)
429516
if minimum_testcase:
430517
shutil.copyfile(minimum_testcase, output_path)
431518
else:

0 commit comments

Comments
 (0)