Skip to content

Commit 21d9af0

Browse files
authored
frontend-c: Generalise C frontend logic with other languages (#2168)
* frontend-c: Generalise C frontend logic with other languages
  Signed-off-by: Arthur Chan <[email protected]>
* Fix logic
  Signed-off-by: Arthur Chan <[email protected]>
* Fix formatting
  Signed-off-by: Arthur Chan <[email protected]>
* Fix formatting
  Signed-off-by: Arthur Chan <[email protected]>
---------
Signed-off-by: Arthur Chan <[email protected]>
1 parent 637f40c commit 21d9af0

File tree

2 files changed: 121 additions (+121) and 170 deletions (-170)

src/fuzz_introspector/frontends/datatypes.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,10 @@ def language_specific_process(self):
8585
"""Dummy function to perform some specific processes in subclasses."""
8686
pass
8787

88+
def get_entry_function_name(self) -> str:
89+
"""Dummy function for getting the entry function name."""
90+
return ''
91+
8892
def has_libfuzzer_harness(self) -> bool:
8993
"""Dummy function for source code files."""
9094
return False

src/fuzz_introspector/frontends/oss_fuzz.py

Lines changed: 117 additions & 170 deletions
Original file line number | Diff line number | Diff line change
@@ -57,76 +57,6 @@ def capture_source_files_in_tree(directory_tree: str,
5757
return language_files
5858

5959

60-
def process_c_project(target_dir: str,
61-
entrypoint: str,
62-
out: str,
63-
source_files: list[str],
64-
module_only: bool = False,
65-
dump_output: bool = True) -> Project:
66-
"""Process a project in C language"""
67-
# Default entrypoint
68-
if not entrypoint:
69-
entrypoint = 'LLVMFuzzerTestOneInput'
70-
71-
logger.info('Going C route')
72-
logger.info('Found %d files to include in analysis', len(source_files))
73-
logger.info('Loading tree-sitter trees and create base project')
74-
project = frontend_c.load_treesitter_trees(source_files)
75-
76-
# We may not need to do this, but will do it while refactoring into
77-
# the new frontends.
78-
if not project.get_source_codes_with_harnesses():
79-
target = os.path.join(out, 'fuzzerLogFile-0.data.yaml')
80-
project.dump_module_logic(target, 'no-harness-in-project', '',
81-
target_dir, dump_output)
82-
target = os.path.join(out, 'full_type_defs.json')
83-
project.dump_type_definition(target, dump_output)
84-
85-
with open(os.path.join(out, 'fuzzerLogFile-0.data'), 'w') as f:
86-
f.write("Call tree\n")
87-
f.write("====================================")
88-
89-
if module_only:
90-
idx = 1
91-
target = os.path.join(out, 'report.yaml')
92-
project.dump_module_logic(target, harness_source=target_dir)
93-
target = os.path.join(out, 'full_type_defs.json')
94-
project.dump_type_definition(target, dump_output)
95-
96-
if entrypoint != 'LLVMFuzzerTestOneInput':
97-
calltree_source = project.get_source_code_with_target(entrypoint)
98-
if calltree_source:
99-
calltree = project.extract_calltree(source_code=calltree_source,
100-
function=entrypoint)
101-
with open(os.path.join(out, 'targetCalltree.txt'), 'w') as f:
102-
f.write("Call tree\n")
103-
f.write(calltree)
104-
f.write("====================================")
105-
else:
106-
for idx, harness in enumerate(
107-
project.get_source_codes_with_harnesses()):
108-
109-
logger.info('handling harness, step 1')
110-
target = os.path.join(out, f'fuzzerLogFile-{idx}.data.yaml')
111-
project.dump_module_logic(target, 'LLVMFuzzerTestOneInput', '',
112-
harness.source_file, dump_output)
113-
target = os.path.join(out, 'full_type_defs.json')
114-
project.dump_type_definition(target, dump_output)
115-
logger.info('handling harness, step 2')
116-
logger.info('Extracting calltree for %s', harness.source_file)
117-
calltree = project.extract_calltree(source_code=harness,
118-
function=entrypoint)
119-
logger.info('handling harness, step 3')
120-
with open(os.path.join(out, f'fuzzerLogFile-{idx}.data'),
121-
'w',
122-
encoding='utf-8') as f:
123-
f.write("Call tree\n")
124-
f.write(calltree)
125-
f.write("====================================")
126-
logger.info('handling harness, step 4')
127-
return project
128-
129-
13060
def analyse_folder(
13161
language: str = '',
13262
directory: str = '',
@@ -145,113 +75,130 @@ def analyse_folder(
14575
source_files.extend(files_to_include)
14676
logger.info('Found %d files to include in analysis', len(source_files))
14777

78+
project: Project = Project([])
79+
80+
# Process for different language
14881
if language == constants.LANGUAGES.C:
149-
project = process_c_project(directory,
150-
entrypoint,
151-
out,
152-
source_files,
153-
module_only,
154-
dump_output=dump_output)
82+
logger.info('Going C route')
83+
logger.info('Loading tree-sitter trees')
84+
if not entrypoint:
85+
entrypoint = 'LLVMFuzzerTestOneInput'
86+
project = frontend_c.load_treesitter_trees(source_files)
87+
if not project.get_source_codes_with_harnesses():
88+
module_only = True
89+
elif language == constants.LANGUAGES.CPP:
90+
logger.info('Going C++ route')
91+
logger.info('Loading tree-sitter trees')
92+
if not entrypoint:
93+
entrypoint = 'LLVMFuzzerTestOneInput'
94+
project = frontend_cpp.load_treesitter_trees(source_files)
95+
elif language == constants.LANGUAGES.GO:
96+
logger.info('Going Go route')
97+
logger.info('Loading tree-sitter trees and create base project')
98+
project = frontend_go.load_treesitter_trees(source_files)
99+
elif language == constants.LANGUAGES.JAVA:
100+
logger.info('Going JVM route')
101+
logger.info('Loading tree-sitter trees and create base project')
102+
if not entrypoint:
103+
entrypoint = 'fuzzerTestOneInput'
104+
project = frontend_jvm.load_treesitter_trees(source_files, entrypoint)
105+
elif language == constants.LANGUAGES.RUST:
106+
logger.info('Going Rust route')
107+
logger.info('Loading tree-sitter trees and create base project')
108+
project = frontend_rust.load_treesitter_trees(source_files)
155109
else:
156-
# Process for different language
157-
if language == constants.LANGUAGES.CPP:
158-
logger.info('Going C++ route')
159-
logger.info('Loading tree-sitter trees')
160-
if not entrypoint:
161-
entrypoint = 'LLVMFuzzerTestOneInput'
162-
project = frontend_cpp.load_treesitter_trees(source_files)
163-
elif language == constants.LANGUAGES.GO:
164-
logger.info('Going Go route')
165-
logger.info('Loading tree-sitter trees and create base project')
166-
project = frontend_go.load_treesitter_trees(source_files)
167-
elif language == constants.LANGUAGES.JAVA:
168-
logger.info('Going JVM route')
169-
logger.info('Loading tree-sitter trees and create base project')
170-
if not entrypoint:
171-
entrypoint = 'fuzzerTestOneInput'
172-
project = frontend_jvm.load_treesitter_trees(
173-
source_files, entrypoint)
174-
elif language == constants.LANGUAGES.RUST:
175-
logger.info('Going Rust route')
176-
logger.info('Loading tree-sitter trees and create base project')
177-
project = frontend_rust.load_treesitter_trees(source_files)
178-
else:
179-
logger.error('Unsupported language: %s', language)
180-
return Project([]), []
181-
182-
pairings = []
183-
textcov_reports = []
184-
if os.environ.get('OUT', ''):
185-
textcovs_path = os.path.join(os.environ.get('OUT', '/out/'),
186-
'textcov_reports')
187-
if os.path.isdir(textcovs_path):
188-
for report_name in os.listdir(textcovs_path):
189-
textcov_reports.append(report_name)
190-
191-
# Perform analysis where there is no harness and only do the module
192-
if not project.get_source_codes_with_harnesses() and module_only:
193-
if not dump_output:
194-
logger.info('Running in-memory analysis')
195-
report = project.get_report('empty')
196-
calltree = 'Call tree\n===================================='
197-
return project, [(report, calltree)]
198-
199-
logger.info('Found no harnesses')
200-
target = os.path.join(out, 'fuzzerLogFile-empty.data.yaml')
201-
project.dump_module_logic(target,
202-
entry_function='',
203-
harness_name='empty',
204-
harness_source='empty-file.cpp',
205-
dump_output=dump_output)
206-
with open(os.path.join(out, 'fuzzerLogFile-empty.data'),
207-
'w',
208-
encoding='utf-8') as f:
209-
f.write("Call tree\n")
210-
f.write("====================================")
110+
logger.error('Unsupported language: %s', language)
111+
return Project([]), []
112+
113+
# Extract textcov pairing
114+
pairings = []
115+
textcov_reports = []
116+
if os.environ.get('OUT', ''):
117+
textcovs_path = os.path.join(os.environ.get('OUT', '/out/'),
118+
'textcov_reports')
119+
if os.path.isdir(textcovs_path):
120+
for report_name in os.listdir(textcovs_path):
121+
textcov_reports.append(report_name)
122+
123+
# Perform analysis where there is no harness and only do the module
124+
if not project.get_source_codes_with_harnesses() and module_only:
125+
if not dump_output:
126+
logger.info('Running in-memory analysis')
127+
report = project.get_report('empty')
128+
calltree = 'Call tree\n===================================='
129+
return project, [(report, calltree)]
130+
131+
logger.info('Found no harnesses')
132+
target = os.path.join(out, 'fuzzerLogFile-empty.data.yaml')
133+
project.dump_module_logic(target,
134+
entry_function='',
135+
harness_name='empty',
136+
harness_source='empty-file.cpp',
137+
dump_output=dump_output)
138+
with open(os.path.join(out, 'fuzzerLogFile-empty.data'),
139+
'w',
140+
encoding='utf-8') as f:
141+
f.write("Call tree\n")
142+
f.write("====================================")
211143

212-
# Process calltree and method data
213-
for harness in project.get_source_codes_with_harnesses():
214-
if language == 'go':
215-
entry_function = harness.get_entry_function_name()
216-
else:
217-
entry_function = entrypoint
144+
target = os.path.join(out, 'full_type_defs.json')
145+
project.dump_type_definition(target, dump_output)
218146

219-
harness_name = harness.source_file.split('/')[-1].split('.')[0]
147+
# TODO Initialise once it is ready
148+
# target = os.path.join(out, 'macro_block_info.json')
149+
# project.dump_macro_block_info(target, dump_output)
220150

221-
# Functions/Methods data
222-
logger.info('Dump methods for %s', harness_name)
223-
target = os.path.join(out,
224-
f'fuzzerLogFile-{harness_name}.data.yaml')
225-
project.dump_module_logic(target,
226-
entry_function=entry_function,
227-
harness_name=harness_name,
228-
harness_source=harness.source_file,
229-
dump_output=dump_output)
151+
# Process calltree and method data
152+
for harness in project.get_source_codes_with_harnesses():
153+
if language == 'go':
154+
entry_function = harness.get_entry_function_name()
155+
else:
156+
entry_function = entrypoint
157+
158+
harness_name = harness.source_file.split('/')[-1].split('.')[0]
159+
160+
# Functions/Methods data
161+
logger.info('Dump methods for %s', harness_name)
162+
target = os.path.join(out, f'fuzzerLogFile-{harness_name}.data.yaml')
163+
project.dump_module_logic(target,
164+
entry_function=entry_function,
165+
harness_name=harness_name,
166+
harness_source=harness.source_file,
167+
dump_output=dump_output)
168+
169+
# Calltree
170+
logger.info('Extracting calltree for %s', harness_name)
171+
calltree = project.extract_calltree(harness.source_file, harness,
172+
entry_function)
173+
logger.info('Calltree extracted')
174+
if dump_output:
175+
target = os.path.join(out, f'fuzzerLogFile-{harness_name}.data')
176+
with open(target, 'w', encoding='utf-8') as f:
177+
f.write(f'Call tree\n{calltree}')
178+
179+
for textcov in textcov_reports:
180+
cov_name = textcov.replace('.covreport', '')
181+
if cov_name == harness_name:
182+
pairings.append({
183+
'executable_path':
184+
f'/out/{cov_name}',
185+
'fuzzer_log_file':
186+
f'fuzzerLogFile-{harness_name}.data'
187+
})
188+
189+
# Type definition
190+
target = os.path.join(out, 'full_type_defs.json')
191+
project.dump_type_definition(target, dump_output)
230192

231-
# Calltree
232-
logger.info('Extracting calltree for %s', harness_name)
233-
calltree = project.extract_calltree(harness.source_file, harness,
234-
entry_function)
235-
logger.info('Calltree extracted')
236-
if dump_output:
237-
target = os.path.join(out,
238-
f'fuzzerLogFile-{harness_name}.data')
239-
with open(target, 'w', encoding='utf-8') as f:
240-
f.write(f'Call tree\n{calltree}')
241193

242-
for textcov in textcov_reports:
243-
cov_name = textcov.replace('.covreport', '')
244-
if cov_name == harness_name:
245-
pairings.append({
246-
'executable_path':
247-
f'/out/{cov_name}',
248-
'fuzzer_log_file':
249-
f'fuzzerLogFile-{harness_name}.data'
250-
})
194+
# TODO Initialise once it is ready
195+
# # Macro block information
196+
# target = os.path.join(out, 'macro_block_info.json')
197+
# project.dump_macro_block_info(target, dump_output)
251198

252-
if pairings:
253-
with open(os.path.join(out, 'exe_to_fuzz_introspector_logs.yaml'),
254-
'w') as f:
255-
f.write(yaml.dump({'pairings': pairings}))
199+
if pairings:
200+
with open(os.path.join(out, 'exe_to_fuzz_introspector_logs.yaml'),
201+
'w') as f:
202+
f.write(yaml.dump({'pairings': pairings}))
256203

257204
return project, None

0 commit comments

Comments
 (0)