Skip to content

Commit f837c6b

Browse files
authored
frontend-c: Extract macro condition blocks for C project (#2170)
* frontend-c: Extract macro condition blocks for C project
  Signed-off-by: Arthur Chan <[email protected]>
* Fix typing
  Signed-off-by: Arthur Chan <[email protected]>
* Fix formatting
  Signed-off-by: Arthur Chan <[email protected]>
* Fix formatting
  Signed-off-by: Arthur Chan <[email protected]>
---------
Signed-off-by: Arthur Chan <[email protected]>
1 parent bcd321b commit f837c6b

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

src/fuzz_introspector/frontends/datatypes.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def __init__(self,
6262
self.LANGUAGE['cpp'])
6363
self.parser = Parser(self.tree_sitter_lang)
6464
self.full_type_defs: list[dict[str, Any]] = []
65+
self.macro_blocks: list[dict[str, Any]] = []
6566

6667
if source_content:
6768
self.source_content = source_content
@@ -137,6 +138,22 @@ def dump_type_definition(self,
137138
f.write(json.dumps(result))
138139
logger.info('Custom type definitions dumping completed.')
139140

141+
def dump_macro_block_info(self,
                          report_name: str = '',
                          dump_output: bool = True) -> None:
    """Dumps the macro block information for this project if exists.

    Collects the ``macro_blocks`` entries of every item in
    ``self.source_code_files`` and writes them to ``report_name`` as a
    single JSON list.

    Args:
        report_name: Path of the JSON file to write. An empty path makes
            the call a no-op instead of failing inside ``open('')``.
        dump_output: When False nothing is collected or written.
    """
    # Decide whether any output is wanted before walking the source files.
    if not dump_output or not report_name:
        return

    result = [
        block for source_code in self.source_code_files
        for block in source_code.macro_blocks
    ]

    # Do not create an empty report when no macro block was found.
    if not result:
        return

    logger.info('Dumping macro blocks information.')
    with open(report_name, 'w', encoding='utf-8') as f:
        f.write(json.dumps(result))
    logger.info('Macro blocks information dumping completed.')
156+
140157
def dump_module_logic(self,
141158
report_name: str = '',
142159
entry_function: str = '',

src/fuzz_introspector/frontends/frontend_c.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
from typing import Any, Optional
1919

20+
from tree_sitter import Node
21+
2022
import os
2123
import logging
2224
import copy
@@ -594,9 +596,14 @@ def language_specific_process(self):
594596

595597
# Load function definitions
596598
self._set_function_defintions()
599+
600+
# Extract type definition
597601
self.extract_types()
598602
self.process_type_defs()
599603

604+
# Extract macro blocks information
605+
self.extract_macros()
606+
600607
def process_type_defs(self):
601608
"""Helper to gather all custom type definitions."""
602609
self.full_type_defs.extend(self.struct_defs)
@@ -831,6 +838,91 @@ def extract_imported_header_files(self):
831838
'"', '').replace('>', '').replace('<', '')
832839
self.includes.add(include_path)
833840

841+
def extract_macros(self):
    """Extracts the macro blocks in the source code.

    Runs one tree-sitter query per conditional node kind against
    ``self.root``: first ``preproc_ifdef`` (#ifdef and #ifndef), then
    ``preproc_if`` (#if). Every captured node is handed to
    ``_process_macro_node`` with a fresh, empty condition list.
    """
    for node_kind in ('preproc_ifdef', 'preproc_if'):
        macro_query = self.tree_sitter_lang.query(f'( {node_kind} ) @sp')
        macro_query_res = macro_query.captures(self.root)

        # Only the captured nodes matter here; the capture name is unused.
        for macros in macro_query_res.values():
            for macro in macros:
                self._process_macro_node(macro, [])
856+
857+
def _process_macro_node(self, macro: Node, conditions: list[dict[str,
858+
str]]):
859+
"""Recursive function to process macro nodes and extract all #elif
860+
and #else macro sub-branches."""
861+
# if it is the #elif or #else branches, previous condition must be reversed.
862+
if conditions:
863+
if conditions[-1]['type'] == 'ifdef':
864+
conditions[-1]['type'] = 'ifndef'
865+
elif conditions[-1]['type'] == 'ifndef':
866+
conditions[-1]['type'] = 'ifdef'
867+
else:
868+
conditions[-1]['type'] = 'not'
869+
870+
if macro.type == 'preproc_ifdef':
871+
var_name = macro.child_by_field_name('name')
872+
873+
# Skip invalid macro
874+
if not var_name or not var_name.text:
875+
return
876+
877+
if macro and macro.text and macro.text.decode().startswith(
878+
'#ifdef'):
879+
type = 'ifdef'
880+
else:
881+
type = 'ifndef'
882+
conditions.append({
883+
'type': type,
884+
'condition': var_name.text.decode(),
885+
})
886+
elif macro.type in ['preproc_if', 'preproc_elif']:
887+
condition = macro.child_by_field_name('condition')
888+
889+
# Skip invalid macro
890+
if not condition or not condition.text:
891+
return
892+
893+
conditions.append({
894+
'type': 'if',
895+
'condition': condition.text.decode(),
896+
})
897+
898+
# Extract #else #elif branches
899+
alternative = macro.child_by_field_name('alternative')
900+
901+
if alternative:
902+
# Have #elif or #else branches
903+
self.macro_blocks.append({
904+
'conditions': conditions,
905+
'pos': {
906+
'source_file': self.source_file,
907+
'line_start': macro.start_point.row,
908+
'line_end': alternative.start_point.row,
909+
}
910+
})
911+
else:
912+
# No more #elif or #else branches
913+
self.macro_blocks.append({
914+
'conditions': conditions,
915+
'pos': {
916+
'source_file': self.source_file,
917+
'line_start': macro.start_point.row,
918+
'line_end': macro.end_point.row,
919+
}
920+
})
921+
return
922+
923+
# Recursively extract more #else or #elseif branches
924+
self._process_macro_node(alternative, copy.deepcopy(conditions))
925+
834926
def _set_function_defintions(self):
835927
func_def_query_str = '( function_definition ) @fd '
836928
func_def_query = self.tree_sitter_lang.query(func_def_query_str)

0 commit comments

Comments
 (0)