From 86089f9cdcf8868ad3bf1f8638b104f863cb0573 Mon Sep 17 00:00:00 2001 From: clearbluejar <3752074+clearbluejar@users.noreply.github.com> Date: Fri, 18 Apr 2025 16:21:49 +0000 Subject: [PATCH 1/6] support custom base address --- ghidriff/__main__.py | 3 ++- ghidriff/ghidra_diff_engine.py | 30 +++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/ghidriff/__main__.py b/ghidriff/__main__.py index 654f599..8d7a8a7 100644 --- a/ghidriff/__main__.py +++ b/ghidriff/__main__.py @@ -67,7 +67,8 @@ def main(): use_calling_counts=args.use_calling_counts, bsim=args.bsim, bsim_full=args.bsim_full, - gdts=args.gdt + gdts=args.gdt, + base_address=args.base_address ) d.setup_project(binary_paths, project_path, project_name, symbols_path) diff --git a/ghidriff/ghidra_diff_engine.py b/ghidriff/ghidra_diff_engine.py index 02cb045..590a57f 100644 --- a/ghidriff/ghidra_diff_engine.py +++ b/ghidriff/ghidra_diff_engine.py @@ -74,7 +74,8 @@ def __init__( use_calling_counts: bool = False, bsim: bool = True, bsim_full: bool = False, - gdts: list = []) -> None: + gdts: list = [], + base_address: int = None) -> None: # setup engine logging self.logger = self.setup_logger(engine_log_level) @@ -162,15 +163,26 @@ def __init__( self.bsim_full = bsim_full self.gdts = gdts + self.base_address = base_address self.logger.debug(f'{vars(self)}') - @ staticmethod + @staticmethod def add_ghidra_args_to_parser(parser: argparse.ArgumentParser) -> None: """ Add required Ghidra args to a parser """ + def _parse_ba(input_str: str) -> int: + try: + # Check if the string is hexadecimal + if input_str.lower().startswith("0x") or any(char in "abcdefABCDEF" for char in input_str): + return int(input_str, 16) # Convert from hexadecimal + else: + return int(input_str, 10) # Convert from decimal + except ValueError: + raise ValueError(f"Invalid input string: {input_str}. 
Ensure it's a valid hex or decimal value.") + group = parser.add_argument_group('Ghidra Project Options') group.add_argument('-p', '--project-location', help='Ghidra Project Path', default='ghidra_projects') group.add_argument('-n', '--project-name', help='Ghidra Project Name', default='ghidriff') @@ -196,6 +208,8 @@ def add_ghidra_args_to_parser(parser: argparse.ArgumentParser) -> None: group.add_argument('--use-calling-counts', help='Add calling/called reference counts', default=False, action=argparse.BooleanOptionalAction) group.add_argument('--gdt', action='append', help='Path to GDT file for analysis', default=[]) + group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba, + help='Set base address from both programs. 0x2000 or 8192') group = parser.add_argument_group('BSIM Options') group.add_argument('--bsim', help='Toggle using BSIM correlation', default=True, @@ -473,6 +487,16 @@ def setup_project( self.logger.info(f'Loaded {program}') + # set base address if provided + img_base = program.getImageBase() + if self.base_address is not None and self.base_address != img_base.offset: + self.logger.info(f'Setting {program} base address: 0x{img_base} to {hex(self.base_address)}') + new_image_base = img_base.getNewAddress(self.base_address) + program.setImageBase(new_image_base, True) + project.save(program) + else: + self.logger.info(f'Image base address: 0x{img_base}') + proj_programs.append(program) # Print of project files @@ -1044,7 +1068,7 @@ def get_funcs_from_addr_set( return funcs - @ abstractmethod + @abstractmethod def find_matches( self, p1: "ghidra.program.model.listing.Program", From 3b3e05f6a7491dd36e8dd3f03114e8c3d7d485fd Mon Sep 17 00:00:00 2001 From: clearbluejar <3752074+clearbluejar@users.noreply.github.com> Date: Fri, 18 Apr 2025 20:45:44 +0000 Subject: [PATCH 2/6] bump version and ghidra devcontiner addresss --- .devcontainer/devcontainer.json | 2 +- ghidriff/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 88c0e74..60890ca 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,7 @@ { "name": "ghidriff", // image from https://github.com/clearbluejar/ghidra-python - "image": "ghcr.io/clearbluejar/ghidra-python:11.3.1ghidra3.12python-bookworm", + "image": "ghcr.io/clearbluejar/ghidra-python:11.3.2ghidra3.12python-bookworm", // Configure tool-specific properties. "customizations": { // Configure properties specific to VS Code. diff --git a/ghidriff/__init__.py b/ghidriff/__init__.py index 271d526..a0add2b 100644 --- a/ghidriff/__init__.py +++ b/ghidriff/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.8.0' +__version__ = '0.9.0' __author__ = 'clearbluejar' # Expose API From d1f03611b05eefd6d64600cc572e4e49b0d55883 Mon Sep 17 00:00:00 2001 From: clearbluejar <3752074+clearbluejar@users.noreply.github.com> Date: Fri, 18 Apr 2025 21:03:43 +0000 Subject: [PATCH 3/6] add custom program options and tests --- ghidriff/__main__.py | 3 +- ghidriff/ghidra_diff_engine.py | 145 ++++++++++++++++-- tests/test_custom_base_address.py | 210 +++++++++++++++++++++++++++ tests/test_custom_program_options.py | 113 ++++++++++++++ 4 files changed, 460 insertions(+), 11 deletions(-) create mode 100644 tests/test_custom_base_address.py create mode 100644 tests/test_custom_program_options.py diff --git a/ghidriff/__main__.py b/ghidriff/__main__.py index 8d7a8a7..1945a83 100644 --- a/ghidriff/__main__.py +++ b/ghidriff/__main__.py @@ -68,7 +68,8 @@ def main(): bsim=args.bsim, bsim_full=args.bsim_full, gdts=args.gdt, - base_address=args.base_address + base_address=args.base_address, + program_options=args.program_options ) d.setup_project(binary_paths, project_path, project_name, symbols_path) diff --git a/ghidriff/ghidra_diff_engine.py b/ghidriff/ghidra_diff_engine.py index 590a57f..8d5ff48 100644 --- a/ghidriff/ghidra_diff_engine.py +++ 
b/ghidriff/ghidra_diff_engine.py @@ -75,7 +75,8 @@ def __init__( bsim: bool = True, bsim_full: bool = False, gdts: list = [], - base_address: int = None) -> None: + base_address: int = None, + program_options: dict = None) -> None: # setup engine logging self.logger = self.setup_logger(engine_log_level) @@ -164,6 +165,10 @@ def __init__( self.gdts = gdts self.base_address = base_address + if program_options is not None: + self.program_options = json.loads(Path(program_options).read_text()) + else: + self.program_options = None self.logger.debug(f'{vars(self)}') @@ -183,10 +188,40 @@ def _parse_ba(input_str: str) -> int: except ValueError: raise ValueError(f"Invalid input string: {input_str}. Ensure it's a valid hex or decimal value.") + def _load_program_options(file_path: str) -> int: + + # try: + # Ensure the input is a valid Path object + path = Path(file_path) + + # Check if the file exists and is a valid JSON file + if not path.is_file(): + raise FileNotFoundError(f"The file '{file_path}' does not exist.") + + # Load the JSON content + + data = None + try: + data = json.loads(path.read_text()) + except Exception as ex: + raise argparse.ArgumentTypeError( + f"Json {path.absolute()} could not be loaded as json. Check file. Exception:{ex}") + + # Check for the existence of keys + if not data.get('program_options') or not data['program_options'].get('Analyzers'): + raise argparse.ArgumentTypeError( + f"Missing keys in json: {path.absolute()}. 
Missing 'program_options' or 'Analyzers' key.") + + return file_path + group = parser.add_argument_group('Ghidra Project Options') group.add_argument('-p', '--project-location', help='Ghidra Project Path', default='ghidra_projects') group.add_argument('-n', '--project-name', help='Ghidra Project Name', default='ghidriff') group.add_argument('-s', '--symbols-path', help='Ghidra local symbol store directory', default='symbols') + group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba, + help='Set base address from both programs. 0x2000 or 8192'), + group.add_argument('--program-options', type=_load_program_options, + help='Path to json file with Program Options (custom analyzer settings)') group = parser.add_argument_group('Engine Options') group.add_argument('--threaded', help='Use threading during import, analysis, and diffing. Recommended', @@ -208,8 +243,6 @@ def _parse_ba(input_str: str) -> int: group.add_argument('--use-calling-counts', help='Add calling/called reference counts', default=False, action=argparse.BooleanOptionalAction) group.add_argument('--gdt', action='append', help='Path to GDT file for analysis', default=[]) - group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba, - help='Set base address from both programs. 0x2000 or 8192') group = parser.add_argument_group('BSIM Options') group.add_argument('--bsim', help='Toggle using BSIM correlation', default=True, @@ -844,7 +877,7 @@ def analyze_program(self, df_or_prog: Union["ghidra.framework.model.DomainFile", force_reload_for_symbols = False if force_reload_for_symbols: - self.set_analysis_option_bool(program, 'PDB Universal', True) + self.set_analysis_option(program, 'PDB Universal', True) self.logger.info('Symbols missing. Re-analysis is required. 
Setting PDB Universal: True') self.logger.debug(f'pdb loaded: {pdb_attr.isPdbLoaded()} prog analyzed: {pdb_attr.isProgramAnalyzed()}') @@ -854,17 +887,25 @@ def analyze_program(self, df_or_prog: Union["ghidra.framework.model.DomainFile", # handle large binaries more efficiently # see ghidra/issues/4573 (turn off feature Shared Return Calls ) if program and program.getFunctionManager().getFunctionCount() > 1000: - self.logger.warn(f"Turning off 'Shared Return Calls' for {program}") - self.set_analysis_option_bool( - program, 'Shared Return Calls.Assume Contiguous Functions Only', False) + if self.program_options is not None and self.program_options['program_options']['Analyzers'].get('Shared Return Calls.Assume Contiguous Functions Only') is None: + self.logger.warn(f"Turning off 'Shared Return Calls' for {program}") + self.set_analysis_option( + program, 'Shared Return Calls.Assume Contiguous Functions Only', False) - # TODO make this argument optional, or provide custom analyzer config parsing # This really helps with decompilation, was turned off by default in 10.x - self.set_analysis_option_bool(program, 'Decompiler Parameter ID', True) + # Will set by default unless specified by user + if self.program_options is not None and self.program_options['program_options']['Analyzers'].get('Decompiler Parameter ID') is None: + self.set_analysis_option(program, 'Decompiler Parameter ID', True) + + if self.program_options: + analyzer_options = self.program_options['program_options']['Analyzers'] + for k, v in analyzer_options.items(): + self.logger.info(f"Setting prog option:{k} with value:{v}") + self.set_analysis_option(program, k, v) if self.no_symbols: self.logger.warn(f'Disabling symbols for analysis! 
--no-symbols flag: {self.no_symbols}') - self.set_analysis_option_bool(program, 'PDB Universal', False) + self.set_analysis_option(program, 'PDB Universal', False) self.logger.info(f'Starting Ghidra analysis of {program}...') try: @@ -1000,6 +1041,83 @@ def get_program_options( return options + def set_analysis_option( + self, + prog: "ghidra.program.model.listing.Program", + option_name: str, + value: bool + ) -> None: + """ + Set boolean program analysis options + Inspired by: Ghidra/Features/Base/src/main/java/ghidra/app/script/GhidraScript.java#L1272 + """ + + from ghidra.program.model.listing import Program + + prog_options = prog.getOptions(Program.ANALYSIS_PROPERTIES) + + # prog_options = prog.getOptions(name) + options = {} + + for propName in prog_options.getOptionNames(): + prog_options.getType(propName) + + option_type = prog_options.getType(option_name) + + match str(option_type): + case "INT_TYPE": + self.logger.debug(f'Setting type: INT') + prog_options.setInt(option_name, int(value)) + case "LONG_TYPE": + self.logger.debug(f'Setting type: LONG') + prog_options.setLong(option_name, int(value)) + case "STRING_TYPE": + self.logger.debug(f'Setting type: STRING') + prog_options.setString(option_name, value) + case "DOUBLE_TYPE": + self.logger.debug(f'Setting type: DOUBLE') + prog_options.setDouble(option_name, float(value)) + case "FLOAT_TYPE": + self.logger.debug(f'Setting type: FLOAT') + prog_options.setFloat(option_name, float(value)) + case "BOOLEAN_TYPE": + self.logger.debug(f'Setting type: BOOLEAN') + if isinstance(value, str): + temp_bool = value.lower() + if temp_bool in {"true", "false"}: + prog_options.setBoolean(option_name, temp_bool == "true") + elif isinstance(value, bool): + prog_options.setBoolean(option_name, value) + else: + raise ValueError(f"Failed to setBoolean on {option_name} {option_type}") + + case "ENUM_TYPE": + self.logger.debug(f'Setting type: ENUM') + enum_for_option = prog_options.getEnum(option_name, None) + if 
enum_for_option is None: + raise ValueError( + f"Attempted to set an Enum option {option_name} without an " + "existing enum value alreday set.") + + from java.lang import Enum + new_enum = None + try: + new_enum = Enum.valueOf(enum_for_option.getClass(), value) + except: + for enumValue in enum_for_option.values(): + if value == enumValue.toString(): + new_enum = enumValue + break + + if new_enum is None: + raise ValueError( + f"Attempted to set an Enum option {option_name} without an " + "existing enum value alreday set.") + + prog_options.setEnum(option_name, new_enum) + + case _: + # do nothing; don't allow user to set these options (doesn't make any sense) + self.logger.warning(f'option {option_type} set not supported, ignoring') + def set_analysis_option_bool( self, prog: "ghidra.program.model.listing.Program", @@ -1015,6 +1133,13 @@ def set_analysis_option_bool( prog_options = prog.getOptions(Program.ANALYSIS_PROPERTIES) + # prog_options = prog.getOptions(name) + options = {} + + for propName in prog_options.getOptionNames(): + options[propName] = prog_options.getValueAsString(propName) + prog_options.getType(propName) + prog_options.setBoolean(option_name, value) def set_proginfo_option_bool( diff --git a/tests/test_custom_base_address.py b/tests/test_custom_base_address.py new file mode 100644 index 0000000..0337ce0 --- /dev/null +++ b/tests/test_custom_base_address.py @@ -0,0 +1,210 @@ +from pathlib import Path +import json +import pytest + +from ghidriff import get_parser, GhidraDiffEngine, VersionTrackingDiff + +SYMBOLS_DIR = 'symbols' +BINS_DIR = 'bins' +PROG_OPTIONS_DIR = 'prog_options' + +BASE_ADDR_HEX = "0x2f000" +BASE_ADDR_DEC = "192512" + + +@pytest.mark.forked +def test_custom_base_addr_hex_afd(shared_datadir: Path): + """ + Tests end to end diff of CVE + runs forked because each jpype jvm can only be initialized 1x + """ + + test_name = 'test_afd_prog_options' + output_path = shared_datadir / test_name + output_path.mkdir(exist_ok=True, 
parents=True) + symbols_path = shared_datadir / SYMBOLS_DIR + bins_path = shared_datadir / BINS_DIR + prog_options_path = shared_datadir / PROG_OPTIONS_DIR / 'prog_options.json' + ghidra_project_path = output_path / 'ghidra_projects' + ghidra_project_path.mkdir(exist_ok=True, parents=True) + + # setup bins + old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028' + new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415' + + assert old_bin_path.exists() + assert new_bin_path.exists() + + parser = get_parser() + + GhidraDiffEngine.add_ghidra_args_to_parser(parser) + + args = parser.parse_args([ + '-s', + str(symbols_path), + str(old_bin_path.absolute()), + str(new_bin_path.absolute()), + '-p', + str(ghidra_project_path.absolute()), + '--base-address', + BASE_ADDR_HEX + ]) + + engine_log_path = output_path / parser.get_default('log_path') + + binary_paths = args.old + [bin for sublist in args.new for bin in sublist] + + binary_paths = [Path(path) for path in binary_paths] + + if any([not path.exists() for path in binary_paths]): + missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()] + raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}") + + project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}' + + DiffEngine: GhidraDiffEngine = VersionTrackingDiff + + d: GhidraDiffEngine = DiffEngine(args=args, + verbose=True, + threaded=args.threaded, + max_ram_percent=args.max_ram_percent, + print_jvm_flags=args.print_flags, + jvm_args=args.jvm_args, + force_analysis=args.force_analysis, + force_diff=args.force_diff, + verbose_analysis=args.va, + no_symbols=args.no_symbols, + engine_log_path=engine_log_path, + engine_log_level=args.log_level, + engine_file_log_level=args.file_log_level, + base_address=args.base_address + ) + + d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path) + + d.analyze_project() + + pdiff = d.diff_bins(old_bin_path, new_bin_path) + pdiff_json = 
json.dumps(pdiff) + + d.validate_diff_json(pdiff_json) + + diff_name = f"{old_bin_path.name}-{new_bin_path.name}_diff" + + d.dump_pdiff_to_path(diff_name, + pdiff, + output_path, + side_by_side=args.side_by_side, + max_section_funcs=args.max_section_funcs, + md_title=args.md_title) + + assert len(pdiff['functions']['modified']) == 11 + assert len(pdiff['functions']['added']) == 28 + assert len(pdiff['functions']['deleted']) == 0 + + func_name = "AfdNotifyRemoveIoCompletion" + assert any([func_name in func['old']['name'] or func_name in func['new']['name'] + for func in pdiff['functions']['modified']]) is True + + # check to see if minimum address matches set base address + assert (pdiff['old_meta']['Minimum Address'] == '0002f000') + assert (pdiff['new_meta']['Minimum Address'] == '0002f000') + + +@pytest.mark.forked +def test_custom_base_addr_dec_afd(shared_datadir: Path): + """ + Tests end to end diff of CVE + runs forked because each jpype jvm can only be initialized 1x + """ + + test_name = 'test_afd_prog_options' + output_path = shared_datadir / test_name + output_path.mkdir(exist_ok=True, parents=True) + symbols_path = shared_datadir / SYMBOLS_DIR + bins_path = shared_datadir / BINS_DIR + prog_options_path = shared_datadir / PROG_OPTIONS_DIR / 'prog_options.json' + ghidra_project_path = output_path / 'ghidra_projects' + ghidra_project_path.mkdir(exist_ok=True, parents=True) + + # setup bins + old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028' + new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415' + + assert old_bin_path.exists() + assert new_bin_path.exists() + + parser = get_parser() + + GhidraDiffEngine.add_ghidra_args_to_parser(parser) + + args = parser.parse_args([ + '-s', + str(symbols_path), + str(old_bin_path.absolute()), + str(new_bin_path.absolute()), + '-p', + str(ghidra_project_path.absolute()), + '--base-address', + BASE_ADDR_DEC + ]) + + engine_log_path = output_path / parser.get_default('log_path') + + binary_paths = args.old + [bin 
for sublist in args.new for bin in sublist] + + binary_paths = [Path(path) for path in binary_paths] + + if any([not path.exists() for path in binary_paths]): + missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()] + raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}") + + project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}' + + DiffEngine: GhidraDiffEngine = VersionTrackingDiff + + d: GhidraDiffEngine = DiffEngine(args=args, + verbose=True, + threaded=args.threaded, + max_ram_percent=args.max_ram_percent, + print_jvm_flags=args.print_flags, + jvm_args=args.jvm_args, + force_analysis=args.force_analysis, + force_diff=args.force_diff, + verbose_analysis=args.va, + no_symbols=args.no_symbols, + engine_log_path=engine_log_path, + engine_log_level=args.log_level, + engine_file_log_level=args.file_log_level, + base_address=args.base_address + ) + + d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path) + + d.analyze_project() + + pdiff = d.diff_bins(old_bin_path, new_bin_path) + pdiff_json = json.dumps(pdiff) + + d.validate_diff_json(pdiff_json) + + diff_name = f"{old_bin_path.name}-{new_bin_path.name}_diff" + + d.dump_pdiff_to_path(diff_name, + pdiff, + output_path, + side_by_side=args.side_by_side, + max_section_funcs=args.max_section_funcs, + md_title=args.md_title) + + assert len(pdiff['functions']['modified']) == 11 + assert len(pdiff['functions']['added']) == 28 + assert len(pdiff['functions']['deleted']) == 0 + + func_name = "AfdNotifyRemoveIoCompletion" + assert any([func_name in func['old']['name'] or func_name in func['new']['name'] + for func in pdiff['functions']['modified']]) is True + + # check to see if minimum address matches set base address + assert (pdiff['old_meta']['Minimum Address'] == '0002f000') + assert (pdiff['new_meta']['Minimum Address'] == '0002f000') diff --git a/tests/test_custom_program_options.py b/tests/test_custom_program_options.py new 
file mode 100644 index 0000000..e4a1076 --- /dev/null +++ b/tests/test_custom_program_options.py @@ -0,0 +1,113 @@ +from pathlib import Path +import json +import pytest + +from ghidriff import get_parser, GhidraDiffEngine, VersionTrackingDiff + +SYMBOLS_DIR = 'symbols' +BINS_DIR = 'bins' +PROG_OPTIONS_DIR = 'prog_options' + + +@pytest.mark.forked +def test_custom_program_options_afd_cve_2023_21768(shared_datadir: Path): + """ + Tests end to end diff of CVE + runs forked because each jpype jvm can only be initialized 1x + """ + + test_name = 'test_afd_prog_options' + output_path = shared_datadir / test_name + output_path.mkdir(exist_ok=True, parents=True) + symbols_path = shared_datadir / SYMBOLS_DIR + bins_path = shared_datadir / BINS_DIR + prog_options_path = shared_datadir / PROG_OPTIONS_DIR / 'prog_options.json' + ghidra_project_path = output_path / 'ghidra_projects' + ghidra_project_path.mkdir(exist_ok=True, parents=True) + + # setup bins + old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028' + new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415' + + assert old_bin_path.exists() + assert new_bin_path.exists() + + parser = get_parser() + + GhidraDiffEngine.add_ghidra_args_to_parser(parser) + + args = parser.parse_args([ + '-s', + str(symbols_path), + str(old_bin_path.absolute()), + str(new_bin_path.absolute()), + '-p', + str(ghidra_project_path.absolute()), + '--program-options', + str(prog_options_path.absolute()) + ]) + + engine_log_path = output_path / parser.get_default('log_path') + + binary_paths = args.old + [bin for sublist in args.new for bin in sublist] + + binary_paths = [Path(path) for path in binary_paths] + + if any([not path.exists() for path in binary_paths]): + missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()] + raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}") + + project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}' + + DiffEngine: GhidraDiffEngine = 
VersionTrackingDiff + + d: GhidraDiffEngine = DiffEngine(args=args, + verbose=True, + threaded=args.threaded, + max_ram_percent=args.max_ram_percent, + print_jvm_flags=args.print_flags, + jvm_args=args.jvm_args, + force_analysis=args.force_analysis, + force_diff=args.force_diff, + verbose_analysis=args.va, + no_symbols=args.no_symbols, + engine_log_path=engine_log_path, + engine_log_level=args.log_level, + engine_file_log_level=args.file_log_level, + program_options=args.program_options + ) + + d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path) + + d.analyze_project() + + pdiff = d.diff_bins(old_bin_path, new_bin_path) + pdiff_json = json.dumps(pdiff) + + d.validate_diff_json(pdiff_json) + + diff_name = f"{old_bin_path.name}-{new_bin_path.name}_diff" + + d.dump_pdiff_to_path(diff_name, + pdiff, + output_path, + side_by_side=args.side_by_side, + max_section_funcs=args.max_section_funcs, + md_title=args.md_title) + + assert len(pdiff['functions']['modified']) == 12 + assert len(pdiff['functions']['added']) == 28 + assert len(pdiff['functions']['deleted']) == 0 + + func_name = "AfdNotifyRemoveIoCompletion" + assert any([func_name in func['old']['name'] or func_name in func['new']['name'] + for func in pdiff['functions']['modified']]) is True + + # check to see if no default setting is set + # "ASCII Strings.Force Model Reload": "false", <- normal + # "ASCII Strings.Force Model Reload": "true", <- check this is true + + # print(pdiff['program_options']['Analyzers']["ASCII Strings.Force Model Reload"]) + # print(pdiff['program_options']['Analyzers']) + assert (pdiff['program_options']['afd.sys.x64.10.0.22621.1028']['Analyzers']["ASCII Strings.Force Model Reload"] == 'true') + assert (pdiff['program_options']['afd.sys.x64.10.0.22621.1415']['Analyzers']["ASCII Strings.Force Model Reload"] == 'true') From 6fe5fb449e4a37812e96a1fa480dcc0a9639aa42 Mon Sep 17 00:00:00 2001 From: clearbluejar <3752074+clearbluejar@users.noreply.github.com> 
Date: Fri, 18 Apr 2025 23:10:58 +0000 Subject: [PATCH 4/6] fix test names --- tests/test_custom_base_address.py | 4 ++-- tests/test_custom_program_options.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_custom_base_address.py b/tests/test_custom_base_address.py index 0337ce0..895e6d4 100644 --- a/tests/test_custom_base_address.py +++ b/tests/test_custom_base_address.py @@ -19,7 +19,7 @@ def test_custom_base_addr_hex_afd(shared_datadir: Path): runs forked because each jpype jvm can only be initialized 1x """ - test_name = 'test_afd_prog_options' + test_name = 'test_custom_base_addr_hex_afd' output_path = shared_datadir / test_name output_path.mkdir(exist_ok=True, parents=True) symbols_path = shared_datadir / SYMBOLS_DIR @@ -118,7 +118,7 @@ def test_custom_base_addr_dec_afd(shared_datadir: Path): runs forked because each jpype jvm can only be initialized 1x """ - test_name = 'test_afd_prog_options' + test_name = 'test_custom_base_addr_dec_afd' output_path = shared_datadir / test_name output_path.mkdir(exist_ok=True, parents=True) symbols_path = shared_datadir / SYMBOLS_DIR diff --git a/tests/test_custom_program_options.py b/tests/test_custom_program_options.py index e4a1076..87ed188 100644 --- a/tests/test_custom_program_options.py +++ b/tests/test_custom_program_options.py @@ -10,13 +10,13 @@ @pytest.mark.forked -def test_custom_program_options_afd_cve_2023_21768(shared_datadir: Path): +def test_custom_program_options_afd(shared_datadir: Path): """ Tests end to end diff of CVE runs forked because each jpype jvm can only be initialized 1x """ - test_name = 'test_afd_prog_options' + test_name = 'test_custom_program_options_afd' output_path = shared_datadir / test_name output_path.mkdir(exist_ok=True, parents=True) symbols_path = shared_datadir / SYMBOLS_DIR From 5711306d629d50b952bda0659795d51a8fd3e925 Mon Sep 17 00:00:00 2001 From: clearbluejar <3752074+clearbluejar@users.noreply.github.com> Date: Fri, 18 Apr 2025 23:11:33 +0000 
Subject: [PATCH 5/6] add gzf ability. fixes #108 --- ghidriff/__main__.py | 7 ++- ghidriff/ghidra_diff_engine.py | 15 +++++- tests/test_gzfs.py | 88 ++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 tests/test_gzfs.py diff --git a/ghidriff/__main__.py b/ghidriff/__main__.py index 1945a83..d4659cf 100644 --- a/ghidriff/__main__.py +++ b/ghidriff/__main__.py @@ -37,6 +37,11 @@ def main(): else: symbols_path = Path(args.symbols_path) + if args.gzfs_path == parser.get_default('gzfs_path'): + gzfs_path = output_path / parser.get_default('gzfs_path') + else: + gzfs_path = Path(args.gzfs_path) + binary_paths = args.old + [bin for sublist in args.new for bin in sublist] binary_paths = [Path(path) for path in binary_paths] @@ -72,7 +77,7 @@ def main(): program_options=args.program_options ) - d.setup_project(binary_paths, project_path, project_name, symbols_path) + d.setup_project(binary_paths, project_path, project_name, symbols_path, gzfs_path) d.analyze_project() diff --git a/ghidriff/ghidra_diff_engine.py b/ghidriff/ghidra_diff_engine.py index 8d5ff48..22729fc 100644 --- a/ghidriff/ghidra_diff_engine.py +++ b/ghidriff/ghidra_diff_engine.py @@ -218,6 +218,7 @@ def _load_program_options(file_path: str) -> int: group.add_argument('-p', '--project-location', help='Ghidra Project Path', default='ghidra_projects') group.add_argument('-n', '--project-name', help='Ghidra Project Name', default='ghidriff') group.add_argument('-s', '--symbols-path', help='Ghidra local symbol store directory', default='symbols') + group.add_argument('-g', '--gzfs-path', help='Location to store GZFs of analyzed binaries', default='gzfs') group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba, help='Set base address from both programs. 
0x2000 or 8192'), group.add_argument('--program-options', type=_load_program_options, @@ -455,7 +456,9 @@ def setup_project( project_location: Union[str, Path], project_name: str, symbols_path: Union[str, Path], + gzfs_path: Union[str, Path] = None, symbol_urls: list = None, + ) -> list: """ Setup and verify Ghidra Project @@ -470,6 +473,12 @@ def setup_project( project_location = Path(project_location) / project_name project_location.mkdir(exist_ok=True, parents=True) + + if gzfs_path is not None: + gzfs_path = Path(gzfs_path) + gzfs_path.mkdir(exist_ok=True, parents=True) + self.gzfs_path = gzfs_path + pdb = None self.logger.info(f'Setting Up Ghidra Project...') @@ -922,8 +931,10 @@ def analyze_program(self, df_or_prog: Union["ghidra.framework.model.DomainFile", else: self.logger.info(f"Analysis already complete.. skipping {program}!") finally: - # from java.io import File - # self.project.saveAsPackedFile(program,File(f'/tmp/{program.name}.gzf'), True) + # optionally save GZF + if self.gzfs_path is not None: + from java.io import File + self.project.saveAsPackedFile(program, File((self.gzfs_path / f"{df_or_prog.getName()}.gzf").absolute()), True) self.project.close(program) self.logger.info(f"Analysis for {df_or_prog} complete") diff --git a/tests/test_gzfs.py b/tests/test_gzfs.py new file mode 100644 index 0000000..8bbb999 --- /dev/null +++ b/tests/test_gzfs.py @@ -0,0 +1,88 @@ +from pathlib import Path +import json +import pytest + +from ghidriff import get_parser, GhidraDiffEngine, VersionTrackingDiff + +SYMBOLS_DIR = 'symbols' +BINS_DIR = 'bins' +PROG_OPTIONS_DIR = 'prog_options' + +BASE_ADDR_HEX = "0x2f000" +BASE_ADDR_DEC = "192512" + + +@pytest.mark.forked +def test_gzfs_exist(shared_datadir: Path): + """ + Tests end to end diff of CVE + runs forked because each jpype jvm can only be initialized 1x + """ + + test_name = 'test_gzfs_exist' + output_path = shared_datadir / test_name + output_path.mkdir(exist_ok=True, parents=True) + symbols_path = 
shared_datadir / SYMBOLS_DIR + bins_path = shared_datadir / BINS_DIR + ghidra_project_path = output_path / 'ghidra_projects' + ghidra_project_path.mkdir(exist_ok=True, parents=True) + + # setup bins + old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028' + new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415' + + assert old_bin_path.exists() + assert new_bin_path.exists() + + parser = get_parser() + + GhidraDiffEngine.add_ghidra_args_to_parser(parser) + + args = parser.parse_args([ + '-s', + str(symbols_path), + str(old_bin_path.absolute()), + str(new_bin_path.absolute()), + '-p', + str(ghidra_project_path.absolute()), + ]) + + engine_log_path = output_path / parser.get_default('log_path') + + binary_paths = args.old + [bin for sublist in args.new for bin in sublist] + + binary_paths = [Path(path) for path in binary_paths] + + if any([not path.exists() for path in binary_paths]): + missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()] + raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}") + + project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}' + + DiffEngine: GhidraDiffEngine = VersionTrackingDiff + + d: GhidraDiffEngine = DiffEngine(args=args, + verbose=True, + threaded=args.threaded, + max_ram_percent=args.max_ram_percent, + print_jvm_flags=args.print_flags, + jvm_args=args.jvm_args, + force_analysis=args.force_analysis, + force_diff=args.force_diff, + verbose_analysis=args.va, + no_symbols=args.no_symbols, + engine_log_path=engine_log_path, + engine_log_level=args.log_level, + engine_file_log_level=args.file_log_level, + base_address=args.base_address + ) + + gzfs_path: Path = output_path / parser.get_default('gzfs_path') + + d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path, gzfs_path) + + d.analyze_project() + + assert (gzfs_path.exists()) + count = len([file for file in gzfs_path.iterdir()]) + assert (count == 2) From 
d3dfb0f754cd5baadaa7ad7989384bdf2bb771f0 Mon Sep 17 00:00:00 2001 From: clearbluejar <3752074+clearbluejar@users.noreply.github.com> Date: Fri, 18 Apr 2025 23:44:17 +0000 Subject: [PATCH 6/6] Update README with new features. --- README.md | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c15859b..6bb181d 100644 --- a/README.md +++ b/README.md @@ -138,8 +138,12 @@ Each implementation leverages the base class, and implements `find_changes`. ## Usage ```bash -usage: ghidriff [-h] [--engine {SimpleDiff,StructualGraphDiff,VersionTrackingDiff}] [-o OUTPUT_PATH] [--summary SUMMARY] [-p PROJECT_LOCATION] [-n PROJECT_NAME] [-s SYMBOLS_PATH] [--threaded | --no-threaded] [--force-analysis] [--force-diff] [--no-symbols] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] - [--file-log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] [--log-path LOG_PATH] [--va] [--min-func-len MIN_FUNC_LEN] [--use-calling-counts USE_CALLING_COUNTS] [--max-ram-percent MAX_RAM_PERCENT] [--print-flags] [--jvm-args [JVM_ARGS]] [--sxs] [--max-section-funcs MAX_SECTION_FUNCS] +usage: ghidriff [-h] [--engine {SimpleDiff,StructualGraphDiff,VersionTrackingDiff}] [-o OUTPUT_PATH] [--summary SUMMARY] [-p PROJECT_LOCATION] + [-n PROJECT_NAME] [-s SYMBOLS_PATH] [-g GZFS_PATH] [--ba BASE_ADDRESS] [--program-options PROGRAM_OPTIONS] [--threaded | --no-threaded] + [--force-analysis] [--force-diff] [--no-symbols] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] + [--file-log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] [--log-path LOG_PATH] [--va] [--min-func-len MIN_FUNC_LEN] + [--use-calling-counts | --no-use-calling-counts] [--gdt GDT] [--bsim | --no-bsim] [--bsim-full | --no-bsim-full] + [--max-ram-percent MAX_RAM_PERCENT] [--print-flags] [--jvm-args [JVM_ARGS]] [--sxs] [--max-section-funcs MAX_SECTION_FUNCS] [--md-title MD_TITLE] old new [new 
...] @@ -166,6 +170,7 @@ There are quite a few options here, and some complexity. Generally you can succe
Show Extended Usage ```bash + Ghidra Project Options: -p PROJECT_LOCATION, --project-location PROJECT_LOCATION Ghidra Project Path (default: ghidra_projects) @@ -173,6 +178,12 @@ Ghidra Project Options: Ghidra Project Name (default: ghidriff) -s SYMBOLS_PATH, --symbols-path SYMBOLS_PATH Ghidra local symbol store directory (default: symbols) + -g GZFS_PATH, --gzfs-path GZFS_PATH + Location to store GZFs of analyzed binaries (default: gzfs) + --ba BASE_ADDRESS, --base-address BASE_ADDRESS + Set base address from both programs. 0x2000 or 8192 (default: None) + --program-options PROGRAM_OPTIONS + Path to json file with Program Options (custom analyzer settings) (default: None) Engine Options: --threaded, --no-threaded @@ -189,8 +200,14 @@ Engine Options: Verbose logging for analysis step. (default: False) --min-func-len MIN_FUNC_LEN Minimum function length to consider for diff (default: 10) - --use-calling-counts USE_CALLING_COUNTS - Add calling/called reference counts (default: True) + --use-calling-counts, --no-use-calling-counts + Add calling/called reference counts (default: False) + --gdt GDT Path to GDT file for analysis (default: []) + +BSIM Options: + --bsim, --no-bsim Toggle using BSIM correlation (default: True) + --bsim-full, --no-bsim-full + Slower but better matching. Use only when needed (default: False) JVM Options: --max-ram-percent MAX_RAM_PERCENT @@ -208,6 +225,58 @@ Markdown Options:
+### Using Custom Analyzer Settings + +If you want to configure specific analyzers for your Ghidra binary analysis, pass a custom `program_options.json` with `--program-options`. + +```bash +ghidriff --program-options program_options.json tapisrv.dll.x64.10.0.10240.20708 tapisrv.dll.x64.10.0.10240.20708 +``` + +The `program_options.json` would need to look something like this: + +
+ +```json +{ + "program_options": { + "binary_name": null, + "Analyzers": { + "ASCII Strings": "true", + "ASCII Strings.Create Strings Containing Existing Strings": "true", + "ASCII Strings.Create Strings Containing References": "true", + "ASCII Strings.Force Model Reload": "true", + "ASCII Strings.Minimum String Length": "LEN_5", + "ASCII Strings.Model File": "StringModel.sng", + "ASCII Strings.Require Null Termination for String": "true", + "ASCII Strings.Search Only in Accessible Memory Blocks": "true", + "ASCII Strings.String Start Alignment": "ALIGN_1", + "ASCII Strings.String end alignment": "4", + "Aggressive Instruction Finder": "false", + "Aggressive Instruction Finder.Create Analysis Bookmarks": "true", + "Apply Data Archives": "true", + "Apply Data Archives.Archive Chooser": "[Auto-Detect]", + "Apply Data Archives.Create Analysis Bookmarks": "true", + "Apply Data Archives.GDT User File Archive Path": null, + "Apply Data Archives.User Project Archive Path": null, + "Call Convention ID": "true" + } + } +} +``` + +
+ +The custom settings will then be used for your binary analysis. + +### Setting a Custom Image Base Address (Bootloaders, etc.) + +If you are reverse engineering firmware or another binary that must be loaded at a specific address, use the `--base-address` (or `--ba`) parameter to set the image base for both binaries; it accepts hex (`0x80000`) or decimal (`524288`). + +```bash +$ ghidriff --base-address 0x80000 STM32F103C-firmware.bin STM32F103Ca-firmware.bin +``` + ## Quick Start Environment Setup 1. [Download](https://github.com/NationalSecurityAgency/ghidra/releases) and [install Ghidra](https://htmlpreview.github.io/?https://github.com/NationalSecurityAgency/ghidra/blob/stable/GhidraDocs/InstallationGuide.html#Install). @@ -227,6 +296,13 @@ export GHIDRA_INSTALL_DIR="/path/to/ghidra/" pip install ghidriff ``` +### UV + +```bash +export GHIDRA_INSTALL_DIR="/path/to/ghidra/" +uvx ghidriff +``` + ## Ghidriff in a Box Don't want to install Ghidra and Java on your host? Try "Ghidriff in a box". It supports multiple-platforms (x64 and arm64).