From 86089f9cdcf8868ad3bf1f8638b104f863cb0573 Mon Sep 17 00:00:00 2001
From: clearbluejar <3752074+clearbluejar@users.noreply.github.com>
Date: Fri, 18 Apr 2025 16:21:49 +0000
Subject: [PATCH 1/6] support custom base address
---
ghidriff/__main__.py | 3 ++-
ghidriff/ghidra_diff_engine.py | 30 +++++++++++++++++++++++++++---
2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/ghidriff/__main__.py b/ghidriff/__main__.py
index 654f599..8d7a8a7 100644
--- a/ghidriff/__main__.py
+++ b/ghidriff/__main__.py
@@ -67,7 +67,8 @@ def main():
use_calling_counts=args.use_calling_counts,
bsim=args.bsim,
bsim_full=args.bsim_full,
- gdts=args.gdt
+ gdts=args.gdt,
+ base_address=args.base_address
)
d.setup_project(binary_paths, project_path, project_name, symbols_path)
diff --git a/ghidriff/ghidra_diff_engine.py b/ghidriff/ghidra_diff_engine.py
index 02cb045..590a57f 100644
--- a/ghidriff/ghidra_diff_engine.py
+++ b/ghidriff/ghidra_diff_engine.py
@@ -74,7 +74,8 @@ def __init__(
use_calling_counts: bool = False,
bsim: bool = True,
bsim_full: bool = False,
- gdts: list = []) -> None:
+ gdts: list = [],
+ base_address: int = None) -> None:
# setup engine logging
self.logger = self.setup_logger(engine_log_level)
@@ -162,15 +163,26 @@ def __init__(
self.bsim_full = bsim_full
self.gdts = gdts
+ self.base_address = base_address
self.logger.debug(f'{vars(self)}')
- @ staticmethod
+ @staticmethod
def add_ghidra_args_to_parser(parser: argparse.ArgumentParser) -> None:
"""
Add required Ghidra args to a parser
"""
+        def _parse_ba(input_str: str) -> int:
+            """argparse type: parse a base address given as hex (0x2000, 2f000) or decimal (8192)"""
+            # an a-f digit anywhere marks the string as hex even without a 0x prefix
+            looks_hex = input_str.lower().startswith("0x") or any(char in "abcdefABCDEF" for char in input_str)
+            radix = 16 if looks_hex else 10
+            try:
+                return int(input_str, radix)
+            except ValueError:
+                raise ValueError(f"Invalid input string: {input_str}. Ensure it's a valid hex or decimal value.")
+
group = parser.add_argument_group('Ghidra Project Options')
group.add_argument('-p', '--project-location', help='Ghidra Project Path', default='ghidra_projects')
group.add_argument('-n', '--project-name', help='Ghidra Project Name', default='ghidriff')
@@ -196,6 +208,8 @@ def add_ghidra_args_to_parser(parser: argparse.ArgumentParser) -> None:
group.add_argument('--use-calling-counts', help='Add calling/called reference counts', default=False,
action=argparse.BooleanOptionalAction)
group.add_argument('--gdt', action='append', help='Path to GDT file for analysis', default=[])
+ group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba,
+ help='Set base address from both programs. 0x2000 or 8192')
group = parser.add_argument_group('BSIM Options')
group.add_argument('--bsim', help='Toggle using BSIM correlation', default=True,
@@ -473,6 +487,16 @@ def setup_project(
self.logger.info(f'Loaded {program}')
+ # set base address if provided
+ img_base = program.getImageBase()
+ if self.base_address is not None and self.base_address != img_base.offset:
+ self.logger.info(f'Setting {program} base address: 0x{img_base} to {hex(self.base_address)}')
+ new_image_base = img_base.getNewAddress(self.base_address)
+ program.setImageBase(new_image_base, True)
+ project.save(program)
+ else:
+ self.logger.info(f'Image base address: 0x{img_base}')
+
proj_programs.append(program)
# Print of project files
@@ -1044,7 +1068,7 @@ def get_funcs_from_addr_set(
return funcs
- @ abstractmethod
+ @abstractmethod
def find_matches(
self,
p1: "ghidra.program.model.listing.Program",
From 3b3e05f6a7491dd36e8dd3f03114e8c3d7d485fd Mon Sep 17 00:00:00 2001
From: clearbluejar <3752074+clearbluejar@users.noreply.github.com>
Date: Fri, 18 Apr 2025 20:45:44 +0000
Subject: [PATCH 2/6] bump version and ghidra devcontainer address
---
.devcontainer/devcontainer.json | 2 +-
ghidriff/__init__.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 88c0e74..60890ca 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -3,7 +3,7 @@
{
"name": "ghidriff",
// image from https://github.com/clearbluejar/ghidra-python
- "image": "ghcr.io/clearbluejar/ghidra-python:11.3.1ghidra3.12python-bookworm",
+ "image": "ghcr.io/clearbluejar/ghidra-python:11.3.2ghidra3.12python-bookworm",
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
diff --git a/ghidriff/__init__.py b/ghidriff/__init__.py
index 271d526..a0add2b 100644
--- a/ghidriff/__init__.py
+++ b/ghidriff/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.8.0'
+__version__ = '0.9.0'
__author__ = 'clearbluejar'
# Expose API
From d1f03611b05eefd6d64600cc572e4e49b0d55883 Mon Sep 17 00:00:00 2001
From: clearbluejar <3752074+clearbluejar@users.noreply.github.com>
Date: Fri, 18 Apr 2025 21:03:43 +0000
Subject: [PATCH 3/6] add custom program options and tests
---
ghidriff/__main__.py | 3 +-
ghidriff/ghidra_diff_engine.py | 145 ++++++++++++++++--
tests/test_custom_base_address.py | 210 +++++++++++++++++++++++++++
tests/test_custom_program_options.py | 113 ++++++++++++++
4 files changed, 460 insertions(+), 11 deletions(-)
create mode 100644 tests/test_custom_base_address.py
create mode 100644 tests/test_custom_program_options.py
diff --git a/ghidriff/__main__.py b/ghidriff/__main__.py
index 8d7a8a7..1945a83 100644
--- a/ghidriff/__main__.py
+++ b/ghidriff/__main__.py
@@ -68,7 +68,8 @@ def main():
bsim=args.bsim,
bsim_full=args.bsim_full,
gdts=args.gdt,
- base_address=args.base_address
+ base_address=args.base_address,
+ program_options=args.program_options
)
d.setup_project(binary_paths, project_path, project_name, symbols_path)
diff --git a/ghidriff/ghidra_diff_engine.py b/ghidriff/ghidra_diff_engine.py
index 590a57f..8d5ff48 100644
--- a/ghidriff/ghidra_diff_engine.py
+++ b/ghidriff/ghidra_diff_engine.py
@@ -75,7 +75,8 @@ def __init__(
bsim: bool = True,
bsim_full: bool = False,
gdts: list = [],
- base_address: int = None) -> None:
+ base_address: int = None,
+ program_options: dict = None) -> None:
# setup engine logging
self.logger = self.setup_logger(engine_log_level)
@@ -164,6 +165,10 @@ def __init__(
self.gdts = gdts
self.base_address = base_address
+ if program_options is not None:
+ self.program_options = json.loads(Path(program_options).read_text())
+ else:
+ self.program_options = None
self.logger.debug(f'{vars(self)}')
@@ -183,10 +188,40 @@ def _parse_ba(input_str: str) -> int:
except ValueError:
raise ValueError(f"Invalid input string: {input_str}. Ensure it's a valid hex or decimal value.")
+        def _load_program_options(file_path: str) -> str:
+            """Validate a --program-options JSON file and return its path unchanged.
+
+            Used as an argparse ``type=`` callable, so every failure is raised as
+            ``argparse.ArgumentTypeError`` to produce a usage error, not a traceback.
+            """
+            path = Path(file_path)
+
+            if not path.is_file():
+                raise argparse.ArgumentTypeError(f"The file '{file_path}' does not exist.")
+
+            try:
+                data = json.loads(path.read_text())
+            except (OSError, ValueError) as ex:
+                raise argparse.ArgumentTypeError(
+                    f"Json {path.absolute()} could not be loaded as json. Check file. Exception:{ex}")
+
+            # Only a JSON object with a non-empty program_options.Analyzers mapping is usable
+            program_opts = data.get('program_options') if isinstance(data, dict) else None
+            if not isinstance(program_opts, dict) or not program_opts.get('Analyzers'):
+                raise argparse.ArgumentTypeError(
+                    f"Missing keys in json: {path.absolute()}. Missing 'program_options' or 'Analyzers' key.")
+
+            # Return the path, not the parsed data: the engine constructor loads the file from this path
+            return file_path
+
group = parser.add_argument_group('Ghidra Project Options')
group.add_argument('-p', '--project-location', help='Ghidra Project Path', default='ghidra_projects')
group.add_argument('-n', '--project-name', help='Ghidra Project Name', default='ghidriff')
group.add_argument('-s', '--symbols-path', help='Ghidra local symbol store directory', default='symbols')
+ group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba,
+ help='Set base address from both programs. 0x2000 or 8192'),
+ group.add_argument('--program-options', type=_load_program_options,
+ help='Path to json file with Program Options (custom analyzer settings)')
group = parser.add_argument_group('Engine Options')
group.add_argument('--threaded', help='Use threading during import, analysis, and diffing. Recommended',
@@ -208,8 +243,6 @@ def _parse_ba(input_str: str) -> int:
group.add_argument('--use-calling-counts', help='Add calling/called reference counts', default=False,
action=argparse.BooleanOptionalAction)
group.add_argument('--gdt', action='append', help='Path to GDT file for analysis', default=[])
- group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba,
- help='Set base address from both programs. 0x2000 or 8192')
group = parser.add_argument_group('BSIM Options')
group.add_argument('--bsim', help='Toggle using BSIM correlation', default=True,
@@ -844,7 +877,7 @@ def analyze_program(self, df_or_prog: Union["ghidra.framework.model.DomainFile",
force_reload_for_symbols = False
if force_reload_for_symbols:
- self.set_analysis_option_bool(program, 'PDB Universal', True)
+ self.set_analysis_option(program, 'PDB Universal', True)
self.logger.info('Symbols missing. Re-analysis is required. Setting PDB Universal: True')
self.logger.debug(f'pdb loaded: {pdb_attr.isPdbLoaded()} prog analyzed: {pdb_attr.isProgramAnalyzed()}')
@@ -854,17 +887,25 @@ def analyze_program(self, df_or_prog: Union["ghidra.framework.model.DomainFile",
# handle large binaries more efficiently
# see ghidra/issues/4573 (turn off feature Shared Return Calls )
if program and program.getFunctionManager().getFunctionCount() > 1000:
- self.logger.warn(f"Turning off 'Shared Return Calls' for {program}")
- self.set_analysis_option_bool(
- program, 'Shared Return Calls.Assume Contiguous Functions Only', False)
+ if self.program_options is None or self.program_options['program_options']['Analyzers'].get('Shared Return Calls.Assume Contiguous Functions Only') is None:
+ self.logger.warn(f"Turning off 'Shared Return Calls' for {program}")
+ self.set_analysis_option(
+ program, 'Shared Return Calls.Assume Contiguous Functions Only', False)
- # TODO make this argument optional, or provide custom analyzer config parsing
# This really helps with decompilation, was turned off by default in 10.x
- self.set_analysis_option_bool(program, 'Decompiler Parameter ID', True)
+ # Set by default, including when no --program-options file is given, unless the user's file specifies it
+ if self.program_options is None or self.program_options['program_options']['Analyzers'].get('Decompiler Parameter ID') is None:
+ self.set_analysis_option(program, 'Decompiler Parameter ID', True)
+
+ if self.program_options:
+ analyzer_options = self.program_options['program_options']['Analyzers']
+ for k, v in analyzer_options.items():
+ self.logger.info(f"Setting prog option:{k} with value:{v}")
+ self.set_analysis_option(program, k, v)
if self.no_symbols:
self.logger.warn(f'Disabling symbols for analysis! --no-symbols flag: {self.no_symbols}')
- self.set_analysis_option_bool(program, 'PDB Universal', False)
+ self.set_analysis_option(program, 'PDB Universal', False)
self.logger.info(f'Starting Ghidra analysis of {program}...')
try:
@@ -1000,6 +1041,83 @@ def get_program_options(
return options
+    def set_analysis_option(
+        self,
+        prog: "ghidra.program.model.listing.Program",
+        option_name: str,
+        value: Union[bool, int, float, str]
+    ) -> None:
+        """
+        Set a program analysis option, coercing `value` to the option's declared type
+        Inspired by: Ghidra/Features/Base/src/main/java/ghidra/app/script/GhidraScript.java#L1272
+
+        prog: program whose Program.ANALYSIS_PROPERTIES options are modified
+        option_name: full analysis option name, e.g. 'Decompiler Parameter ID'
+        value: new value; booleans may also be given as the strings "true"/"false"
+
+        Raises ValueError for an unusable boolean value or an unknown enum constant.
+        Options of any other type are not set; a warning is logged instead.
+        """
+        from ghidra.program.model.listing import Program
+
+        prog_options = prog.getOptions(Program.ANALYSIS_PROPERTIES)
+        option_type = prog_options.getType(option_name)
+
+        match str(option_type):
+            case "INT_TYPE":
+                self.logger.debug('Setting type: INT')
+                prog_options.setInt(option_name, int(value))
+            case "LONG_TYPE":
+                self.logger.debug('Setting type: LONG')
+                prog_options.setLong(option_name, int(value))
+            case "STRING_TYPE":
+                self.logger.debug('Setting type: STRING')
+                prog_options.setString(option_name, value)
+            case "DOUBLE_TYPE":
+                self.logger.debug('Setting type: DOUBLE')
+                prog_options.setDouble(option_name, float(value))
+            case "FLOAT_TYPE":
+                self.logger.debug('Setting type: FLOAT')
+                prog_options.setFloat(option_name, float(value))
+            case "BOOLEAN_TYPE":
+                self.logger.debug('Setting type: BOOLEAN')
+                if isinstance(value, bool):
+                    prog_options.setBoolean(option_name, value)
+                elif isinstance(value, str) and value.lower() in {"true", "false"}:
+                    prog_options.setBoolean(option_name, value.lower() == "true")
+                else:
+                    # reject anything else (e.g. "yes", 1) instead of silently leaving the option unchanged
+                    raise ValueError(f"Failed to setBoolean on {option_name} {option_type}")
+
+            case "ENUM_TYPE":
+                self.logger.debug('Setting type: ENUM')
+                # the current value supplies the enum class used to resolve the new constant
+                enum_for_option = prog_options.getEnum(option_name, None)
+                if enum_for_option is None:
+                    raise ValueError(
+                        f"Attempted to set an Enum option {option_name} without an existing enum value already set.")
+
+                from java.lang import Enum
+                new_enum = None
+                try:
+                    # first try `value` as the enum constant name
+                    new_enum = Enum.valueOf(enum_for_option.getClass(), value)
+                except Exception:
+                    # not a constant name; fall back to matching each constant's toString()
+                    for enum_value in enum_for_option.values():
+                        if value == enum_value.toString():
+                            new_enum = enum_value
+                            break
+
+                if new_enum is None:
+                    raise ValueError(f"Value '{value}' is not a valid enum constant for option {option_name}")
+
+                prog_options.setEnum(option_name, new_enum)
+
+            case _:
+                # do nothing; don't allow user to set these options (doesn't make any sense)
+                self.logger.warning(f'option {option_type} set not supported, ignoring')
+
def set_analysis_option_bool(
self,
prog: "ghidra.program.model.listing.Program",
@@ -1015,6 +1133,13 @@ def set_analysis_option_bool(
prog_options = prog.getOptions(Program.ANALYSIS_PROPERTIES)
+ # prog_options = prog.getOptions(name)
+ options = {}
+
+ for propName in prog_options.getOptionNames():
+ options[propName] = prog_options.getValueAsString(propName)
+ prog_options.getType(propName)
+
prog_options.setBoolean(option_name, value)
def set_proginfo_option_bool(
diff --git a/tests/test_custom_base_address.py b/tests/test_custom_base_address.py
new file mode 100644
index 0000000..0337ce0
--- /dev/null
+++ b/tests/test_custom_base_address.py
@@ -0,0 +1,210 @@
+from pathlib import Path
+import json
+import pytest
+
+from ghidriff import get_parser, GhidraDiffEngine, VersionTrackingDiff
+
+SYMBOLS_DIR = 'symbols'
+BINS_DIR = 'bins'
+PROG_OPTIONS_DIR = 'prog_options'
+
+BASE_ADDR_HEX = "0x2f000"
+BASE_ADDR_DEC = "192512"
+
+
+@pytest.mark.forked
+def test_custom_base_addr_hex_afd(shared_datadir: Path):
+ """
+ Tests end to end diff of two afd.sys builds rebased with --base-address given as hex
+ runs forked because each jpype jvm can only be initialized 1x
+ """
+
+ test_name = 'test_afd_prog_options'
+ output_path = shared_datadir / test_name
+ output_path.mkdir(exist_ok=True, parents=True)
+ symbols_path = shared_datadir / SYMBOLS_DIR
+ bins_path = shared_datadir / BINS_DIR
+ prog_options_path = shared_datadir / PROG_OPTIONS_DIR / 'prog_options.json'  # NOTE(review): not referenced again in this test
+ ghidra_project_path = output_path / 'ghidra_projects'
+ ghidra_project_path.mkdir(exist_ok=True, parents=True)
+
+ # setup bins
+ old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028'
+ new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415'
+
+ assert old_bin_path.exists()
+ assert new_bin_path.exists()
+
+ parser = get_parser()
+
+ GhidraDiffEngine.add_ghidra_args_to_parser(parser)
+
+ args = parser.parse_args([
+ '-s',
+ str(symbols_path),
+ str(old_bin_path.absolute()),
+ str(new_bin_path.absolute()),
+ '-p',
+ str(ghidra_project_path.absolute()),
+ '--base-address',
+ BASE_ADDR_HEX
+ ])
+
+ engine_log_path = output_path / parser.get_default('log_path')
+
+ binary_paths = args.old + [bin for sublist in args.new for bin in sublist]
+
+ binary_paths = [Path(path) for path in binary_paths]
+
+ if any([not path.exists() for path in binary_paths]):
+ missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()]
+ raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}")
+
+ project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}'
+
+ DiffEngine: GhidraDiffEngine = VersionTrackingDiff
+
+ d: GhidraDiffEngine = DiffEngine(args=args,
+ verbose=True,
+ threaded=args.threaded,
+ max_ram_percent=args.max_ram_percent,
+ print_jvm_flags=args.print_flags,
+ jvm_args=args.jvm_args,
+ force_analysis=args.force_analysis,
+ force_diff=args.force_diff,
+ verbose_analysis=args.va,
+ no_symbols=args.no_symbols,
+ engine_log_path=engine_log_path,
+ engine_log_level=args.log_level,
+ engine_file_log_level=args.file_log_level,
+ base_address=args.base_address
+ )
+
+ d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path)
+
+ d.analyze_project()
+
+ pdiff = d.diff_bins(old_bin_path, new_bin_path)
+ pdiff_json = json.dumps(pdiff)
+
+ d.validate_diff_json(pdiff_json)
+
+ diff_name = f"{old_bin_path.name}-{new_bin_path.name}_diff"
+
+ d.dump_pdiff_to_path(diff_name,
+ pdiff,
+ output_path,
+ side_by_side=args.side_by_side,
+ max_section_funcs=args.max_section_funcs,
+ md_title=args.md_title)
+
+ assert len(pdiff['functions']['modified']) == 11
+ assert len(pdiff['functions']['added']) == 28
+ assert len(pdiff['functions']['deleted']) == 0
+
+ func_name = "AfdNotifyRemoveIoCompletion"
+ assert any([func_name in func['old']['name'] or func_name in func['new']['name']
+ for func in pdiff['functions']['modified']]) is True
+
+ # BASE_ADDR_HEX (0x2f000) must be reported, zero-padded, as the minimum address of both programs
+ assert (pdiff['old_meta']['Minimum Address'] == '0002f000')
+ assert (pdiff['new_meta']['Minimum Address'] == '0002f000')
+
+
+@pytest.mark.forked
+def test_custom_base_addr_dec_afd(shared_datadir: Path):
+ """
+ Tests end to end diff of two afd.sys builds rebased with --base-address given as decimal
+ runs forked because each jpype jvm can only be initialized 1x
+ """
+
+ test_name = 'test_afd_prog_options'
+ output_path = shared_datadir / test_name
+ output_path.mkdir(exist_ok=True, parents=True)
+ symbols_path = shared_datadir / SYMBOLS_DIR
+ bins_path = shared_datadir / BINS_DIR
+ prog_options_path = shared_datadir / PROG_OPTIONS_DIR / 'prog_options.json'  # NOTE(review): not referenced again in this test
+ ghidra_project_path = output_path / 'ghidra_projects'
+ ghidra_project_path.mkdir(exist_ok=True, parents=True)
+
+ # setup bins
+ old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028'
+ new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415'
+
+ assert old_bin_path.exists()
+ assert new_bin_path.exists()
+
+ parser = get_parser()
+
+ GhidraDiffEngine.add_ghidra_args_to_parser(parser)
+
+ args = parser.parse_args([
+ '-s',
+ str(symbols_path),
+ str(old_bin_path.absolute()),
+ str(new_bin_path.absolute()),
+ '-p',
+ str(ghidra_project_path.absolute()),
+ '--base-address',
+ BASE_ADDR_DEC
+ ])
+
+ engine_log_path = output_path / parser.get_default('log_path')
+
+ binary_paths = args.old + [bin for sublist in args.new for bin in sublist]
+
+ binary_paths = [Path(path) for path in binary_paths]
+
+ if any([not path.exists() for path in binary_paths]):
+ missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()]
+ raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}")
+
+ project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}'
+
+ DiffEngine: GhidraDiffEngine = VersionTrackingDiff
+
+ d: GhidraDiffEngine = DiffEngine(args=args,
+ verbose=True,
+ threaded=args.threaded,
+ max_ram_percent=args.max_ram_percent,
+ print_jvm_flags=args.print_flags,
+ jvm_args=args.jvm_args,
+ force_analysis=args.force_analysis,
+ force_diff=args.force_diff,
+ verbose_analysis=args.va,
+ no_symbols=args.no_symbols,
+ engine_log_path=engine_log_path,
+ engine_log_level=args.log_level,
+ engine_file_log_level=args.file_log_level,
+ base_address=args.base_address
+ )
+
+ d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path)
+
+ d.analyze_project()
+
+ pdiff = d.diff_bins(old_bin_path, new_bin_path)
+ pdiff_json = json.dumps(pdiff)
+
+ d.validate_diff_json(pdiff_json)
+
+ diff_name = f"{old_bin_path.name}-{new_bin_path.name}_diff"
+
+ d.dump_pdiff_to_path(diff_name,
+ pdiff,
+ output_path,
+ side_by_side=args.side_by_side,
+ max_section_funcs=args.max_section_funcs,
+ md_title=args.md_title)
+
+ assert len(pdiff['functions']['modified']) == 11
+ assert len(pdiff['functions']['added']) == 28
+ assert len(pdiff['functions']['deleted']) == 0
+
+ func_name = "AfdNotifyRemoveIoCompletion"
+ assert any([func_name in func['old']['name'] or func_name in func['new']['name']
+ for func in pdiff['functions']['modified']]) is True
+
+ # BASE_ADDR_DEC (192512 == 0x2f000) must be reported, zero-padded hex, as the minimum address of both programs
+ assert (pdiff['old_meta']['Minimum Address'] == '0002f000')
+ assert (pdiff['new_meta']['Minimum Address'] == '0002f000')
diff --git a/tests/test_custom_program_options.py b/tests/test_custom_program_options.py
new file mode 100644
index 0000000..e4a1076
--- /dev/null
+++ b/tests/test_custom_program_options.py
@@ -0,0 +1,113 @@
+from pathlib import Path
+import json
+import pytest
+
+from ghidriff import get_parser, GhidraDiffEngine, VersionTrackingDiff
+
+SYMBOLS_DIR = 'symbols'
+BINS_DIR = 'bins'
+PROG_OPTIONS_DIR = 'prog_options'
+
+
+@pytest.mark.forked
+def test_custom_program_options_afd_cve_2023_21768(shared_datadir: Path):
+ """
+ Tests end to end diff of CVE
+ runs forked because each jpype jvm can only be initialized 1x
+ """
+
+ test_name = 'test_afd_prog_options'
+ output_path = shared_datadir / test_name
+ output_path.mkdir(exist_ok=True, parents=True)
+ symbols_path = shared_datadir / SYMBOLS_DIR
+ bins_path = shared_datadir / BINS_DIR
+ prog_options_path = shared_datadir / PROG_OPTIONS_DIR / 'prog_options.json'
+ ghidra_project_path = output_path / 'ghidra_projects'
+ ghidra_project_path.mkdir(exist_ok=True, parents=True)
+
+ # setup bins
+ old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028'
+ new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415'
+
+ assert old_bin_path.exists()
+ assert new_bin_path.exists()
+
+ parser = get_parser()
+
+ GhidraDiffEngine.add_ghidra_args_to_parser(parser)
+
+ args = parser.parse_args([
+ '-s',
+ str(symbols_path),
+ str(old_bin_path.absolute()),
+ str(new_bin_path.absolute()),
+ '-p',
+ str(ghidra_project_path.absolute()),
+ '--program-options',
+ str(prog_options_path.absolute())
+ ])
+
+ engine_log_path = output_path / parser.get_default('log_path')
+
+ binary_paths = args.old + [bin for sublist in args.new for bin in sublist]
+
+ binary_paths = [Path(path) for path in binary_paths]
+
+ if any([not path.exists() for path in binary_paths]):
+ missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()]
+ raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}")
+
+ project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}'
+
+ DiffEngine: GhidraDiffEngine = VersionTrackingDiff
+
+ d: GhidraDiffEngine = DiffEngine(args=args,
+ verbose=True,
+ threaded=args.threaded,
+ max_ram_percent=args.max_ram_percent,
+ print_jvm_flags=args.print_flags,
+ jvm_args=args.jvm_args,
+ force_analysis=args.force_analysis,
+ force_diff=args.force_diff,
+ verbose_analysis=args.va,
+ no_symbols=args.no_symbols,
+ engine_log_path=engine_log_path,
+ engine_log_level=args.log_level,
+ engine_file_log_level=args.file_log_level,
+ program_options=args.program_options
+ )
+
+ d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path)
+
+ d.analyze_project()
+
+ pdiff = d.diff_bins(old_bin_path, new_bin_path)
+ pdiff_json = json.dumps(pdiff)
+
+ d.validate_diff_json(pdiff_json)
+
+ diff_name = f"{old_bin_path.name}-{new_bin_path.name}_diff"
+
+ d.dump_pdiff_to_path(diff_name,
+ pdiff,
+ output_path,
+ side_by_side=args.side_by_side,
+ max_section_funcs=args.max_section_funcs,
+ md_title=args.md_title)
+
+ assert len(pdiff['functions']['modified']) == 12
+ assert len(pdiff['functions']['added']) == 28
+ assert len(pdiff['functions']['deleted']) == 0
+
+ func_name = "AfdNotifyRemoveIoCompletion"
+ assert any([func_name in func['old']['name'] or func_name in func['new']['name']
+ for func in pdiff['functions']['modified']]) is True
+
+ # check that a non-default setting from the custom program options file was applied
+ # "ASCII Strings.Force Model Reload": "false", <- normal
+ # "ASCII Strings.Force Model Reload": "true", <- check this is true
+
+ # pdiff['program_options'] is keyed by binary name; the stored option value
+ # is compared as the string 'true', not as a bool
+ assert (pdiff['program_options']['afd.sys.x64.10.0.22621.1028']['Analyzers']["ASCII Strings.Force Model Reload"] == 'true')
+ assert (pdiff['program_options']['afd.sys.x64.10.0.22621.1415']['Analyzers']["ASCII Strings.Force Model Reload"] == 'true')
From 6fe5fb449e4a37812e96a1fa480dcc0a9639aa42 Mon Sep 17 00:00:00 2001
From: clearbluejar <3752074+clearbluejar@users.noreply.github.com>
Date: Fri, 18 Apr 2025 23:10:58 +0000
Subject: [PATCH 4/6] fix test names
---
tests/test_custom_base_address.py | 4 ++--
tests/test_custom_program_options.py | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tests/test_custom_base_address.py b/tests/test_custom_base_address.py
index 0337ce0..895e6d4 100644
--- a/tests/test_custom_base_address.py
+++ b/tests/test_custom_base_address.py
@@ -19,7 +19,7 @@ def test_custom_base_addr_hex_afd(shared_datadir: Path):
runs forked because each jpype jvm can only be initialized 1x
"""
- test_name = 'test_afd_prog_options'
+ test_name = 'test_custom_base_addr_hex_afd'
output_path = shared_datadir / test_name
output_path.mkdir(exist_ok=True, parents=True)
symbols_path = shared_datadir / SYMBOLS_DIR
@@ -118,7 +118,7 @@ def test_custom_base_addr_dec_afd(shared_datadir: Path):
runs forked because each jpype jvm can only be initialized 1x
"""
- test_name = 'test_afd_prog_options'
+ test_name = 'test_custom_base_addr_dec_afd'
output_path = shared_datadir / test_name
output_path.mkdir(exist_ok=True, parents=True)
symbols_path = shared_datadir / SYMBOLS_DIR
diff --git a/tests/test_custom_program_options.py b/tests/test_custom_program_options.py
index e4a1076..87ed188 100644
--- a/tests/test_custom_program_options.py
+++ b/tests/test_custom_program_options.py
@@ -10,13 +10,13 @@
@pytest.mark.forked
-def test_custom_program_options_afd_cve_2023_21768(shared_datadir: Path):
+def test_custom_program_options_afd(shared_datadir: Path):
"""
Tests end to end diff of CVE
runs forked because each jpype jvm can only be initialized 1x
"""
- test_name = 'test_afd_prog_options'
+ test_name = 'test_custom_program_options_afd'
output_path = shared_datadir / test_name
output_path.mkdir(exist_ok=True, parents=True)
symbols_path = shared_datadir / SYMBOLS_DIR
From 5711306d629d50b952bda0659795d51a8fd3e925 Mon Sep 17 00:00:00 2001
From: clearbluejar <3752074+clearbluejar@users.noreply.github.com>
Date: Fri, 18 Apr 2025 23:11:33 +0000
Subject: [PATCH 5/6] add gzf ability. fixes #108
---
ghidriff/__main__.py | 7 ++-
ghidriff/ghidra_diff_engine.py | 15 +++++-
tests/test_gzfs.py | 88 ++++++++++++++++++++++++++++++++++
3 files changed, 107 insertions(+), 3 deletions(-)
create mode 100644 tests/test_gzfs.py
diff --git a/ghidriff/__main__.py b/ghidriff/__main__.py
index 1945a83..d4659cf 100644
--- a/ghidriff/__main__.py
+++ b/ghidriff/__main__.py
@@ -37,6 +37,11 @@ def main():
else:
symbols_path = Path(args.symbols_path)
+ if args.gzfs_path == parser.get_default('gzfs_path'):
+ gzfs_path = output_path / parser.get_default('gzfs_path')
+ else:
+ gzfs_path = Path(args.gzfs_path)
+
binary_paths = args.old + [bin for sublist in args.new for bin in sublist]
binary_paths = [Path(path) for path in binary_paths]
@@ -72,7 +77,7 @@ def main():
program_options=args.program_options
)
- d.setup_project(binary_paths, project_path, project_name, symbols_path)
+ d.setup_project(binary_paths, project_path, project_name, symbols_path, gzfs_path)
d.analyze_project()
diff --git a/ghidriff/ghidra_diff_engine.py b/ghidriff/ghidra_diff_engine.py
index 8d5ff48..22729fc 100644
--- a/ghidriff/ghidra_diff_engine.py
+++ b/ghidriff/ghidra_diff_engine.py
@@ -218,6 +218,7 @@ def _load_program_options(file_path: str) -> int:
group.add_argument('-p', '--project-location', help='Ghidra Project Path', default='ghidra_projects')
group.add_argument('-n', '--project-name', help='Ghidra Project Name', default='ghidriff')
group.add_argument('-s', '--symbols-path', help='Ghidra local symbol store directory', default='symbols')
+ group.add_argument('-g', '--gzfs-path', help='Location to store GZFs of analyzed binaries', default='gzfs')
group.add_argument('--ba', '--base-address', dest='base_address', type=_parse_ba,
help='Set base address from both programs. 0x2000 or 8192'),
group.add_argument('--program-options', type=_load_program_options,
@@ -455,7 +456,9 @@ def setup_project(
project_location: Union[str, Path],
project_name: str,
symbols_path: Union[str, Path],
+ gzfs_path: Union[str, Path] = None,
symbol_urls: list = None,
+
) -> list:
"""
Setup and verify Ghidra Project
@@ -470,6 +473,12 @@ def setup_project(
project_location = Path(project_location) / project_name
project_location.mkdir(exist_ok=True, parents=True)
+
+ if gzfs_path is not None:
+ gzfs_path = Path(gzfs_path)
+ gzfs_path.mkdir(exist_ok=True, parents=True)
+ self.gzfs_path = gzfs_path
+
pdb = None
self.logger.info(f'Setting Up Ghidra Project...')
@@ -922,8 +931,10 @@ def analyze_program(self, df_or_prog: Union["ghidra.framework.model.DomainFile",
else:
self.logger.info(f"Analysis already complete.. skipping {program}!")
finally:
- # from java.io import File
- # self.project.saveAsPackedFile(program,File(f'/tmp/{program.name}.gzf'), True)
+ # optionally save GZF
+ if self.gzfs_path is not None:
+ from java.io import File
+ self.project.saveAsPackedFile(program, File((self.gzfs_path / f"{df_or_prog.getName()}.gzf").absolute()), True)
self.project.close(program)
self.logger.info(f"Analysis for {df_or_prog} complete")
diff --git a/tests/test_gzfs.py b/tests/test_gzfs.py
new file mode 100644
index 0000000..8bbb999
--- /dev/null
+++ b/tests/test_gzfs.py
@@ -0,0 +1,88 @@
+from pathlib import Path
+import json
+import pytest
+
+from ghidriff import get_parser, GhidraDiffEngine, VersionTrackingDiff
+
+SYMBOLS_DIR = 'symbols'
+BINS_DIR = 'bins'
+PROG_OPTIONS_DIR = 'prog_options'
+
+BASE_ADDR_HEX = "0x2f000"
+BASE_ADDR_DEC = "192512"
+
+
+@pytest.mark.forked
+def test_gzfs_exist(shared_datadir: Path):
+    """
+    Tests that analysis writes one GZF per binary into the gzfs path given to setup_project
+    runs forked because each jpype jvm can only be initialized 1x
+    """
+
+    test_name = 'test_gzfs_exist'
+    output_path = shared_datadir / test_name
+    output_path.mkdir(exist_ok=True, parents=True)
+    symbols_path = shared_datadir / SYMBOLS_DIR
+    bins_path = shared_datadir / BINS_DIR
+    ghidra_project_path = output_path / 'ghidra_projects'
+    ghidra_project_path.mkdir(exist_ok=True, parents=True)
+
+    # setup bins
+    old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028'
+    new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415'
+
+    assert old_bin_path.exists()
+    assert new_bin_path.exists()
+
+    parser = get_parser()
+    GhidraDiffEngine.add_ghidra_args_to_parser(parser)
+
+    args = parser.parse_args([
+        '-s',
+        str(symbols_path),
+        str(old_bin_path.absolute()),
+        str(new_bin_path.absolute()),
+        '-p',
+        str(ghidra_project_path.absolute()),
+    ])
+
+    engine_log_path = output_path / parser.get_default('log_path')
+
+    # args.new is a list of lists; flatten it behind args.old
+    binary_paths = [Path(path) for path in args.old + [new_bin for sublist in args.new for new_bin in sublist]]
+
+    missing_bins = [path.name for path in binary_paths if not path.exists()]
+    if missing_bins:
+        raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}")
+
+    project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}'
+
+    DiffEngine: GhidraDiffEngine = VersionTrackingDiff
+
+    d: GhidraDiffEngine = DiffEngine(args=args,
+                                     verbose=True,
+                                     threaded=args.threaded,
+                                     max_ram_percent=args.max_ram_percent,
+                                     print_jvm_flags=args.print_flags,
+                                     jvm_args=args.jvm_args,
+                                     force_analysis=args.force_analysis,
+                                     force_diff=args.force_diff,
+                                     verbose_analysis=args.va,
+                                     no_symbols=args.no_symbols,
+                                     engine_log_path=engine_log_path,
+                                     engine_log_level=args.log_level,
+                                     engine_file_log_level=args.file_log_level,
+                                     base_address=args.base_address
+                                     )
+
+    # keep GZFs under the per-test output dir rather than the bare parser default path
+    gzfs_path: Path = output_path / parser.get_default('gzfs_path')
+
+    d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path, gzfs_path)
+
+    d.analyze_project()
+
+    # exactly one packed .gzf file per analyzed binary
+    assert gzfs_path.is_dir()
+    gzf_files = [entry for entry in gzfs_path.iterdir() if entry.suffix == '.gzf']
+    assert len(gzf_files) == len(binary_paths)
From d3dfb0f754cd5baadaa7ad7989384bdf2bb771f0 Mon Sep 17 00:00:00 2001
From: clearbluejar <3752074+clearbluejar@users.noreply.github.com>
Date: Fri, 18 Apr 2025 23:44:17 +0000
Subject: [PATCH 6/6] Update README with new features.
---
README.md | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 80 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index c15859b..6bb181d 100644
--- a/README.md
+++ b/README.md
@@ -138,8 +138,12 @@ Each implementation leverages the base class, and implements `find_changes`.
## Usage
```bash
-usage: ghidriff [-h] [--engine {SimpleDiff,StructualGraphDiff,VersionTrackingDiff}] [-o OUTPUT_PATH] [--summary SUMMARY] [-p PROJECT_LOCATION] [-n PROJECT_NAME] [-s SYMBOLS_PATH] [--threaded | --no-threaded] [--force-analysis] [--force-diff] [--no-symbols] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}]
- [--file-log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] [--log-path LOG_PATH] [--va] [--min-func-len MIN_FUNC_LEN] [--use-calling-counts USE_CALLING_COUNTS] [--max-ram-percent MAX_RAM_PERCENT] [--print-flags] [--jvm-args [JVM_ARGS]] [--sxs] [--max-section-funcs MAX_SECTION_FUNCS]
+usage: ghidriff [-h] [--engine {SimpleDiff,StructualGraphDiff,VersionTrackingDiff}] [-o OUTPUT_PATH] [--summary SUMMARY] [-p PROJECT_LOCATION]
+ [-n PROJECT_NAME] [-s SYMBOLS_PATH] [-g GZFS_PATH] [--ba BASE_ADDRESS] [--program-options PROGRAM_OPTIONS] [--threaded | --no-threaded]
+ [--force-analysis] [--force-diff] [--no-symbols] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}]
+ [--file-log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] [--log-path LOG_PATH] [--va] [--min-func-len MIN_FUNC_LEN]
+ [--use-calling-counts | --no-use-calling-counts] [--gdt GDT] [--bsim | --no-bsim] [--bsim-full | --no-bsim-full]
+ [--max-ram-percent MAX_RAM_PERCENT] [--print-flags] [--jvm-args [JVM_ARGS]] [--sxs] [--max-section-funcs MAX_SECTION_FUNCS]
[--md-title MD_TITLE]
old new [new ...]
@@ -166,6 +170,7 @@ There are quite a few options here, and some complexity. Generally you can succe
Show Extended Usage
```bash
+
Ghidra Project Options:
-p PROJECT_LOCATION, --project-location PROJECT_LOCATION
Ghidra Project Path (default: ghidra_projects)
@@ -173,6 +178,12 @@ Ghidra Project Options:
Ghidra Project Name (default: ghidriff)
-s SYMBOLS_PATH, --symbols-path SYMBOLS_PATH
Ghidra local symbol store directory (default: symbols)
+ -g GZFS_PATH, --gzfs-path GZFS_PATH
+ Location to store GZFs of analyzed binaries (default: gzfs)
+ --ba BASE_ADDRESS, --base-address BASE_ADDRESS
+ Set base address from both programs. 0x2000 or 8192 (default: None)
+ --program-options PROGRAM_OPTIONS
+ Path to json file with Program Options (custom analyzer settings) (default: None)
Engine Options:
--threaded, --no-threaded
@@ -189,8 +200,14 @@ Engine Options:
Verbose logging for analysis step. (default: False)
--min-func-len MIN_FUNC_LEN
Minimum function length to consider for diff (default: 10)
- --use-calling-counts USE_CALLING_COUNTS
- Add calling/called reference counts (default: True)
+ --use-calling-counts, --no-use-calling-counts
+ Add calling/called reference counts (default: False)
+ --gdt GDT Path to GDT file for analysis (default: [])
+
+BSIM Options:
+ --bsim, --no-bsim Toggle using BSIM correlation (default: True)
+ --bsim-full, --no-bsim-full
+ Slower but better matching. Use only when needed (default: False)
JVM Options:
--max-ram-percent MAX_RAM_PERCENT
@@ -208,6 +225,58 @@ Markdown Options:
+### Using Custom Analyzer Settings
+
+If you want to configure specific analyzers for your Ghidra binary analysis, pass the path to a custom `program_options.json` file with `--program-options`.
+
+```bash
+ghidriff --program-options program_options.json tapisrv.dll.x64.10.0.10240.20708 tapisrv.dll.x64.10.0.10240.20708
+```
+
+The `program_options.json` would need to look something like this:
+
+
+
+```json
+{
+ "program_options": {
+ "binary_name": null,
+ "Analyzers": {
+ "ASCII Strings": "true",
+ "ASCII Strings.Create Strings Containing Existing Strings": "true",
+ "ASCII Strings.Create Strings Containing References": "true",
+ "ASCII Strings.Force Model Reload": "true",
+ "ASCII Strings.Minimum String Length": "LEN_5",
+ "ASCII Strings.Model File": "StringModel.sng",
+ "ASCII Strings.Require Null Termination for String": "true",
+ "ASCII Strings.Search Only in Accessible Memory Blocks": "true",
+ "ASCII Strings.String Start Alignment": "ALIGN_1",
+ "ASCII Strings.String end alignment": "4",
+ "Aggressive Instruction Finder": "false",
+ "Aggressive Instruction Finder.Create Analysis Bookmarks": "true",
+ "Apply Data Archives": "true",
+ "Apply Data Archives.Archive Chooser": "[Auto-Detect]",
+ "Apply Data Archives.Create Analysis Bookmarks": "true",
+ "Apply Data Archives.GDT User File Archive Path": null,
+ "Apply Data Archives.User Project Archive Path": null,
+ "Call Convention ID": "true",
+ }
+ }
+}
+```
+
+
+
+The custom settings will then be used for your binary analysis.
+
+### Setting a Custom Image Base Address (Bootloaders, etc.)
+
+If you are reverse engineering firmware or another raw binary and need it loaded at a specific image base, pass that address with `--base-address` (short form `--ba`). The address is applied to both programs and may be given in hex (`0x80000`) or decimal (`524288`).
+
+```bash
+$ ghidriff --base-address 0x80000 STM32F103C-firmware.bin STM32F103Ca-firmware.bin
+```
+
## Quick Start Environment Setup
1. [Download](https://github.com/NationalSecurityAgency/ghidra/releases) and [install Ghidra](https://htmlpreview.github.io/?https://github.com/NationalSecurityAgency/ghidra/blob/stable/GhidraDocs/InstallationGuide.html#Install).
@@ -227,6 +296,13 @@ export GHIDRA_INSTALL_DIR="/path/to/ghidra/"
pip install ghidriff
```
+### UV
+
+```bash
+export GHIDRA_INSTALL_DIR="/path/to/ghidra/"
+uvx ghidriff
+```
+
## Ghidriff in a Box
Don't want to install Ghidra and Java on your host? Try "Ghidriff in a box". It supports multiple-platforms (x64 and arm64).