Skip to content

Commit 130d657

Browse files
hbrodin and ret2libc authored
Fix ARM64 crash reproduction and helper.py patching (#378)
The tracer bot failed to reproduce crashes on ARM64 due to incorrect parameter passing in the reproduce_impl patch. The err_result value was being passed as a positional argument to docker_run(), which interpreted it as print_output=False, redirecting all output to /dev/null.

Changes:
- Fix reproduce_impl to pass architecture as keyword arg only
- Refactor ARM64 patching into separate dockerfile and runner functions
- Add comprehensive helper.py patching for ARM64:
  * Patch image_name variables to use :manifest-arm64v8 tag
  * Patch BASE_RUNNER_IMAGE assignment with tag stripping
  * Fix debug mode tag insertion (insert -debug before tag, not after)
  * Prevent double-tagging in _get_base_runner_image()
- Default CLI architecture parameters to ARCHITECTURE constant

This enables the tracer bot to see fuzzer output and successfully detect crash reproduction on ARM64 systems.

Co-authored-by: Riccardo Schirone <562321+ret2libc@users.noreply.github.com>
1 parent b514d45 commit 130d657

File tree

2 files changed

+125
-6
lines changed

2 files changed

+125
-6
lines changed

common/src/buttercup/common/challenge_task.py

Lines changed: 120 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ def apply_patch_diff(self, diff_file: Path | None = None) -> bool:
865865
logger.exception(f"[task {self.task_dir}] Error applying diff: {e!s}")
866866
raise ChallengeTaskError(f"[task {self.task_dir}] Error applying diff: {e!s}") from e
867867

868-
def _hack_oss_fuzz_aarch64(self, task: ChallengeTask) -> None:
868+
def _hack_oss_fuzz_aarch64_dockerfile(self, task: ChallengeTask) -> None:
869869
# We find the oss-fuzz/projects/<project>/Dockerfile and make sure the
870870
# base image has `:manifest-arm64v8` tag
871871
dockerfile_path = task.get_oss_fuzz_path() / "projects" / task.project_name / "Dockerfile"
@@ -875,18 +875,136 @@ def _hack_oss_fuzz_aarch64(self, task: ChallengeTask) -> None:
875875
dockerfile_content = dockerfile_path.read_text()
876876

877877
# Regex to match FROM gcr.io/oss-fuzz-base/base-builder* [optional tag] [optional as builder]
878+
# Only patch base-builder variants, not base-clang or others that may not have manifest-arm64v8 tag
878879
def _replace_from(match: re.Match) -> str:
879880
image = match.group(1)
880881
as_clause = match.group(2) or ""
881882
# Always ensure tag is :manifest-arm64v8 regardless if there was a tag before
882883
return f"FROM {image}:manifest-arm64v8{as_clause}"
883884

884-
# This regex matches various FROM lines
885+
# This regex matches FROM lines with base-builder images only
885886
pattern = r"^FROM\s+(gcr\.io/oss-fuzz-base/base-builder(?:[^\s:]*)?)(?::[^\s]+)?(\s+as\s+\w+)?\s*$"
886887
new_content = re.sub(pattern, _replace_from, dockerfile_content, flags=re.MULTILINE)
887888

888889
if new_content != dockerfile_content:
889890
dockerfile_path.write_text(new_content)
891+
logger.info("Patched oss-fuzz %s/Dockerfile to use the :manifest-arm64v8 tag", task.project_name)
892+
893+
def _hack_oss_fuzz_aarch64_runner(self, task: ChallengeTask) -> None:
    """Patch oss-fuzz ``infra/helper.py`` so runner images use ``:manifest-arm64v8``.

    Five regex rewrites are applied, in order, to the text of
    ``<oss-fuzz>/infra/helper.py``:

    1. ``image_name = 'base-runner'`` / ``'base-runner-debug'`` literals get
       the ``:manifest-arm64v8`` tag.
    2. The ``BASE_RUNNER_IMAGE = 'gcr.io/oss-fuzz-base/base-runner...'``
       assignment is re-tagged: any existing tag is stripped first, and a
       value that already carries ``:manifest-arm64v8`` is left as is.
    3. ``image += '-debug'`` is rewritten so ``-debug`` is inserted before the
       tag; appending it after the tag would yield names such as
       ``base-runner:manifest-arm64v8-debug``.
    4. ``return f'{image}:{tag}'`` is rewritten to return ``image`` unchanged
       when it already contains ``:``, so no second tag is appended.
    5. ``return run_function(run_args, err_result)`` is rewritten to
       ``return run_function(run_args, architecture=architecture)``.
       NOTE(review): the rationale is that ``docker_run`` takes
       ``print_output`` as its second positional parameter, so ``err_result``
       silenced the output; that signature lives in oss-fuzz -- confirm there.

    The file is written back (and the patch logged) only when the resulting
    text differs from what was read, so running this again on an already
    patched checkout neither rewrites the file nor logs a misleading message.

    Args:
        task: Object whose ``get_oss_fuzz_path()`` locates the oss-fuzz
            checkout to patch. ``self`` is not used.

    Returns:
        None. Does nothing when ``infra/helper.py`` does not exist.
    """
    helper_path = task.get_oss_fuzz_path() / "infra" / "helper.py"
    if not helper_path.exists():
        return

    original = helper_path.read_text()

    def _replace_image_name(match: re.Match) -> str:
        # Keep the debug/non-debug flavour of the matched literal; output always uses single quotes.
        name = "base-runner-debug" if "base-runner-debug" in match.group(0) else "base-runner"
        return f"{match.group(1)}image_name = '{name}:manifest-arm64v8'"

    def _replace_base_runner_image(match: re.Match) -> str:
        prefix = match.group(1)
        image = match.group(2)
        suffix = match.group(3) or ""
        # Avoid double-appending the tag; otherwise drop whatever tag is there
        # (split on the last colon only; a no-op when there is no colon).
        if ":manifest-arm64v8" not in image:
            image = image.rsplit(":", 1)[0] + ":manifest-arm64v8"
        return f"{prefix}BASE_RUNNER_IMAGE = '{image}'{suffix}"

    def _replace_debug_append(match: re.Match) -> str:
        # The emitted code inserts -debug before the first ':' at helper.py runtime.
        return f"{match.group(1)}image = image.replace(':', '-debug:', 1) if ':' in image else image + '-debug'"

    def _replace_get_base_runner_return(match: re.Match) -> str:
        # Doubled braces emit a literal f-string into helper.py.
        return f"{match.group(1)}return image if ':' in image else f'{{image}}:{{tag}}'"

    def _replace_reproduce_run_function(match: re.Match) -> str:
        return f"{match.group(1)}return run_function(run_args, architecture=architecture)"

    # Order matters only for readability of the result; each rewrite targets a distinct line shape.
    patches = (
        (r"(\s*)image_name\s*=\s*['\"]base-runner(?:-debug)?['\"]", _replace_image_name),
        (
            r"(^\s*)BASE_RUNNER_IMAGE\s*=\s*['\"](gcr\.io/oss-fuzz-base/base-runner(?:[^\s'\"]*)?)['\"](\s*)",
            _replace_base_runner_image,
        ),
        (r"^(\s+)image\s*\+=\s*['\"]-debug['\"]\s*$", _replace_debug_append),
        # Group 2 is the quote character, back-referenced as \2.
        (r"^(\s+)return f(['\"])\{image\}:\{tag\}\2\s*$", _replace_get_base_runner_return),
        (r"^(\s+)return run_function\(run_args,\s*err_result\)\s*$", _replace_reproduce_run_function),
    )

    content = original
    for pattern, replacer in patches:
        content = re.sub(pattern, replacer, content, flags=re.MULTILINE)

    # Compare text rather than "did a callback fire": a match that reproduces
    # the same text (already-tagged BASE_RUNNER_IMAGE) is not a change.
    if content != original:
        helper_path.write_text(content)
        logger.info("Patched oss-fuzz helper.py to use the :manifest-arm64v8 tag")
1004+
1005+
def _hack_oss_fuzz_aarch64(self, task: ChallengeTask) -> None:
# Entry point for the aarch64 workarounds: runs the Dockerfile patch first,
# then the infra/helper.py patch, both against the same `task`. Returns None.
1006+
self._hack_oss_fuzz_aarch64_dockerfile(task)
1007+
self._hack_oss_fuzz_aarch64_runner(task)
8901008

8911009
@contextmanager
8921010
def get_rw_copy(self, work_dir: PathLike | None, delete: bool = True) -> Iterator[ChallengeTask]:

common/src/buttercup/common/challenge_task_cli.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,30 @@
66
from pydantic_settings import BaseSettings, CliImplicitFlag, CliSubCommand, get_subcommand
77

88
from buttercup.common.challenge_task import ChallengeTask, CommandResult, ReproduceResult
9+
from buttercup.common.constants import ARCHITECTURE
910
from buttercup.common.logger import setup_package_logger
1011

1112

1213
class BuildImageCommand(BaseModel):
# Options model for an image-build command (purpose inferred from the class
# name -- confirm against the CLI dispatcher, which is not visible here).
1314
pull_latest_base_image: bool = False
1415
# cache: tri-state flag; None is distinct from True/False (how None is
# interpreted is decided by the consumer -- TODO confirm).
cache: bool | None = None
15-
# Removed by this commit: architecture used to be optional and default to None.
architecture: str | None = None
16+
# Added by this commit: architecture is always a str and defaults to the
# ARCHITECTURE constant imported from buttercup.common.constants.
architecture: str = ARCHITECTURE
1617

1718

1819
class ApplyPatchCommand(BaseModel):
# Options model for a patch-apply command (purpose inferred from the class
# name -- confirm against the CLI dispatcher).
1920
# diff_file: optional path to a diff; defaults to None. Presumably forwarded
# to ChallengeTask.apply_patch_diff(diff_file=...) -- verify against caller.
diff_file: Path | None = None
2021

2122

2223
class BuildFuzzersCommand(BaseModel):
# Options model for a fuzzer-build command (purpose inferred from the class
# name -- confirm against the CLI dispatcher).
23-
# Removed by this commit: architecture used to be optional and default to None.
architecture: str | None = None
24+
# Added by this commit: architecture is always a str and defaults to the
# ARCHITECTURE constant imported from buttercup.common.constants.
architecture: str = ARCHITECTURE
2425
# engine / sanitizer: optional strings, None when not given (accepted values
# are not visible here).
engine: str | None = None
2526
sanitizer: str | None = None
2627
# env: optional str-to-str mapping; presumably extra environment variables
# for the build -- verify against caller.
env: dict[str, str] | None = None
2728
# use_cache: enabled by default.
use_cache: bool = True
2829

2930

3031
class CheckBuildCommand(BaseModel):
31-
architecture: str | None = None
32+
architecture: str = ARCHITECTURE
3233
engine: str | None = None
3334
sanitizer: str | None = None
3435
env: dict[str, str] | None = None
@@ -38,7 +39,7 @@ class ReproducePovCommand(BaseModel):
3839
fuzzer_name: str
3940
crash_path: Path
4041
fuzzer_args: list[str] | None = None
41-
architecture: str | None = None
42+
architecture: str = ARCHITECTURE
4243
env: dict[str, str] | None = None
4344

4445

0 commit comments

Comments
 (0)