diff --git a/Makefile b/Makefile index 0366b04..ab545d0 100644 --- a/Makefile +++ b/Makefile @@ -17,12 +17,12 @@ BIN_PLUGIN = src/r2/bin_r2unity.$(SOEXT) PLUGINS = $(CORE_PLUGIN) $(BIN_PLUGIN) CC = gcc -CFLAGS = -Wall -Wextra -g -I. $(shell pkg-config --cflags r_util 2>/dev/null || echo "") -LDFLAGS = $(shell pkg-config --libs r_util 2>/dev/null || echo "") +CFLAGS = -Wall -Wextra -g -I. $(shell pkg-config --cflags r_util r_bin 2>/dev/null || echo "") +LDFLAGS = $(shell pkg-config --libs r_util r_bin 2>/dev/null || echo "") # r_core plugin flags (full radare2) -CORE_PLUGIN_CFLAGS = -Wall -Wextra -g -fPIC $(shell pkg-config --cflags r_core 2>/dev/null || echo "") -CORE_PLUGIN_LDFLAGS = $(shell pkg-config --libs r_core 2>/dev/null || echo "") +CORE_PLUGIN_CFLAGS = -Wall -Wextra -g -fPIC $(shell pkg-config --cflags r_core r_bin 2>/dev/null || echo "") +CORE_PLUGIN_LDFLAGS = $(shell pkg-config --libs r_core r_bin 2>/dev/null || echo "") # r_bin plugin flags BIN_PLUGIN_CFLAGS = -Wall -Wextra -g -fPIC $(shell pkg-config --cflags r_bin 2>/dev/null || echo "") @@ -30,8 +30,9 @@ BIN_PLUGIN_LDFLAGS = $(shell pkg-config --libs r_bin 2>/dev/null || echo "") R2_USER_PLUGINS = $(shell r2 -H R2_USER_PLUGINS 2>/dev/null) -LIB_SRCS = $(wildcard src/lib/*.c) +LIB_SRCS = $(wildcard src/lib/*.c) $(wildcard src/lib/bin/*.c) LIB_OBJS = $(LIB_SRCS:.c=.o) +LEGACY_OBJS = src/lib/elf.o src/lib/macho.o src/lib/pe.o src/lib/native.o CLI_SRCS = src/main.c CLI_OBJS = $(CLI_SRCS:.c=.o) OBJS = $(CLI_OBJS) $(LIB_OBJS) @@ -81,7 +82,7 @@ $(BIN_PLUGIN): src/r2/bin_r2unity.c $(LIB_SRCS) $(CC) $(CFLAGS) -c -o $@ $< clean: - rm -f $(EXEC) $(OBJS) $(PLUGINS) $(CONFIG_H) + rm -f $(EXEC) $(OBJS) $(LEGACY_OBJS) $(PLUGINS) $(CONFIG_H) .PHONY: all clean plugin install-plugin uninstall-plugin fmt diff --git a/README.md b/README.md index f4505b4..9eff149 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,8 @@ IL2CPP binary, and exposes managed metadata for reverse engineering. Linux, and flat fixture layouts. 
- Recovers managed images, assemblies, types, methods, method flags, and `ldstr` string literals. -- Finds method-pointer tables heuristically in ELF, Mach-O, and PE binaries. +- Resolves method pointers through r_bin symbols/CodeRegistration, with + r_bin or simple ELF/Mach-O/PE section-scan fallback for stripped binaries. - Lists P/Invoke and v29+ reverse-P/Invoke metadata, and emits CycloneDX 1.5 SBOMs for managed assemblies. - Provides both a core r2 command plugin and an `r_bin` plugin for direct @@ -67,6 +68,9 @@ The normal inputs are the native IL2CPP binary and the matching # recover method flags/comments as r2 commands ./r2unity -f /path/to/GameAssembly.dll /path/to/global-metadata.dat > methods.r2 +# override a known native registration symbol address +./r2unity -f -O g_CodeRegistration=0x1234 /path/to/GameAssembly.dll /path/to/global-metadata.dat + # list managed strings, interop metadata, or managed-assembly SBOM data ./r2unity -z /path/to/global-metadata.dat ./r2unity -P -j /path/to/GameAssembly.dll /path/to/global-metadata.dat @@ -100,8 +104,8 @@ classes, imports, libraries, and header fields. ## Current Limits - v24.0 metadata, v36/v37 metadata, and WebAssembly are not supported. -- Method-pointer recovery is heuristic; manual `-a` / `-c` pointer reads are - not implemented yet. +- Method-pointer recovery needs CodeRegistration symbols/addresses or the + section-scan fallback; manual `-a` pointer reads are not implemented yet. - P/Invoke and reverse-P/Invoke output is metadata-first and does not fully recover native wrapper addresses or every `DllImportAttribute` detail. 
- SBOM output covers managed assemblies only, not native dependencies or file diff --git a/doc/datvsbin.md b/doc/datvsbin.md new file mode 100644 index 0000000..7e60bb9 --- /dev/null +++ b/doc/datvsbin.md @@ -0,0 +1,106 @@ +# IL2CPP Metadata vs Native Binary + +Unity IL2CPP builds split managed-code information across two files: + +- `global-metadata.dat` +- the native IL2CPP binary (`GameAssembly.dll`, `GameAssembly.so`, + `GameAssembly.dylib`, `libil2cpp.so`, `UnityFramework`, etc.) + +They must be read together to recover named native symbols and runtime +layouts. + +## `global-metadata.dat` + +`global-metadata.dat` is the managed/logical metadata blob. It is +platform-independent for a given IL2CPP run: it stores byte offsets and +dense table indices, not native pointers. + +Typical contents: + +- metadata magic, version, and table-of-contents header +- assemblies and images +- type definitions for classes, structs, enums, and interfaces +- method definitions: names, signatures, parameters, return types, + tokens, and method indices +- field definitions +- properties and events +- nested-type, interface, and vtable index tables +- generic containers, generic parameters, and method specs +- custom-attribute and default-value blobs, depending on metadata + version +- identifier strings and managed `ldstr` literal payloads + +It does not contain: + +- native method bodies +- method RVAs or function addresses +- final field offsets +- final type sizes +- native vtables, invokers, wrappers, or trampolines +- Unity asset, scene, prefab, or AssetBundle data + +In short, the `.dat` file says what managed things exist and how they +are named, typed, and indexed. + +## Native IL2CPP binary + +The native binary is the physical/runtime side of the same build. It +contains the compiled machine code and the registration structures that +tie the generated code back to metadata indices. 
+ +Important native-side data: + +- compiled method bodies +- `Il2CppCodeRegistration` +- `Il2CppMetadataRegistration` +- `methodPointers` +- `genericMethodPointers` +- `reversePInvokeWrappers` +- `invokerPointers` +- `codeGenModules` +- `fieldOffsets` +- `typeDefinitionsSizes` +- native type and generic-instantiation tables +- runtime helper code, wrappers, trampolines, and runtime strings + +These tables provide the addresses and runtime layouts that +`global-metadata.dat` intentionally does not carry. + +## Mapping model + +The two files are correlated by indices: + +```text +global-metadata.dat native IL2CPP binary +------------------- -------------------- +MethodDefinition.methodIndex --> CodeRegistration / CodeGenModule methodPointers[index] +TypeDefinition index --> MetadataRegistration.fieldOffsets[type] +Type/generic indices --> MetadataRegistration.types and generic tables +``` + +For r2unity this means: + +- parsing only `global-metadata.dat` can recover names, signatures, + tokens, string literals, and table relationships +- resolving those names to native addresses requires locating + `Il2CppCodeRegistration` in the binary; r2unity accepts an explicit + address, an r2 flag/r_bin symbol, or the r_bin/simple-parser + section-scan fallback +- recovering field offsets and final type sizes requires + `Il2CppMetadataRegistration` +- a complete symbol map needs both files from the same build + +Today r2unity takes managed structure from `.dat` and native addresses +from the binary: + +- `.dat`: image/type/method rows, method indices, names, tokens, + strings, assemblies, P/Invoke metadata, and reverse-P/Invoke + attribute metadata +- binary: executable address ranges, native symbols/flags for + `g_CodeRegistration` and `g_MetadataRegistration`, and method pointer + tables reached from CodeRegistration or the r_bin/simple-parser + fallback scan + +The current method-address path only needs `g_CodeRegistration`. 
+`g_MetadataRegistration` is tracked as the companion anchor for native +layout work such as field offsets and type sizes. diff --git a/doc/future.md b/doc/future.md index 462b20e..cdde0dc 100644 --- a/doc/future.md +++ b/doc/future.md @@ -535,22 +535,23 @@ are always zero; a defensive parser bounds-checks and skips. ## 3. Native binary — pointer arrays we don't follow -`src/lib/elf.c` and `src/lib/macho.c` currently locate exactly one -array: `CodeRegistration.methodPointers` (or its v24.2+ per-image -equivalent, if the heuristic happens to land on the right -`Il2CppCodeGenModule`). The registration structures actually expose -many more pointer arrays, each with its own metadata table partner. -Each entry below is a `{ ulong count; ulong ptr; }` pair inside the -registration (see §3 of `doc/r2unity.md` for the full struct). +`src/lib/bin/native.c` currently recovers method pointers from +`Il2CppCodeRegistration` when the registration anchor is available, +and falls back to a generic section scan backed by r_bin or the simple +ELF/Mach-O/PE parsers for stripped binaries. +The registration structures expose many more pointer arrays, each +with its own metadata table partner. Each entry below is a +`{ ulong count; ulong ptr; }` pair inside the registration (see §3 of +`doc/r2unity.md` for the full struct). ### 3.1 `methodPointers` (≥v24.1 global; v≥24.2 per-module) What r2unity extracts today. From v24.2 onwards this field is on `Il2CppCodeGenModule`, **not** `CodeRegistration`, and there is one -module per image. If r2unity finds a single `{count, ptr}` on a -v24.2+ binary it is only extracting **one image's** methods. -Walking `codeGenModules[]` and enumerating each module is mandatory -for full coverage. +module per image. The structural path walks `codeGenModules[]` and +maps modules back to `.dat` image rows; the fallback section scan may +still find only one image's table because it does not know the +registration structure. 
### 3.2 `invokerPointers` diff --git a/doc/r2unity.md b/doc/r2unity.md index eb33fb4..0f36a35 100644 --- a/doc/r2unity.md +++ b/doc/r2unity.md @@ -1010,13 +1010,13 @@ File-by-file map: type/method/image/assembly/referenced-assembly decoders, P/Invoke and reverse-P/Invoke enumerators, endian-safe LE readers (`RD_LE32`, `RD_LE16`). -- `src/lib/elf.c` — ELF32/64 loader, dynamic-table walk, relative - relocation application (`DT_REL`, `DT_RELA`, `DT_RELR`), - method-pointer-array heuristic. -- `src/lib/macho.c` — Mach-O 64 loader (thin + FAT first-ARM64), - `LC_SEGMENT_64` walk, method-pointer-array heuristic. -- `src/lib/pe.c` — PE32/PE32+ loader, section walk, method-pointer- - array heuristic. +- `src/lib/bin/native.c` — shared native-binary view, + CodeRegistration/MetadataRegistration anchor resolution, structural + CodeRegistration parsing, RBin adapter, and the generic section-scan + fallback. +- `src/lib/bin/elf.c`, `src/lib/bin/macho.c`, `src/lib/bin/pe.c` — + simple file-backed format parsers used when the RBin path cannot + recover method pointers. - `src/main.c` — CLI entry point and output emitters. Every row decoder reads via `r_read_le32`/`r_read_le16` (LE on all @@ -1027,45 +1027,41 @@ retained for the two string pools. ## 6. Native-binary scanning, in one picture ```text -ELF/Mach-O/PE image on disk +Native IL2CPP image opened by r_bin or a simple ELF/Mach-O/PE mapper │ - ├─ load & parse segments/sections + ├─ use r_bin sections/symbols/relocs when available + │ or simple file-backed sections for fallback │ ↓ - │ segments { vaddr/vmaddr, size, perms, file mapping } + │ sections { vaddr, size, perms } │ ↓ │ [text_lo, text_hi) (executable union) │ - ├─ ELF only: apply DT_REL / DT_RELA / DT_RELR relative fixups - │ so data-segment pointer arrays match the runtime - │ state (addends resolved, RELR bitmap expanded). 
+ ├─ resolve g_CodeRegistration / g_MetadataRegistration + │ order: CLI -O / r2 eval vars / r2 flags / r_bin symbols │ - ├─ scan each writable/data segment: - │ pass 1: {count32, pad32, ptr} tuple - │ (CodeRegistration-shaped anchor pair) - │ pass 2: {count32, ptr} generic + ├─ parse Il2CppCodeRegistration: + │ v24.2+: match codeGenModules[] to metadata images and + │ copy each module's methodPointers[] + │ older: recover the global methodPointers[] pair │ - └─ accept if a sample of entries at *ptr[] lands in text, - either already (post-relocations) or after + base_vaddr - (raw RVA case). Emit absolute VAs, one per method index. + └─ fallback when forced or unresolved: + scan non-executable data/readable sections for {count, ptr} + pairs whose table entries land in executable code. + Emit absolute VAs, one per method index. ``` -The heuristic is deliberately weaker than a structural -`Il2CppCodeRegistration` match, but it works on every supported -target and doesn't need symbol tables. It does, however, lock onto -**one** `{count, ptr}` array, which on v24.2+ means one image's -methods, not all of them (§3.1 / §3.7). A proper structural match -that walks `codeGenModules[]` is on the roadmap. +The structural path is preferred because it follows Unity's native +registration structures instead of guessing which `{count, ptr}` pair +is the method-pointer table. The fallback remains useful for stripped +binaries or builds where the registration symbols cannot be resolved. +The simple ELF/Mach-O/PE parsers do not reimplement full symbol-table +parsing; they use explicit registration addresses when provided and +otherwise feed their sections into the fallback scanner. -For ELF the relocation pass matters because the Android linker -produces method-pointer arrays almost entirely as -`R_AARCH64_RELATIVE` (type 1027) entries. Without applying them, -the raw array on disk is a run of zeros. 
Packed Android relocations -(`DT_ANDROID_RELA`, `DT_ANDROID_RELR`) are not handled yet and -cause the same "empty array" symptom on Play Store builds. - -For Mach-O and PE the linker has already materialised concrete -values; no explicit relocation pass is required for the tables -r2unity currently scans. +Relocation handling is delegated to r_bin (`r_bin_patch_relocs`) on +the RBin path. The simple ELF parser also applies the common relative +REL/RELA/RELR forms so stripped Android/Linux inputs still have a +lightweight fallback. ## 7. Data we can extract today vs. data we do not @@ -1083,7 +1079,7 @@ Already extracted by r2unity (library + CLI): | referenced assemblies | flat int32 array | | P/Invoke marker methods | `-P` enumeration | | reverse-P/Invoke on v29+ | `-R` enumeration via BLOB | -| method-pointer VA (global) | ELF/Mach-O/PE heuristic | +| method-pointer VA | CodeRegistration parse + r_bin/simple-parser section-scan fallback | Data present on disk / in the binary but **not yet consumed**: @@ -1109,14 +1105,14 @@ Data present on disk / in the binary but **not yet consumed**: metadata load in compiled code (§2.12). - `fieldMarshaledSizes`, `unresolvedVirtualCall*`, WinRT tables, `exportedTypeDefinitions`, RGCTX tables (§2.14–2.18). -- Native-side `CodeRegistration` and `MetadataRegistration` walk → +- Native-side registration data beyond method pointers → `invokerPointers`, `customAttributeGenerators`, `reversePInvokeWrappers`, `genericMethodPointers`, - `interopData`, `codeGenModules[]`, `types`, `fieldOffsets`, - `typeDefinitionsSizes`, `metadataUsages` (§3). -- Richer native scanning: per-module `methodPointers` on v24.2+, - packed Android relocations, Mach-O FAT multi-slice, - chained-fixups, PE import table. + `interopData`, `types`, `fieldOffsets`, `typeDefinitionsSizes`, + `metadataUsages` (§3). 
+- Richer native support: packed Android relocations and other loader + details not yet handled by r_bin for a given target, Mach-O FAT + multi-slice selection, chained-fixups, PE import table. ## 8. validation corpus diff --git a/meson.build b/meson.build index 2d05b07..f942750 100644 --- a/meson.build +++ b/meson.build @@ -18,6 +18,7 @@ add_project_arguments( ) r_util_dep = dependency('r_util') +r_bin_dep = dependency('r_bin') conf = configuration_data() conf.set_quoted('R2UNITY_VERSION', meson.project_version()) @@ -25,11 +26,12 @@ configure_file(output: 'r2unity_config.h', configuration: conf) lib_inc = include_directories('src/lib') lib_sources = files( - 'src/lib/elf.c', 'src/lib/lib.c', - 'src/lib/macho.c', + 'src/lib/bin/elf.c', + 'src/lib/bin/macho.c', + 'src/lib/bin/native.c', + 'src/lib/bin/pe.c', 'src/lib/paths.c', - 'src/lib/pe.c', 'src/lib/sbom.c', ) @@ -37,7 +39,7 @@ r2unity = executable( 'r2unity', files('src/main.c') + lib_sources, include_directories: lib_inc, - dependencies: r_util_dep, + dependencies: [r_util_dep, r_bin_dep], install: true, ) @@ -46,7 +48,6 @@ test('r2unity-version', r2unity, args: ['-v']) plugins_opt = get_option('plugins') r_core_dep = dependency('r_core', required: plugins_opt) -r_bin_dep = dependency('r_bin', required: plugins_opt) build_plugins = plugins_opt.enabled() or ( plugins_opt.auto() and r_core_dep.found() and r_bin_dep.found() ) @@ -72,7 +73,7 @@ if build_plugins 'core_r2unity', files('src/r2/core_r2unity.c') + lib_sources, include_directories: lib_inc, - dependencies: r_core_dep, + dependencies: [r_core_dep, r_bin_dep], name_prefix: '', install: true, install_dir: r2_plugindir, @@ -82,7 +83,7 @@ if build_plugins 'bin_r2unity', files('src/r2/bin_r2unity.c') + lib_sources, include_directories: lib_inc, - dependencies: r_bin_dep, + dependencies: [r_bin_dep], name_prefix: '', install: true, install_dir: r2_plugindir, diff --git a/src/lib/bin/elf.c b/src/lib/bin/elf.c new file mode 100644 index 0000000..498918c --- 
/dev/null +++ b/src/lib/bin/elf.c @@ -0,0 +1,289 @@ +/* r2unity - MIT - Copyright 2025-2026 - pancake */ + +#define R_LOG_ORIGIN "r2unity.elf" + +#include "native_internal.h" +#include + +typedef struct { + ut64 vaddr; + ut64 memsz; + ut64 offset; + ut64 filesz; + ut32 flags; + ut32 type; +} ElfSeg; + +typedef struct { + ut8 *file; + ut64 size; + bool is64; + ElfSeg segs[128]; + size_t nsegs; +} ElfImg; + +static void elf_write_word(ut8 *p, ut64 v, bool is64) { + if (is64) { + r_write_le64 (p, v); + } else { + r_write_le32 (p, (ut32)v); + } +} + +static bool elf_load(const char *path, ElfImg *e) { + memset (e, 0, sizeof (*e)); + size_t size = 0; + e->file = (ut8 *)r_file_slurp (path, &size); + if (!e->file || size < 0x40) { + R_FREE (e->file); + return false; + } + e->size = size; + if (memcmp (e->file, "\x7f""ELF", 4) || e->file[5] != 1) { + R_FREE (e->file); + return false; + } + e->is64 = e->file[4] == 2; + if (e->is64) { + ut64 phoff = r_read_le64 (e->file + 0x20); + ut16 phentsz = r_read_le16 (e->file + 0x36); + ut16 phnum = r_read_le16 (e->file + 0x38); + for (ut16 i = 0; i < phnum && e->nsegs < R_ARRAY_SIZE (e->segs); i++) { + ut64 off = phoff + (ut64)i * phentsz; + if (off + 56 > e->size) { + break; + } + const ut8 *ph = e->file + off; + ut32 type = r_read_le32 (ph); + if (type != 1 && type != 2) { + continue; + } + ElfSeg *s = &e->segs[e->nsegs++]; + s->type = type; + s->flags = r_read_le32 (ph + 4); + s->offset = r_read_le64 (ph + 8); + s->vaddr = r_read_le64 (ph + 16); + s->filesz = r_read_le64 (ph + 32); + s->memsz = r_read_le64 (ph + 40); + } + } else { + ut32 phoff = r_read_le32 (e->file + 0x1c); + ut16 phentsz = r_read_le16 (e->file + 0x2a); + ut16 phnum = r_read_le16 (e->file + 0x2c); + for (ut16 i = 0; i < phnum && e->nsegs < R_ARRAY_SIZE (e->segs); i++) { + ut64 off = (ut64)phoff + (ut64)i * phentsz; + if (off + 32 > e->size) { + break; + } + const ut8 *ph = e->file + off; + ut32 type = r_read_le32 (ph); + if (type != 1 && type != 2) { + continue; 
+ } + ElfSeg *s = &e->segs[e->nsegs++]; + s->type = type; + s->offset = r_read_le32 (ph + 4); + s->vaddr = r_read_le32 (ph + 8); + s->filesz = r_read_le32 (ph + 16); + s->memsz = r_read_le32 (ph + 20); + s->flags = r_read_le32 (ph + 24); + } + } + return true; +} + +static void elf_free(ElfImg *e) { + R_FREE (e->file); +} + +static const ut8 *elf_ptr_at_size(ElfImg *e, ut64 va, ut64 size) { + for (size_t i = 0; i < e->nsegs; i++) { + const ElfSeg *s = &e->segs[i]; + ut64 memsz = s->memsz? s->memsz: s->filesz; + if (va < s->vaddr || va >= s->vaddr + memsz) { + continue; + } + ut64 delta = va - s->vaddr; + if (delta + size <= s->filesz && s->offset + delta + size <= e->size) { + return e->file + s->offset + delta; + } + return NULL; + } + return NULL; +} + +static const ut8 *elf_ptr_at(void *user, ut64 va) { + return elf_ptr_at_size ((ElfImg *)user, va, 8); +} + +static void elf_ranges(ElfImg *e, ut64 *base, ut64 *text_lo, ut64 *text_hi) { + *base = UT64_MAX; + *text_lo = UT64_MAX; + *text_hi = 0; + for (size_t i = 0; i < e->nsegs; i++) { + const ElfSeg *s = &e->segs[i]; + if (s->vaddr < *base) { + *base = s->vaddr; + } + if ((s->flags & 1) && (s->flags & 4)) { + *text_lo = R_MIN (*text_lo, s->vaddr); + *text_hi = R_MAX (*text_hi, s->vaddr + (s->memsz? s->memsz: s->filesz)); + } + } + if (*base == UT64_MAX) { + *base = 0; + } + if (*text_lo == UT64_MAX || *text_hi <= *text_lo) { + *text_lo = *base; + for (size_t i = 0; i < e->nsegs; i++) { + ut64 memsz = e->segs[i].memsz? e->segs[i].memsz: e->segs[i].filesz; + *text_hi = R_MAX (*text_hi, e->segs[i].vaddr + memsz); + } + } +} + +static R2UnityNativeSection *elf_sections(ElfImg *e, size_t *count) { + *count = e->nsegs; + R2UnityNativeSection *out = R_NEWS0 (R2UnityNativeSection, e->nsegs); + if (!out) { + *count = 0; + return NULL; + } + for (size_t i = 0; i < e->nsegs; i++) { + const ElfSeg *s = &e->segs[i]; + ut64 memsz = s->memsz? 
s->memsz: s->filesz;
+		out[i].vaddr = s->vaddr;
+		out[i].vsize = memsz;
+		out[i].size = R_MIN (s->filesz, memsz);
+		out[i].perm = ((s->flags & 4)? R_PERM_R: 0) | ((s->flags & 2)? R_PERM_W: 0) | ((s->flags & 1)? R_PERM_X: 0);
+		out[i].is_data = (s->flags & 2) && !(s->flags & 1);
+	}
+	return out;
+}
+/* Apply relative REL/RELA/RELR relocations in place on the slurped file bytes, adding `base` to each addend; returns without changes when no usable dynamic table is found. */
+static void elf_apply_relocs(ElfImg *e, ut64 base) {
+	ut64 dyn_off = 0, dyn_sz = 0;
+	for (size_t i = 0; i < e->nsegs; i++) {
+		if (e->segs[i].type == 2) { /* program header type 2 is PT_DYNAMIC */
+			dyn_off = e->segs[i].offset;
+			dyn_sz = e->segs[i].filesz;
+			break;
+		}
+	}
+	if (!dyn_off || !dyn_sz || dyn_off > e->size || dyn_sz > e->size - dyn_off) { /* subtraction form: offset + size from the file cannot wrap past the check */
+		return;
+	}
+	ut64 rela_off = 0, rela_sz = 0, rela_ent = e->is64? 24: 12, rela_min = rela_ent; /* *_min = bytes read from one entry; *_ent may be overridden by the dynamic table */
+	ut64 rel_off = 0, rel_sz = 0, rel_ent = e->is64? 16: 8, rel_min = rel_ent;
+	ut64 relr_off = 0, relr_sz = 0, relr_ent = e->is64? 8: 4, relr_min = relr_ent;
+	const ut8 *d = e->file + dyn_off;
+	ut64 dyn_ent = e->is64? 16: 8;
+	for (ut64 off = 0; off + dyn_ent <= dyn_sz; off += dyn_ent) {
+		ut64 tag = e->is64? r_read_le64 (d + off): (ut64)r_read_le32 (d + off);
+		ut64 val = e->is64? r_read_le64 (d + off + 8): (ut64)r_read_le32 (d + off + 4);
+		if (!tag) { /* tag 0 (DT_NULL) terminates the table */
+			break;
+		}
+		switch (tag) {
+		case 7: rela_off = val; break; /* DT_RELA */
+		case 8: rela_sz = val; break; /* DT_RELASZ */
+		case 9: rela_ent = val; break; /* DT_RELAENT */
+		case 17: rel_off = val; break; /* DT_REL */
+		case 18: rel_sz = val; break; /* DT_RELSZ */
+		case 19: rel_ent = val; break; /* DT_RELENT */
+		case 35: relr_sz = val; break; /* DT_RELRSZ */
+		case 36: relr_off = val; break; /* DT_RELR */
+		case 37: relr_ent = val; break; /* DT_RELRENT */
+		default: break;
+		}
+	}
+	const ut64 type_mask = e->is64? 0xffffffffULL: 0xffULL; /* low bits of r_info carry the relocation type */
+	for (ut64 i = 0; rela_off && rela_ent >= rela_min && i + rela_ent <= rela_sz; i += rela_ent) { /* reject a stride shorter than the entry: the reads below use fixed offsets */
+		const ut8 *rp = elf_ptr_at_size (e, rela_off + i, rela_min);
+		if (!rp) {
+			break;
+		}
+		ut64 r_offset = e->is64? r_read_le64 (rp): (ut64)r_read_le32 (rp);
+		ut64 r_info = e->is64? r_read_le64 (rp + 8): (ut64)r_read_le32 (rp + 4);
+		ut64 r_addend = e->is64? r_read_le64 (rp + 16): (ut64)r_read_le32 (rp + 8);
+		ut64 type = r_info & type_mask;
+		if (type == 8 || type == 23 || type == 1027) { /* R_386/R_X86_64_RELATIVE, R_ARM_RELATIVE, R_AARCH64_RELATIVE */
+			ut8 *loc = (ut8 *)elf_ptr_at_size (e, r_offset, e->is64? 8: 4);
+			if (loc) {
+				elf_write_word (loc, base + r_addend, e->is64);
+			}
+		}
+	}
+	for (ut64 i = 0; rel_off && rel_ent >= rel_min && i + rel_ent <= rel_sz; i += rel_ent) { /* same stride guard as the RELA walk */
+		const ut8 *rp = elf_ptr_at_size (e, rel_off + i, rel_min);
+		if (!rp) {
+			break;
+		}
+		ut64 r_offset = e->is64? r_read_le64 (rp): (ut64)r_read_le32 (rp);
+		ut64 r_info = e->is64? r_read_le64 (rp + 8): (ut64)r_read_le32 (rp + 4);
+		ut64 type = r_info & type_mask;
+		if (type == 8 || type == 23 || type == 1027) {
+			ut8 *loc = (ut8 *)elf_ptr_at_size (e, r_offset, e->is64? 8: 4);
+			if (loc) {
+				ut64 addend = e->is64? r_read_le64 (loc): (ut64)r_read_le32 (loc); /* REL keeps the addend in the patched word itself */
+				elf_write_word (loc, base + addend, e->is64);
+			}
+		}
+	}
+	if (!e->is64 || !relr_off || !relr_sz || relr_ent < relr_min) { /* RELR is only handled for 64-bit images; entries are read as 8 bytes */
+		return;
+	}
+	ut64 curr = 0;
+	for (ut64 i = 0; i + relr_ent <= relr_sz; i += relr_ent) {
+		const ut8 *rp = elf_ptr_at_size (e, relr_off + i, relr_min);
+		if (!rp) {
+			break;
+		}
+		ut64 r = r_read_le64 (rp);
+		if (!(r & 1)) { /* even entry: an address to relocate; bitmaps that follow start at the next word */
+			ut8 *loc = (ut8 *)elf_ptr_at_size (e, r, 8);
+			if (loc) {
+				elf_write_word (loc, base + r_read_le64 (loc), true);
+			}
+			curr = r + 8;
+			continue;
+		}
+		ut64 bitmap = r >> 1;
+		for (int bit = 0; bit < 63; bit++) {
+			if (bitmap & (1ULL << bit)) {
+				ut8 *loc = (ut8 *)elf_ptr_at_size (e, curr + (ut64)bit * 8, 8); /* bit N of the bitmap covers word N counted from curr, so bit 0 is curr itself */
+				if (loc) {
+					elf_write_word (loc, base + r_read_le64 (loc), true);
+				}
+			}
+		}
+		curr += 8 * 63; /* each bitmap entry spans 63 words */
+	}
+}
+
+bool r2unity_find_method_pointers_elf(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result) {
+	if (!meta || !path || !result) {
+		return false;
+	}
+	ElfImg e;
+	if (!elf_load (path, &e)) {
+		return false;
+	}
+	ut64 base = 0, text_lo = 0, text_hi = 0;
+	elf_ranges (&e, &base, &text_lo, &text_hi);
+	elf_apply_relocs (&e, base);
+	R2UnityNativeView view = 
{ + .user = &e, + .ptr_at = elf_ptr_at, + .ptr_size = e.is64? 8: 4, + .base_vaddr = base, + .text_lo = text_lo, + .text_hi = text_hi + }; + size_t section_count = 0; + R2UnityNativeSection *sections = elf_sections (&e, §ion_count); + bool ok = r2unity_native_run_view (meta, &view, sections, section_count, options, result); + R_FREE (sections); + elf_free (&e); + return ok; +} diff --git a/src/lib/bin/macho.c b/src/lib/bin/macho.c new file mode 100644 index 0000000..579ad94 --- /dev/null +++ b/src/lib/bin/macho.c @@ -0,0 +1,176 @@ +/* r2unity - MIT - Copyright 2025-2026 - pancake */ + +#define R_LOG_ORIGIN "r2unity.macho" + +#include "native_internal.h" +#include + +typedef struct { + char name[17]; + ut64 vmaddr; + ut64 vmsize; + ut64 fileoff; + ut64 filesize; + ut32 maxprot; +} MachSeg; + +typedef struct { + ut8 *file; + ut64 size; + ut64 base; + ut64 vm_base; + MachSeg segs[128]; + size_t nsegs; +} MachO; + +static bool macho_load(const char *path, MachO *mo) { + memset (mo, 0, sizeof (*mo)); + size_t size = 0; + mo->file = (ut8 *)r_file_slurp (path, &size); + if (!mo->file || size < 0x20) { + R_FREE (mo->file); + return false; + } + mo->size = size; + ut32 magic = r_read_be32 (mo->file); + ut64 off = 0; + if (magic == 0xcafebabe || magic == 0xbebafeca) { + ut32 nfat = r_read_be32 (mo->file + 4); + ut32 best = 0; + for (ut32 i = 0; i < nfat && 8 + (ut64)i * 20 + 20 <= mo->size; i++) { + const ut8 *fa = mo->file + 8 + (ut64)i * 20; + if (r_read_be32 (fa) == 0x0100000c) { + best = i; + break; + } + } + const ut8 *fa = mo->file + 8 + (ut64)best * 20; + off = r_read_be32 (fa + 8); + } + if (off + 0x20 > mo->size) { + R_FREE (mo->file); + return false; + } + mo->base = off; + const ut8 *mh = mo->file + off; + if (r_read_le32 (mh) != 0xfeedfacf) { + R_FREE (mo->file); + return false; + } + ut32 ncmds = r_read_le32 (mh + 0x10); + ut64 co = off + 0x20; + for (ut32 i = 0; i < ncmds && co + 8 <= mo->size; i++) { + ut32 cmd = r_read_le32 (mo->file + co); + ut32 cmdsz = 
r_read_le32 (mo->file + co + 4); + if (!cmdsz || co + cmdsz > mo->size) { + break; + } + if (cmd == 0x19 && cmdsz >= 72 && mo->nsegs < R_ARRAY_SIZE (mo->segs)) { + const ut8 *sp = mo->file + co + 8; + MachSeg *s = &mo->segs[mo->nsegs++]; + memcpy (s->name, sp, 16); + s->name[16] = 0; + s->vmaddr = r_read_le64 (sp + 16); + s->vmsize = r_read_le64 (sp + 24); + s->fileoff = r_read_le64 (sp + 32); + s->filesize = r_read_le64 (sp + 40); + s->maxprot = r_read_le32 (sp + 48); + if (!mo->vm_base || s->vmaddr < mo->vm_base) { + mo->vm_base = s->vmaddr; + } + } + co += cmdsz; + } + return mo->nsegs > 0; +} + +static void macho_free(MachO *mo) { + R_FREE (mo->file); +} + +static const ut8 *macho_ptr_at(void *user, ut64 va) { + MachO *mo = (MachO *)user; + for (size_t i = 0; i < mo->nsegs; i++) { + const MachSeg *s = &mo->segs[i]; + ut64 vsize = s->vmsize? s->vmsize: s->filesize; + if (va < s->vmaddr || va >= s->vmaddr + vsize) { + continue; + } + ut64 delta = va - s->vmaddr; + if (delta + 8 <= s->filesize && mo->base + s->fileoff + delta + 8 <= mo->size) { + return mo->file + mo->base + s->fileoff + delta; + } + return NULL; + } + return NULL; +} + +static void macho_text_range(MachO *mo, ut64 *lo, ut64 *hi) { + *lo = UT64_MAX; + *hi = 0; + for (size_t i = 0; i < mo->nsegs; i++) { + const MachSeg *s = &mo->segs[i]; + if ((s->maxprot & 4) || !strncmp (s->name, "__TEXT", 6)) { + *lo = R_MIN (*lo, s->vmaddr); + *hi = R_MAX (*hi, s->vmaddr + (s->vmsize? s->vmsize: s->filesize)); + } + } + if (*lo != UT64_MAX && *hi > *lo) { + return; + } + *lo = UT64_MAX; + *hi = 0; + for (size_t i = 0; i < mo->nsegs; i++) { + *lo = R_MIN (*lo, mo->segs[i].vmaddr); + ut64 vsize = mo->segs[i].vmsize? 
mo->segs[i].vmsize: mo->segs[i].filesize; + *hi = R_MAX (*hi, mo->segs[i].vmaddr + vsize); + } + if (*lo == UT64_MAX) { + *lo = 0; + } +} + +static R2UnityNativeSection *macho_sections(MachO *mo, size_t *count) { + *count = mo->nsegs; + R2UnityNativeSection *out = R_NEWS0 (R2UnityNativeSection, mo->nsegs); + if (!out) { + *count = 0; + return NULL; + } + for (size_t i = 0; i < mo->nsegs; i++) { + const MachSeg *s = &mo->segs[i]; + ut64 vsize = s->vmsize? s->vmsize: s->filesize; + out[i].vaddr = s->vmaddr; + out[i].vsize = vsize; + out[i].size = R_MIN (s->filesize, vsize); + out[i].perm = ((s->maxprot & 1)? R_PERM_R: 0) | ((s->maxprot & 2)? R_PERM_W: 0) | ((s->maxprot & 4)? R_PERM_X: 0); + out[i].is_data = (s->maxprot & 1) && !(s->maxprot & 4); + } + return out; +} + +bool r2unity_find_method_pointers_macho(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result) { + if (!meta || !path || !result) { + return false; + } + MachO mo; + if (!macho_load (path, &mo)) { + return false; + } + ut64 text_lo = 0, text_hi = 0; + macho_text_range (&mo, &text_lo, &text_hi); + R2UnityNativeView view = { + .user = &mo, + .ptr_at = macho_ptr_at, + .ptr_size = 8, + .base_vaddr = mo.vm_base, + .text_lo = text_lo, + .text_hi = text_hi + }; + size_t section_count = 0; + R2UnityNativeSection *sections = macho_sections (&mo, §ion_count); + bool ok = r2unity_native_run_view (meta, &view, sections, section_count, options, result); + R_FREE (sections); + macho_free (&mo); + return ok; +} diff --git a/src/lib/bin/native.c b/src/lib/bin/native.c new file mode 100644 index 0000000..27ed2b0 --- /dev/null +++ b/src/lib/bin/native.c @@ -0,0 +1,875 @@ +/* r2unity - MIT - Copyright 2025-2026 - pancake */ + +#define R_LOG_ORIGIN "r2unity.native" + +#include "native_internal.h" +#include +#include +#include + +static const char *const code_registration_names[] = { + "g_CodeRegistration", + "s_CodeRegistration", + "CodeRegistration", + 
"g_Il2CppCodeRegistration", + "Il2CppCodeRegistration", + NULL +}; + +static const char *const metadata_registration_names[] = { + "g_MetadataRegistration", + "s_MetadataRegistration", + "MetadataRegistration", + "g_Il2CppMetadataRegistration", + "Il2CppMetadataRegistration", + NULL +}; + +static const ut8 *view_ptr_at(R2UnityNativeView *view, ut64 va, ut64 *actual_va) { + if (!view || !view->ptr_at) { + return NULL; + } + const ut8 *p = view->ptr_at (view->user, va); + if (p) { + if (actual_va) { + *actual_va = va; + } + return p; + } + if (view->base_vaddr != UT64_MAX && view->base_vaddr && va < view->base_vaddr) { + ut64 rebased = view->base_vaddr + va; + p = view->ptr_at (view->user, rebased); + if (p) { + if (actual_va) { + *actual_va = rebased; + } + return p; + } + } + return NULL; +} + +static bool read_u32_at(R2UnityNativeView *view, ut64 va, ut32 *out) { + const ut8 *p = view_ptr_at (view, va, NULL); + if (!p) { + return false; + } + *out = r_read_le32 (p); + return true; +} + +static bool read_ptr_at(R2UnityNativeView *view, ut64 va, ut64 *out) { + const ut8 *p = view_ptr_at (view, va, NULL); + if (!p) { + return false; + } + *out = view->ptr_size == 8? r_read_le64 (p): (ut64)r_read_le32 (p); + return true; +} + +static bool read_count_ptr_pair(R2UnityNativeView *view, ut64 va, ut32 *count, ut64 *ptr) { + if (!read_u32_at (view, va, count)) { + return false; + } + return read_ptr_at (view, va + (view->ptr_size == 8? 
8: 4), ptr); +} + +static ut64 data_va_from_raw(R2UnityNativeView *view, ut64 raw) { + if (!raw) { + return 0; + } + ut64 actual = 0; + if (view_ptr_at (view, raw, &actual)) { + return actual; + } + return 0; +} + +static ut64 code_va_from_raw(R2UnityNativeView *view, ut64 raw) { + if (!raw) { + return 0; + } + if (raw >= view->text_lo && raw < view->text_hi) { + return raw; + } + if (view->base_vaddr != UT64_MAX && view->base_vaddr && raw < view->base_vaddr) { + ut64 rebased = view->base_vaddr + raw; + if (rebased >= view->text_lo && rebased < view->text_hi) { + return rebased; + } + } + return 0; +} + +static char *read_cstr_at(R2UnityNativeView *view, ut64 va) { + if (!va) { + return NULL; + } + RStrBuf *sb = r_strbuf_new (""); + if (!sb) { + return NULL; + } + for (size_t i = 0; i < 512; i++) { + const ut8 *p = view_ptr_at (view, va + i, NULL); + if (!p) { + r_strbuf_free (sb); + return NULL; + } + if (!*p) { + return r_strbuf_drain (sb); + } + if (*p < 0x20 || *p > 0x7e) { + r_strbuf_free (sb); + return NULL; + } + r_strbuf_appendf (sb, "%c", *p); + } + r_strbuf_free (sb); + return NULL; +} + +static bool image_name_eq(const char *module_name, const char *image_name) { + if (!module_name || !image_name) { + return false; + } + return !strcmp (r_file_basename (module_name), r_file_basename (image_name)); +} + +static int image_index_by_name(R2UnityMetadata *meta, const Il2CppImageDefinition *images, size_t image_count, const char *module_name) { + if (!module_name || !*module_name) { + return -1; + } + for (size_t i = 0; i < image_count; i++) { + char *name = r2unity_get_string (meta, images[i].nameIndex); + bool match = image_name_eq (module_name, name); + free (name); + if (match) { + return (int)i; + } + } + return -1; +} + +static size_t image_method_count(const Il2CppImageDefinition *image, const Il2CppTypeDefinition *types, size_t type_count) { + if (!image || !types || image->typeStart < 0) { + return 0; + } + size_t start = (size_t)image->typeStart; + 
size_t end = R_MIN (type_count, start + image->typeCount); + size_t count = 0; + for (size_t i = start; i < end; i++) { + count += types[i].method_count; + } + return count; +} + +static size_t copy_image_method_table(R2UnityNativeView *view, const Il2CppImageDefinition *image, const Il2CppTypeDefinition *types, size_t type_count, size_t method_count, ut64 table_va, ut32 table_count, ut64 *out_ptrs, size_t *out_seen) { + if (out_seen) { + *out_seen = 0; + } + if (!image || !types || !out_ptrs || image->typeStart < 0 || !table_va) { + return 0; + } + size_t local = 0; + size_t copied = 0; + size_t seen = 0; + size_t start = (size_t)image->typeStart; + size_t end = R_MIN (type_count, start + image->typeCount); + for (size_t ti = start; ti < end && local < table_count; ti++) { + const Il2CppTypeDefinition *td = &types[ti]; + if (td->methodStart < 0) { + local += td->method_count; + continue; + } + for (size_t k = 0; k < td->method_count && local < table_count; k++, local++) { + size_t mi = (size_t)td->methodStart + k; + if (mi >= method_count) { + continue; + } + ut64 raw = 0; + if (!read_ptr_at (view, table_va + (ut64)local * view->ptr_size, &raw)) { + continue; + } + if (raw) { + seen++; + } + ut64 addr = code_va_from_raw (view, raw); + if (addr) { + out_ptrs[mi] = addr; + copied++; + } + } + } + if (out_seen) { + *out_seen = seen; + } + return copied; +} + +static size_t copy_global_method_table(R2UnityNativeView *view, size_t method_count, ut64 table_va, ut32 table_count, ut64 *out_ptrs, size_t *out_seen) { + if (out_seen) { + *out_seen = 0; + } + if (!table_va || !table_count || !method_count || !out_ptrs) { + return 0; + } + size_t max = R_MIN ((size_t)table_count, method_count); + size_t copied = 0; + size_t seen = 0; + for (size_t i = 0; i < max; i++) { + ut64 raw = 0; + if (!read_ptr_at (view, table_va + (ut64)i * view->ptr_size, &raw)) { + break; + } + if (raw) { + seen++; + } + ut64 addr = code_va_from_raw (view, raw); + if (addr) { + out_ptrs[i] = addr; + 
copied++; + } + } + if (out_seen) { + *out_seen = seen; + } + return copied; +} + +static bool parse_codegen_modules(R2UnityMetadata *meta, R2UnityNativeView *view, ut64 code_registration_va, ut64 *out_ptrs, R2UnityNativeResult *result) { + size_t image_count = 0; + Il2CppImageDefinition *images = r2unity_get_images (meta, &image_count); + size_t type_count = 0; + Il2CppTypeDefinition *types = r2unity_get_type_definitions (meta, &type_count); + size_t method_count = (size_t)r2unity_metadata_section_count (meta, R2U_SEC_METHODS); + if (!images || !image_count || !types || !type_count || !method_count) { + R_FREE (images); + R_FREE (types); + return false; + } + bool found = false; + ut64 found_modules_va = 0; + size_t found_copied = 0; + size_t pair_span = view->ptr_size == 8? 16: 8; + for (ut64 off = 0; off + pair_span <= 0x280 && !found; off += 4) { + ut32 count = 0; + ut64 raw_modules = 0; + if (!read_count_ptr_pair (view, code_registration_va + off, &count, &raw_modules) || count != image_count || !raw_modules) { + continue; + } + ut64 modules_va = data_va_from_raw (view, raw_modules); + if (!modules_va) { + continue; + } + ut64 *candidate = R_NEWS (ut64, method_count); + bool *used_images = R_NEWS0 (bool, image_count); + if (!candidate || !used_images) { + R_FREE (candidate); + R_FREE (used_images); + break; + } + size_t exact_matches = 0; + size_t copied = 0; + for (int pass = 0; pass < 2; pass++) { + for (size_t i = 0; i < image_count; i++) { + ut64 raw_module = 0; + if (!read_ptr_at (view, modules_va + (ut64)i * view->ptr_size, &raw_module)) { + continue; + } + ut64 module_va = data_va_from_raw (view, raw_module); + if (!module_va) { + continue; + } + ut64 raw_name = 0; + ut32 mcount = 0; + ut64 raw_table = 0; + if (!read_ptr_at (view, module_va, &raw_name) + || !read_count_ptr_pair (view, module_va + view->ptr_size, &mcount, &raw_table)) { + continue; + } + char *module_name = read_cstr_at (view, data_va_from_raw (view, raw_name)); + int image_idx = 
image_index_by_name (meta, images, image_count, module_name); + if (pass == 0) { + if (image_idx < 0) { + free (module_name); + continue; + } + exact_matches++; + } else if (image_idx >= 0 || exact_matches > 0) { + free (module_name); + continue; + } else { + image_idx = (i < image_count)? (int)i: -1; + } + free (module_name); + if (image_idx < 0 || (size_t)image_idx >= image_count || used_images[image_idx]) { + continue; + } + size_t expected = image_method_count (&images[image_idx], types, type_count); + if (!expected || !mcount || mcount > method_count * 2 || (expected > 8 && mcount < expected / 2)) { + continue; + } + ut64 table_va = data_va_from_raw (view, raw_table); + if (!table_va) { + continue; + } + size_t seen = 0; + size_t n = copy_image_method_table (view, &images[image_idx], types, type_count, method_count, table_va, mcount, candidate, &seen); + if (n || seen) { + used_images[image_idx] = true; + copied += n; + } + } + } + if (copied > 0) { + memcpy (out_ptrs, candidate, method_count * sizeof (ut64)); + found = true; + found_modules_va = modules_va; + found_copied = copied; + } + R_FREE (candidate); + R_FREE (used_images); + } + if (found) { + result->code_gen_modules_va = found_modules_va; + result->method_pointers_va = found_modules_va; + R_LOG_DEBUG ("CodeRegistration codeGenModules=0x%" PFMT64x " copied=%zu", found_modules_va, found_copied); + } + R_FREE (images); + R_FREE (types); + return found; +} + +static bool parse_global_method_pointers(R2UnityMetadata *meta, R2UnityNativeView *view, ut64 code_registration_va, ut64 *out_ptrs, R2UnityNativeResult *result) { + size_t method_count = (size_t)r2unity_metadata_section_count (meta, R2U_SEC_METHODS); + if (!method_count) { + return false; + } + size_t pair_span = view->ptr_size == 8? 
16: 8; + for (ut64 off = 0; off + pair_span <= 0x100; off += 4) { + ut32 count = 0; + ut64 raw_table = 0; + if (!read_count_ptr_pair (view, code_registration_va + off, &count, &raw_table)) { + continue; + } + if (!count || count > method_count * 2 || (method_count > 8 && count < method_count / 2)) { + continue; + } + ut64 table_va = data_va_from_raw (view, raw_table); + if (!table_va) { + continue; + } + ut64 *candidate = R_NEWS (ut64, method_count); + if (!candidate) { + return false; + } + size_t seen = 0; + size_t copied = copy_global_method_table (view, method_count, table_va, count, candidate, &seen); + size_t sample = R_MIN ((size_t)128, R_MIN ((size_t)count, method_count)); + size_t min_good = sample < 8? 1: 8; + if (copied >= min_good || (copied && seen >= min_good)) { + memcpy (out_ptrs, candidate, method_count * sizeof (ut64)); + result->method_pointers_va = table_va; + R_FREE (candidate); + R_LOG_DEBUG ("CodeRegistration methodPointers=0x%" PFMT64x " count=%u copied=%zu", table_va, count, copied); + return true; + } + R_FREE (candidate); + } + return false; +} + +bool r2unity_native_parse_code_registration(R2UnityMetadata *meta, R2UnityNativeView *view, ut64 code_registration_va, R2UnityNativeSource source, R2UnityNativeResult *result) { + R_RETURN_VAL_IF_FAIL (meta && view && result && code_registration_va, false); + size_t method_count = (size_t)r2unity_metadata_section_count (meta, R2U_SEC_METHODS); + if (!method_count) { + return false; + } + ut64 actual_code_va = 0; + if (!view_ptr_at (view, code_registration_va, &actual_code_va)) { + return false; + } + ut64 *method_ptrs = R_NEWS (ut64, method_count); + if (!method_ptrs) { + return false; + } + bool ok = parse_codegen_modules (meta, view, actual_code_va, method_ptrs, result); + if (!ok) { + ok = parse_global_method_pointers (meta, view, actual_code_va, method_ptrs, result); + } + if (!ok) { + R_FREE (method_ptrs); + return false; + } + R_FREE (result->method_ptrs); + result->method_ptrs = 
method_ptrs; + result->has_method_ptrs = true; + result->source = source; + result->code_registration_va = actual_code_va; + result->ptr_size = view->ptr_size; + return true; +} + +static bool symbol_matches(const char *name, const char *alias) { + if (!name || !alias) { + return false; + } + if (!strcmp (name, alias)) { + return true; + } + if (*name == '_' && !strcmp (name + 1, alias)) { + return true; + } + if (*alias == '_' && !strcmp (name, alias + 1)) { + return true; + } + return false; +} + +static bool symbol_matches_any(const char *name, const char *const *names) { + if (!name || !names) { + return false; + } + for (size_t i = 0; names[i]; i++) { + if (symbol_matches (name, names[i])) { + return true; + } + } + return false; +} + +ut64 r2unity_native_resolve_override(const R2UnityNativeOptions *options, const char *const *names) { + if (!options || !names) { + return 0; + } + for (size_t i = 0; i < options->symbols_count; i++) { + const char *name = options->symbols[i].name; + for (size_t j = 0; names[j]; j++) { + if (symbol_matches (name, names[j])) { + return options->symbols[i].va; + } + } + } + return 0; +} + +bool r2unity_native_result_has_ptrs(const R2UnityNativeResult *result) { + return result && result->method_ptrs && result->has_method_ptrs; +} + +void r2unity_native_take_heuristic_result(R2UnityNativeResult *result, ut64 *method_ptrs, int ptr_size) { + if (!result || !method_ptrs) { + return; + } + R_FREE (result->method_ptrs); + result->method_ptrs = method_ptrs; + result->has_method_ptrs = true; + result->source = R2U_NATIVE_SOURCE_HEURISTIC; + result->ptr_size = ptr_size; +} + +typedef struct { + RBinFile *bf; + RVecRBinSection *sections; + ut8 scratch[8]; +} R2UnityRBinView; + +static bool rbin_section_contains(const RBinSection *s, ut64 va) { + ut64 vsize = s->vsize? 
s->vsize: s->size; + return vsize && va >= s->vaddr && va < s->vaddr + vsize; +} + +static bool rbin_va_to_paddr(R2UnityRBinView *rv, ut64 va, ut64 *paddr) { + if (rv->sections) { + RBinSection *s; + R_VEC_FOREACH (rv->sections, s) { + if (!rbin_section_contains (s, va)) { + continue; + } + ut64 delta = va - s->vaddr; + if (delta < s->size) { + *paddr = s->paddr + delta; + return true; + } + } + } + ut64 sz = r_buf_size (rv->bf->buf); + if (va < sz) { + *paddr = va; + return true; + } + return false; +} + +static const ut8 *rbin_ptr_at(void *user, ut64 va) { + R2UnityRBinView *rv = (R2UnityRBinView *)user; + ut64 paddr = 0; + if (!rv || !rv->bf || !rv->bf->buf || !rbin_va_to_paddr (rv, va, &paddr)) { + return NULL; + } + if (r_buf_read_at (rv->bf->buf, paddr, rv->scratch, sizeof (rv->scratch)) != (st64)sizeof (rv->scratch)) { + return NULL; + } + return rv->scratch; +} + +static int rbin_ptr_size(RBin *bin) { + RBinInfo *info = r_bin_get_info (bin); + return (info && info->bits == 32)? 4: 8; +} + +static ut64 rbin_base_vaddr(RBinFile *bf, RVecRBinSection *sections) { + ut64 base = bf? r_bin_file_get_baddr (bf): 0; + if (base) { + return base; + } + ut64 lo = UT64_MAX; + if (sections) { + RBinSection *s; + R_VEC_FOREACH (sections, s) { + if (s->vaddr && s->vaddr < lo) { + lo = s->vaddr; + } + } + } + return lo == UT64_MAX? 0: lo; +} + +static void rbin_text_range(RVecRBinSection *sections, ut64 *text_lo, ut64 *text_hi) { + ut64 lo = UT64_MAX; + ut64 hi = 0; + if (sections) { + RBinSection *s; + R_VEC_FOREACH (sections, s) { + ut64 vsize = s->vsize? s->vsize: s->size; + if (!vsize || !(s->perm & R_PERM_X)) { + continue; + } + if (s->vaddr < lo) { + lo = s->vaddr; + } + if (s->vaddr + vsize > hi) { + hi = s->vaddr + vsize; + } + } + if (lo == UT64_MAX || hi <= lo) { + R_VEC_FOREACH (sections, s) { + ut64 vsize = s->vsize? 
s->vsize: s->size; + if (!vsize) { + continue; + } + if (s->vaddr < lo) { + lo = s->vaddr; + } + if (s->vaddr + vsize > hi) { + hi = s->vaddr + vsize; + } + } + } + } + *text_lo = lo == UT64_MAX? 0: lo; + *text_hi = hi; +} + +static R2UnityNativeSection *rbin_sections(RVecRBinSection *sections, size_t *out_count) { + if (out_count) { + *out_count = 0; + } + if (!sections) { + return NULL; + } + size_t count = 0; + RBinSection *s; + R_VEC_FOREACH (sections, s) { + count++; + } + R2UnityNativeSection *out = R_NEWS0 (R2UnityNativeSection, count); + if (!out) { + return NULL; + } + size_t i = 0; + R_VEC_FOREACH (sections, s) { + ut64 vsize = s->vsize? s->vsize: s->size; + out[i].vaddr = s->vaddr; + out[i].vsize = vsize; + out[i].size = R_MIN (s->size, vsize); + out[i].perm = s->perm; + out[i].is_data = s->is_data; + i++; + } + if (out_count) { + *out_count = count; + } + return out; +} + +static bool bin_name_matches(RBinName *name, const char *const *aliases) { + return name && (symbol_matches_any (name->name, aliases) + || symbol_matches_any (name->oname, aliases) + || symbol_matches_any (name->fname, aliases)); +} + +static ut64 rbin_find_symbol(RBin *bin, RBinFile *bf, const char *const *aliases) { + RVecRBinSymbol *symbols = bf? r_bin_file_get_symbols_vec (bf): r_bin_get_symbols_vec (bin); + if (!symbols) { + return 0; + } + RBinSymbol *sym; + R_VEC_FOREACH (symbols, sym) { + if (!bin_name_matches (sym->name, aliases)) { + continue; + } + ut64 va = sym->vaddr? 
sym->vaddr: r_bin_file_get_vaddr (bf, sym->paddr, sym->vaddr); + if (va) { + return va; + } + } + return 0; +} + +static bool rbin_probe_table(R2UnityNativeView *view, ut64 arrptr, ut32 count, ut32 min_seen, ut32 min_good, size_t method_count, ut64 *candidate) { + ut32 sample = R_MIN ((ut32)128, count); + ut32 good = 0; + ut32 seen = 0; + for (ut32 k = 0; k < sample; k++) { + ut64 raw = 0; + if (!read_ptr_at (view, arrptr + (ut64)k * view->ptr_size, &raw)) { + return false; + } + if (raw) { + seen++; + } + if (code_va_from_raw (view, raw)) { + good++; + } + } + if (seen < min_seen || good < min_good) { + return false; + } + memset (candidate, 0, method_count * sizeof (ut64)); + size_t tocopy = R_MIN ((size_t)count, method_count); + size_t copied = 0; + for (size_t i = 0; i < tocopy; i++) { + ut64 raw = 0; + if (!read_ptr_at (view, arrptr + (ut64)i * view->ptr_size, &raw)) { + break; + } + ut64 addr = code_va_from_raw (view, raw); + if (addr) { + candidate[i] = addr; + copied++; + } + } + return copied >= R_MIN ((size_t)8, tocopy); +} + +bool r2unity_native_scan_sections(R2UnityMetadata *meta, R2UnityNativeView *view, const R2UnityNativeSection *sections, size_t section_count, R2UnityNativeResult *result) { + size_t method_count = (size_t)r2unity_metadata_section_count (meta, R2U_SEC_METHODS); + if (!method_count || !sections || !section_count) { + return false; + } + ut64 *candidate = R_NEWS (ut64, method_count); + if (!candidate) { + return false; + } + bool found = false; + ut32 expected = (ut32)R_MAX ((ut64)64, (ut64)method_count); + for (int pass = 0; pass < 2 && !found; pass++) { + const ut32 min_count = pass == 0? 32: 64; + const ut64 min_secsize = pass == 0? 
(ut64)(16 + view->ptr_size * 2): (ut64)(8 + view->ptr_size); + for (size_t i = 0; i < section_count && !found; i++) { + const R2UnityNativeSection *s = &sections[i]; + if ((s->perm & R_PERM_X) || (!s->is_data && !(s->perm & R_PERM_R)) || s->size < min_secsize) { + continue; + } + for (ut64 off = 0; off + min_secsize <= s->size; off += 4) { + ut32 cnt = 0; + if (!read_u32_at (view, s->vaddr + off, &cnt) || cnt < min_count) { + continue; + } + if (pass == 1 && expected && (cnt > expected * 2 || cnt < expected / 2)) { + continue; + } + ut64 raw_table = 0; + if (!read_ptr_at (view, s->vaddr + off + (view->ptr_size == 8? 8: 4), &raw_table)) { + continue; + } + ut64 table_va = data_va_from_raw (view, raw_table); + if (!table_va) { + continue; + } + ut32 sample = R_MIN ((ut32)128, cnt); + ut32 min_seen = pass == 0? 8: sample / 2; + ut32 min_good = pass == 0? 8: (sample * 3) / 4; + if (rbin_probe_table (view, table_va, cnt, min_seen, min_good, method_count, candidate)) { + result->method_pointers_va = table_va; + found = true; + break; + } + } + } + } + if (found) { + r2unity_native_take_heuristic_result (result, candidate, view->ptr_size); + return true; + } + R_FREE (candidate); + return false; +} + +bool r2unity_native_run_view(R2UnityMetadata *meta, R2UnityNativeView *view, const R2UnityNativeSection *sections, size_t section_count, const R2UnityNativeOptions *options, R2UnityNativeResult *result) { + R_RETURN_VAL_IF_FAIL (meta && view && result, false); + memset (result, 0, sizeof (*result)); + result->ptr_size = view->ptr_size; + ut64 code_registration_va = options? options->code_registration_va: 0; + ut64 metadata_registration_va = options? 
options->metadata_registration_va: 0; + if (!code_registration_va) { + code_registration_va = r2unity_native_resolve_override (options, code_registration_names); + } + if (!metadata_registration_va) { + metadata_registration_va = r2unity_native_resolve_override (options, metadata_registration_names); + } + result->code_registration_va = code_registration_va; + result->metadata_registration_va = metadata_registration_va; + if (!(options && options->force_heuristic) && code_registration_va) { + if (r2unity_native_parse_code_registration (meta, view, code_registration_va, R2U_NATIVE_SOURCE_OVERRIDE, result)) { + return true; + } + } + return r2unity_native_scan_sections (meta, view, sections, section_count, result); +} + +R_API const char *r2unity_native_source_name(R2UnityNativeSource source) { + switch (source) { + case R2U_NATIVE_SOURCE_SYMBOL: return "symbol"; + case R2U_NATIVE_SOURCE_OVERRIDE: return "override"; + case R2U_NATIVE_SOURCE_HEURISTIC: return "heuristic"; + default: return "none"; + } +} + +R_API const char *const *r2unity_native_code_registration_names(void) { + return code_registration_names; +} + +R_API const char *const *r2unity_native_metadata_registration_names(void) { + return metadata_registration_names; +} + +R_API void r2unity_native_result_fini(R2UnityNativeResult *result) { + if (!result) { + return; + } + R_FREE (result->method_ptrs); + memset (result, 0, sizeof (*result)); +} + +R_API bool r2unity_find_method_pointers_rbin(R2UnityMetadata *meta, RBin *bin, RBinFile *bf, const R2UnityNativeOptions *options, R2UnityNativeResult *result) { + R_RETURN_VAL_IF_FAIL (meta && bin && result, false); + memset (result, 0, sizeof (*result)); + if (!bf) { + bf = r_bin_cur (bin); + } + if (!bf || !bf->buf) { + return false; + } + (void)r_bin_patch_relocs (bf); + RVecRBinSection *sections = r_bin_file_get_sections_vec (bf); + int ptr_size = rbin_ptr_size (bin); + ut64 text_lo = 0; + ut64 text_hi = 0; + rbin_text_range (sections, &text_lo, &text_hi); + 
R2UnityRBinView rv = { + .bf = bf, + .sections = sections + }; + R2UnityNativeView view = { + .user = &rv, + .ptr_at = rbin_ptr_at, + .ptr_size = ptr_size, + .base_vaddr = rbin_base_vaddr (bf, sections), + .text_lo = text_lo, + .text_hi = text_hi + }; + result->ptr_size = ptr_size; + ut64 code_registration_va = options? options->code_registration_va: 0; + ut64 metadata_registration_va = options? options->metadata_registration_va: 0; + R2UnityNativeSource source = R2U_NATIVE_SOURCE_OVERRIDE; + if (!code_registration_va) { + code_registration_va = r2unity_native_resolve_override (options, code_registration_names); + } + if (!metadata_registration_va) { + metadata_registration_va = r2unity_native_resolve_override (options, metadata_registration_names); + } + if (!code_registration_va) { + code_registration_va = rbin_find_symbol (bin, bf, code_registration_names); + source = R2U_NATIVE_SOURCE_SYMBOL; + } + if (!metadata_registration_va) { + metadata_registration_va = rbin_find_symbol (bin, bf, metadata_registration_names); + } + result->code_registration_va = code_registration_va; + result->metadata_registration_va = metadata_registration_va; + if (!(options && options->force_heuristic) && code_registration_va) { + if (r2unity_native_parse_code_registration (meta, &view, code_registration_va, source, result)) { + return true; + } + } + size_t section_count = 0; + R2UnityNativeSection *native_sections = rbin_sections (sections, &section_count); + bool ok = r2unity_native_scan_sections (meta, &view, native_sections, section_count, result); + R_FREE (native_sections); + return ok; +} + +R_API bool r2unity_find_method_pointers(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result) { + R_RETURN_VAL_IF_FAIL (meta && path && result, false); + memset (result, 0, sizeof (*result)); + RBin *bin = r_bin_new (); + bool ok = false; + if (bin) { + RIO *io = r_io_new (); + if (io) { + r_libstore_load (bin->libstore); + r_io_bind (io, 
&bin->iob); + } + RBinFileOptions opt; + r_bin_file_options_init (&opt, -1, 0, 0, 0); + if (io) { + ok = r_bin_open (bin, path, &opt); + if (ok) { + ok = r2unity_find_method_pointers_rbin (meta, bin, r_bin_cur (bin), options, result); + } + } + r_io_free (io); + r_bin_free (bin); + } + if (ok) { + return true; + } + return r2unity_find_method_pointers_simple (meta, path, options, result); +} + +R_API bool r2unity_find_method_pointers_simple(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result) { + R_RETURN_VAL_IF_FAIL (meta && path && result, false); + memset (result, 0, sizeof (*result)); + if (path && *path) { + ut8 magic[4] = { 0 }; + FILE *fp = fopen (path, "rb"); + if (fp) { + (void)fread (magic, 1, sizeof (magic), fp); + fclose (fp); + } + if (!memcmp (magic, "\x7f""ELF", 4)) { + return r2unity_find_method_pointers_elf (meta, path, options, result); + } + ut32 m = r_read_le32 (magic); + if (m == 0xfeedfacf || m == 0xcffaedfe || m == 0xcafebabe || m == 0xbebafeca) { + return r2unity_find_method_pointers_macho (meta, path, options, result); + } + if (magic[0] == 'M' && magic[1] == 'Z') { + return r2unity_find_method_pointers_pe (meta, path, options, result); + } + } + return false; +} diff --git a/src/lib/bin/native_internal.h b/src/lib/bin/native_internal.h new file mode 100644 index 0000000..b241b36 --- /dev/null +++ b/src/lib/bin/native_internal.h @@ -0,0 +1,37 @@ +/* r2unity - MIT - Copyright 2025-2026 - pancake */ + +#ifndef R2UNITY_NATIVE_INTERNAL_H +#define R2UNITY_NATIVE_INTERNAL_H + +#include "../lib.h" + +typedef const ut8 *(*R2UnityNativePtrAt)(void *user, ut64 va); + +typedef struct { + void *user; + R2UnityNativePtrAt ptr_at; + int ptr_size; + ut64 base_vaddr; + ut64 text_lo; + ut64 text_hi; +} R2UnityNativeView; + +typedef struct { + ut64 vaddr; + ut64 vsize; + ut64 size; + ut32 perm; + bool is_data; +} R2UnityNativeSection; + +bool r2unity_native_parse_code_registration(R2UnityMetadata *meta, 
R2UnityNativeView *view, ut64 code_registration_va, R2UnityNativeSource source, R2UnityNativeResult *result); +ut64 r2unity_native_resolve_override(const R2UnityNativeOptions *options, const char *const *names); +bool r2unity_native_result_has_ptrs(const R2UnityNativeResult *result); +void r2unity_native_take_heuristic_result(R2UnityNativeResult *result, ut64 *method_ptrs, int ptr_size); +bool r2unity_native_scan_sections(R2UnityMetadata *meta, R2UnityNativeView *view, const R2UnityNativeSection *sections, size_t section_count, R2UnityNativeResult *result); +bool r2unity_native_run_view(R2UnityMetadata *meta, R2UnityNativeView *view, const R2UnityNativeSection *sections, size_t section_count, const R2UnityNativeOptions *options, R2UnityNativeResult *result); +bool r2unity_find_method_pointers_elf(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result); +bool r2unity_find_method_pointers_macho(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result); +bool r2unity_find_method_pointers_pe(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result); + +#endif diff --git a/src/lib/bin/pe.c b/src/lib/bin/pe.c new file mode 100644 index 0000000..66162f4 --- /dev/null +++ b/src/lib/bin/pe.c @@ -0,0 +1,182 @@ +/* r2unity - MIT - Copyright 2025-2026 - pancake */ + +#define R_LOG_ORIGIN "r2unity.pe" + +#include "native_internal.h" +#include + +typedef struct { + char name[9]; + ut64 vaddr; + ut64 vsize; + ut64 fileoff; + ut64 filesize; + ut32 chars; +} PeSec; + +typedef struct { + ut8 *file; + ut64 size; + bool is64; + ut64 image_base; + PeSec secs[128]; + size_t nsecs; +} PeImg; + +static bool pe_load(const char *path, PeImg *pe) { + memset (pe, 0, sizeof (*pe)); + size_t size = 0; + pe->file = (ut8 *)r_file_slurp (path, &size); + if (!pe->file || size < 0x40) { + R_FREE (pe->file); + return false; + } + pe->size = size; + if 
(pe->file[0] != 'M' || pe->file[1] != 'Z') { + R_FREE (pe->file); + return false; + } + ut32 e_lfanew = r_read_le32 (pe->file + 0x3c); + if ((ut64)e_lfanew + 24 > pe->size) { + R_FREE (pe->file); + return false; + } + const ut8 *nt = pe->file + e_lfanew; + if (r_read_le32 (nt) != 0x4550) { + R_FREE (pe->file); + return false; + } + ut16 nsec = r_read_le16 (nt + 6); + ut16 optsz = r_read_le16 (nt + 20); + ut64 optoff = (ut64)e_lfanew + 24; + if (optoff + optsz > pe->size) { + R_FREE (pe->file); + return false; + } + const ut8 *opt = pe->file + optoff; + ut16 magic = r_read_le16 (opt); + if (magic == 0x20b) { + pe->is64 = true; + pe->image_base = r_read_le64 (opt + 24); + } else if (magic == 0x10b) { + pe->image_base = r_read_le32 (opt + 28); + } else { + R_FREE (pe->file); + return false; + } + ut64 shoff = optoff + optsz; + for (ut16 i = 0; i < nsec && pe->nsecs < R_ARRAY_SIZE (pe->secs); i++) { + ut64 off = shoff + (ut64)i * 40; + if (off + 40 > pe->size) { + break; + } + const ut8 *sh = pe->file + off; + PeSec *s = &pe->secs[pe->nsecs++]; + memcpy (s->name, sh, 8); + s->name[8] = 0; + s->vsize = r_read_le32 (sh + 8); + s->vaddr = pe->image_base + r_read_le32 (sh + 12); + s->filesize = r_read_le32 (sh + 16); + s->fileoff = r_read_le32 (sh + 20); + s->chars = r_read_le32 (sh + 36); + } + return pe->nsecs > 0; +} + +static void pe_free(PeImg *pe) { + R_FREE (pe->file); +} + +static const ut8 *pe_ptr_at(void *user, ut64 va) { + PeImg *pe = (PeImg *)user; + for (size_t i = 0; i < pe->nsecs; i++) { + const PeSec *s = &pe->secs[i]; + ut64 vsize = s->vsize? 
s->vsize: s->filesize; + if (va < s->vaddr || va >= s->vaddr + vsize) { + continue; + } + ut64 delta = va - s->vaddr; + if (delta + 8 <= s->filesize && s->fileoff + delta + 8 <= pe->size) { + return pe->file + s->fileoff + delta; + } + return NULL; + } + return NULL; +} + +static bool pe_is_data_section(const PeSec *s) { + if (s->chars & 0x20000000) { + return false; + } + return !strncmp (s->name, ".data", 5) || !strncmp (s->name, ".rdata", 6) || (s->chars & 0x40); +} + +static void pe_text_range(PeImg *pe, ut64 *lo, ut64 *hi) { + *lo = UT64_MAX; + *hi = 0; + for (size_t i = 0; i < pe->nsecs; i++) { + const PeSec *s = &pe->secs[i]; + if ((s->chars & 0x20000000) || !strncmp (s->name, ".text", 5)) { + *lo = R_MIN (*lo, s->vaddr); + *hi = R_MAX (*hi, s->vaddr + (s->vsize? s->vsize: s->filesize)); + } + } + if (*lo != UT64_MAX && *hi > *lo) { + return; + } + *lo = UT64_MAX; + *hi = 0; + for (size_t i = 0; i < pe->nsecs; i++) { + *lo = R_MIN (*lo, pe->secs[i].vaddr); + ut64 vsize = pe->secs[i].vsize? pe->secs[i].vsize: pe->secs[i].filesize; + *hi = R_MAX (*hi, pe->secs[i].vaddr + vsize); + } + if (*lo == UT64_MAX) { + *lo = pe->image_base; + } +} + +static R2UnityNativeSection *pe_sections(PeImg *pe, size_t *count) { + *count = pe->nsecs; + R2UnityNativeSection *out = R_NEWS0 (R2UnityNativeSection, pe->nsecs); + if (!out) { + *count = 0; + return NULL; + } + for (size_t i = 0; i < pe->nsecs; i++) { + const PeSec *s = &pe->secs[i]; + ut64 vsize = s->vsize? s->vsize: s->filesize; + out[i].vaddr = s->vaddr; + out[i].vsize = vsize; + out[i].size = R_MIN (s->filesize, vsize); + out[i].perm = ((s->chars & 0x40000000)? R_PERM_R: 0) | ((s->chars & 0x80000000)? R_PERM_W: 0) | ((s->chars & 0x20000000)? 
R_PERM_X: 0); + out[i].is_data = pe_is_data_section (s); + } + return out; +} + +bool r2unity_find_method_pointers_pe(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result) { + if (!meta || !path || !result) { + return false; + } + PeImg pe; + if (!pe_load (path, &pe)) { + return false; + } + ut64 text_lo = 0, text_hi = 0; + pe_text_range (&pe, &text_lo, &text_hi); + R2UnityNativeView view = { + .user = &pe, + .ptr_at = pe_ptr_at, + .ptr_size = pe.is64? 8: 4, + .base_vaddr = pe.image_base, + .text_lo = text_lo, + .text_hi = text_hi + }; + size_t section_count = 0; + R2UnityNativeSection *sections = pe_sections (&pe, &section_count); + bool ok = r2unity_native_run_view (meta, &view, sections, section_count, options, result); + R_FREE (sections); + pe_free (&pe); + return ok; +} diff --git a/src/lib/elf.c b/src/lib/elf.c deleted file mode 100644 index eccb8e4..0000000 --- a/src/lib/elf.c +++ /dev/null @@ -1,389 +0,0 @@ -/* r2unity - MIT - Copyright 2025-2026 - pancake */ - -// Fast ELF parser for r2unity -#define R_LOG_ORIGIN "r2unity.elf" -#include "lib.h" -#include - -static inline void elf_write_word(ut8 *loc, ut64 val, bool is64) { - if (is64) { - r_write_le64 (loc, val); - } else { - r_write_le32 (loc, (ut32)val); - } -} - -typedef struct { - ut64 vaddr; - ut64 memsz; - ut64 offset; - ut64 filesz; - ut32 flags; - ut32 p_type; -} ElfSeg; - -typedef struct { - ut8 *file; - ut64 filesize; - int is64; - int le; - ElfSeg segs[128]; - int nsegs; -} ElfImg; - -static bool elf_load(const char *path, ElfImg *e) { - memset (e, 0, sizeof (*e)); - size_t sz = 0; - e->file = (ut8 *)r_file_slurp (path, &sz); - if (!e->file || sz == 0) { - R_FREE (e->file); - return false; - } - e->filesize = (ut64)sz; - const ut8 *p = e->file; - if (! 
(p[0] == 0x7f && p[1] == 'E' && p[2] == 'L' && p[3] == 'F')) { - R_FREE (e->file); - return false; - } - e->is64 = (p[4] == 2); - e->le = (p[5] == 1); - if (!e->le) { - R_FREE (e->file); - return false; - } - if (e->is64) { - ut64 phoff = r_read_le64 (p + 0x20); - ut16 phentsize = r_read_le16 (p + 0x36); - ut16 phnum = r_read_le16 (p + 0x38); - for (ut16 i = 0; i < phnum && e->nsegs < (int) (sizeof (e->segs) / sizeof (e->segs[0])); i++) { - ut64 off = phoff + (ut64)i * phentsize; - if (off + 56 > e->filesize) { - break; - } - const ut8 *ph = p + off; - ut32 p_type = r_read_le32 (ph + 0); - ut32 p_flags = r_read_le32 (ph + 4); - ut64 p_offset = r_read_le64 (ph + 8); - ut64 p_vaddr = r_read_le64 (ph + 16); - ut64 p_filesz = r_read_le64 (ph + 32); - ut64 p_memsz = r_read_le64 (ph + 40); - if (p_type == 1 || p_type == 2) { - ElfSeg *s = &e->segs[e->nsegs++]; - s->vaddr = p_vaddr; - s->memsz = p_memsz; - s->offset = p_offset; - s->filesz = p_filesz; - s->flags = p_flags; - s->p_type = p_type; - } - } - } else { - ut32 phoff = r_read_le32 (p + 0x1C); - ut16 phentsize = r_read_le16 (p + 0x2A); - ut16 phnum = r_read_le16 (p + 0x2C); - for (ut16 i = 0; i < phnum && e->nsegs < (int) (sizeof (e->segs) / sizeof (e->segs[0])); i++) { - ut64 off = (ut64)phoff + (ut64)i * phentsize; - if (off + 32 > e->filesize) { - break; - } - const ut8 *ph = p + off; - ut32 p_type = r_read_le32 (ph + 0); - ut32 p_offset = r_read_le32 (ph + 4); - ut32 p_vaddr = r_read_le32 (ph + 8); - ut32 p_filesz = r_read_le32 (ph + 16); - ut32 p_memsz = r_read_le32 (ph + 20); - ut32 p_flags = r_read_le32 (ph + 24); - if (p_type == 1 || p_type == 2) { - ElfSeg *s = &e->segs[e->nsegs++]; - s->vaddr = p_vaddr; - s->memsz = p_memsz; - s->offset = p_offset; - s->filesz = p_filesz; - s->flags = p_flags; - s->p_type = p_type; - } - } - } - return true; -} - -static void elf_free(ElfImg *e) { - R_FREE (e->file); -} - -static inline const ut8 *elf_vm_to_ptr(ElfImg *e, ut64 vaddr) { - for (int i = 0; i < e->nsegs; 
i++) { - const ElfSeg *s = &e->segs[i]; - if (vaddr >= s->vaddr && vaddr < s->vaddr + s->memsz) { - ut64 delta = vaddr - s->vaddr; - if (delta < s->filesz && s->offset + delta < e->filesize) { - return e->file + s->offset + delta; - } - return NULL; - } - } - return NULL; -} - -/* Walk a candidate method pointer table at VA `arrptr`. Entries below text_lo - * are retried with base_vaddr added (RVA→VA fallback). On match, copy up to - * method_count entries into candidates and return true. */ -static bool elf_probe_table(ElfImg *e, ut64 arrptr, ut32 count, int ptrsz, ut64 base_vaddr, ut64 text_lo, ut64 text_hi, ut32 min_seen, ut32 min_good, size_t method_count, ut64 *candidates) { - ut32 sample = R_MIN ((ut32)128, count); - ut32 good = 0, seen = 0; - for (ut32 k = 0; k < sample; k++) { - const ut8 *pp = elf_vm_to_ptr (e, arrptr + (ut64)k *(ut64)ptrsz); - if (!pp && base_vaddr != UT64_MAX) { - pp = elf_vm_to_ptr (e, base_vaddr + arrptr + (ut64)k *(ut64)ptrsz); - } - if (!pp) { - return false; - } - ut64 val = (ptrsz == 8)? r_read_le64 (pp): (ut64)r_read_le32 (pp); - if (val) { - seen++; - } - if ((val >= text_lo && val < text_hi) || (val && base_vaddr != UT64_MAX && (val + base_vaddr) >= text_lo && (val + base_vaddr) < text_hi)) { - good++; - } - } - if (seen < min_seen || good < min_good) { - return false; - } - memset (candidates, 0, method_count * sizeof (ut64)); - size_t tocopy = R_MIN ((size_t)count, method_count); - size_t in_text = 0; - for (size_t m = 0; m < tocopy; m++) { - const ut8 *pp = elf_vm_to_ptr (e, arrptr + (ut64)m *(ut64)ptrsz); - if (!pp && base_vaddr != UT64_MAX) { - pp = elf_vm_to_ptr (e, base_vaddr + arrptr + (ut64)m *(ut64)ptrsz); - } - if (!pp) { - break; - } - ut64 val = (ptrsz == 8)? r_read_le64 (pp): (ut64)r_read_le32 (pp); - ut64 abs = (val >= text_lo && val < text_hi) - ? val - : ((val && base_vaddr != UT64_MAX)? 
val + base_vaddr: val); - if (abs >= text_lo && abs < text_hi) { - candidates[m] = abs; - in_text++; - } - } - return in_text >= 8; -} - -R_API bool r2unity_find_method_pointers_elf(R2UnityMetadata *meta, const char *elf_path, ut64 **out_ptrs) { - R_RETURN_VAL_IF_FAIL (meta && elf_path && out_ptrs, false); - *out_ptrs = NULL; - ElfImg e; - if (!elf_load (elf_path, &e)) { - return false; - } - size_t method_count = (ut64)meta->methodsSize / sizeof (Il2CppMethodDefinition); - if (!method_count) { - elf_free (&e); - return false; - } - ut64 base_vaddr = UT64_MAX, text_lo = UT64_MAX, text_hi = 0; - for (int i = 0; i < e.nsegs; i++) { - const ElfSeg *s = &e.segs[i]; - if (s->vaddr < base_vaddr) { - base_vaddr = s->vaddr; - } - if ((s->flags & 0x1) && (s->flags & 0x4)) { - if (s->vaddr < text_lo) { - text_lo = s->vaddr; - } - if (s->vaddr + s->memsz > text_hi) { - text_hi = s->vaddr + s->memsz; - } - } - } - if (text_lo == UT64_MAX || text_hi <= text_lo) { - elf_free (&e); - return false; - } - ut64 *candidates = R_NEWS (ut64, method_count); - if (!candidates) { - elf_free (&e); - return false; - } - bool found = false; - int ptrsz = e.is64? 8: 4; - // Apply REL (A)/RELR (relative only) - ut64 dyn_off = 0, dyn_sz = 0; - for (int i = 0; i < e.nsegs; i++) { - const ElfSeg *s = &e.segs[i]; - if (s->p_type == 2) { - dyn_off = s->offset; - dyn_sz = s->filesz; - break; - } - } - ut64 rela_off = 0, rela_sz = 0, rela_ent = e.is64? 24: 12; - ut64 rel_off = 0, rel_sz = 0, rel_ent = e.is64? 16: 8; - ut64 relr_off = 0, relr_sz = 0, relr_ent = e.is64? 8: 4; - if (dyn_off && dyn_sz) { - const ut8 *d = e.file + dyn_off; - for (ut64 off = 0; off + (e.is64? 16: 8) <= dyn_sz; off += (e.is64? 16: 8)) { - ut64 tag = e.is64? r_read_le64 (d + off): (ut64)r_read_le32 (d + off); - ut64 val = e.is64? 
r_read_le64 (d + off + 8): (ut64)r_read_le32 (d + off + 4); - if (tag == 0) { - break; - } - if (tag == 7) { - rela_off = val; - } else if (tag == 8) { - rela_sz = val; - } else if (tag == 9) { - rela_ent = val; - } else if (tag == 17) { - rel_off = val; - } else if (tag == 18) { - rel_sz = val; - } else if (tag == 19) { - rel_ent = val; - } else if (tag == 36) { - relr_off = val; - } else if (tag == 35) { - relr_sz = val; - } else if (tag == 37) { - relr_ent = val; - } - } - } - const ut64 base = (base_vaddr != UT64_MAX)? base_vaddr: 0; - const ut64 type_mask = e.is64? 0xffffffffULL: 0xffULL; - if (rela_off && rela_sz && rela_ent) { - for (ut64 i = 0; i + rela_ent <= rela_sz; i += rela_ent) { - const ut8 *rp = elf_vm_to_ptr (&e, rela_off + i); - if (!rp) { - break; - } - ut64 r_offset = e.is64? r_read_le64 (rp + 0): (ut64)r_read_le32 (rp + 0); - ut64 r_info = e.is64? r_read_le64 (rp + 8): (ut64)r_read_le32 (rp + 4); - ut64 r_addend = e.is64? r_read_le64 (rp + 16): (ut64)r_read_le32 (rp + 8); - ut64 type = r_info & type_mask; - bool is_relative = (type == 8) || (type == 23) || (type == 1027); - if (!is_relative) { - continue; - } - ut8 *loc = (ut8 *)elf_vm_to_ptr (&e, r_offset); - if (loc) { - elf_write_word (loc, base + r_addend, e.is64); - } - } - } - if (rel_off && rel_sz && rel_ent) { - for (ut64 i = 0; i + rel_ent <= rel_sz; i += rel_ent) { - const ut8 *rp = elf_vm_to_ptr (&e, rel_off + i); - if (!rp) { - break; - } - ut64 r_offset = e.is64? r_read_le64 (rp + 0): (ut64)r_read_le32 (rp + 0); - ut64 r_info = e.is64? r_read_le64 (rp + 8): (ut64)r_read_le32 (rp + 4); - ut64 type = r_info & type_mask; - bool is_relative = (type == 8) || (type == 23) || (type == 1027); - if (!is_relative) { - continue; - } - ut8 *loc = (ut8 *)elf_vm_to_ptr (&e, r_offset); - if (!loc) { - continue; - } - ut64 add = e.is64? 
r_read_le64 (loc): (ut64)r_read_le32 (loc); - elf_write_word (loc, base + add, e.is64); - } - } - if (e.is64 && relr_off && relr_sz && relr_ent) { - ut64 curr = 0; - for (ut64 i = 0; i + relr_ent <= relr_sz; i += relr_ent) { - const ut8 *rp = elf_vm_to_ptr (&e, relr_off + i); - if (!rp) { - break; - } - ut64 R = r_read_le64 (rp); - if ((R & 1ULL) == 0) { - ut64 addr = R; - ut8 *loc = (ut8 *)elf_vm_to_ptr (&e, addr); - if (loc) { - elf_write_word (loc, base + r_read_le64 (loc), true); - } - curr = addr + 8; - } else { - ut64 bitmap = R >> 1; - for (int bit = 0; bit < 63; bit++) { - if (! (bitmap & (1ULL << bit))) { - continue; - } - ut64 addr = curr + (ut64) (bit + 1) * 8ULL; - ut8 *loc = (ut8 *)elf_vm_to_ptr (&e, addr); - if (loc) { - elf_write_word (loc, base + r_read_le64 (loc), true); - } - } - curr += 8ULL * 63ULL; - } - } - } - // Scan for [count][ptr] - /* expected number of entries in a possible method pointer array - * use method_count computed earlier (methodsSize / sizeof (Il2CppMethodDefinition)) - */ - ut32 expected = (ut32)R_MAX ((ut64)64, (ut64)method_count); - /* Pass 0: CodeRegistration-shape (pair of count/ptr tuples), loose floor. - * Pass 1: generic {count32, ptr} with expected-count bounds and stricter - * sample-fraction thresholds. */ - for (int pass = 0; pass < 2 && !found; pass++) { - const ut32 min_count = (pass == 0)? 32: 64; - const ut64 min_secsize = (pass == 0) - ? (ut64) (16 + (ut64)ptrsz * 2) - : (ut64) (8 + ptrsz); - const ut64 min_step = (pass == 0) - ? (ut64) (8 + ptrsz) * 3 - : (ut64) (8 + ptrsz); - for (int i = 0; i < e.nsegs && !found; i++) { - const ElfSeg *s = &e.segs[i]; - bool is_data = (s->flags & 0x1) && ! 
(s->flags & 0x4); - if (!is_data || s->filesz < min_secsize) { - continue; - } - const ut8 *buf = e.file + s->offset; - for (ut64 off = 0; off + min_step <= s->filesz; off += 4) { - ut32 cnt = r_read_le32 (buf + off); - if (cnt < min_count) { - continue; - } - if (pass == 0) { - /* Require a second plausible count immediately after the ptr. */ - ut32 cnt2 = r_read_le32 (buf + off + (ut64) (8 + ptrsz)); - if (cnt2 < 16) { - continue; - } - } else if (expected && (cnt > expected * 2 || cnt < expected / 2)) { - continue; - } - ut64 arrptr = (ptrsz == 8) - ? r_read_le64 (buf + off + 8) - : (ut64)r_read_le32 (buf + off + 4); - ut32 sample = R_MIN ((ut32)128, cnt); - ut32 min_seen = (pass == 0)? 8: sample / 2; - ut32 min_good = (pass == 0)? 8: (sample * 3) / 4; - if (elf_probe_table (&e, arrptr, cnt, ptrsz, base_vaddr, text_lo, text_hi, min_seen, min_good, method_count, candidates)) { - R_LOG_DEBUG ("[elf] pass=%d arrptr=0x%" PFMT64x " cnt=%u", pass, arrptr, cnt); - found = true; - break; - } - } - } - } - if (!found) { - R_FREE (candidates); - elf_free (&e); - return false; - } - *out_ptrs = candidates; - elf_free (&e); - return true; -} diff --git a/src/lib/lib.h b/src/lib/lib.h index 47b1a41..6deb9c5 100644 --- a/src/lib/lib.h +++ b/src/lib/lib.h @@ -3,6 +3,9 @@ #include +struct r_bin_t; +struct r_bin_file_t; + #define IL2CPP_MAGIC 0xFAB11BAF #define R2UNITY_METADATA_BASE_SECTION_COUNT 31 #define R2UNITY_METADATA_SECTION_COUNT 33 @@ -199,6 +202,37 @@ typedef enum { R2U_SBOM_JSON } R2UnitySbomFormat; +typedef enum { + R2U_NATIVE_SOURCE_NONE, + R2U_NATIVE_SOURCE_SYMBOL, + R2U_NATIVE_SOURCE_OVERRIDE, + R2U_NATIVE_SOURCE_HEURISTIC +} R2UnityNativeSource; + +typedef struct { + const char *name; + ut64 va; +} R2UnitySymbolOverride; + +typedef struct { + bool force_heuristic; + ut64 code_registration_va; + ut64 metadata_registration_va; + const R2UnitySymbolOverride *symbols; + size_t symbols_count; +} R2UnityNativeOptions; + +typedef struct { + ut64 *method_ptrs; /* owned; 
free with r2unity_native_result_fini() */ + bool has_method_ptrs; + R2UnityNativeSource source; + ut64 code_registration_va; + ut64 metadata_registration_va; + ut64 method_pointers_va; + ut64 code_gen_modules_va; + int ptr_size; +} R2UnityNativeResult; + R_API R2UnityMetadata *r2unity_parse_metadata(RBuffer *buf); R_API void r2unity_free_metadata(R2UnityMetadata *meta); /* Caller owns the returned string and must free() it. NULL on missing/empty. */ @@ -229,9 +263,13 @@ R_API bool r2unity_metadata_section(R2UnityMetadata *meta, R2UMetadataSectionId R_API ut64 r2unity_metadata_section_entry_size(R2UnityMetadata *meta, R2UMetadataSectionId id); R_API ut64 r2unity_metadata_section_count(R2UnityMetadata *meta, R2UMetadataSectionId id); R_API ut64 r2unity_metadata_header_size(R2UnityMetadata *meta); -R_API bool r2unity_find_method_pointers_macho(R2UnityMetadata *meta, const char *macho_path, ut64 **out_ptrs); -R_API bool r2unity_find_method_pointers_elf(R2UnityMetadata *meta, const char *elf_path, ut64 **out_ptrs); -R_API bool r2unity_find_method_pointers_pe(R2UnityMetadata *meta, const char *pe_path, ut64 **out_ptrs); +R_API const char *r2unity_native_source_name(R2UnityNativeSource source); +R_API const char *const *r2unity_native_code_registration_names(void); +R_API const char *const *r2unity_native_metadata_registration_names(void); +R_API void r2unity_native_result_fini(R2UnityNativeResult *result); +R_API bool r2unity_find_method_pointers(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result); +R_API bool r2unity_find_method_pointers_simple(R2UnityMetadata *meta, const char *path, const R2UnityNativeOptions *options, R2UnityNativeResult *result); +R_API bool r2unity_find_method_pointers_rbin(R2UnityMetadata *meta, struct r_bin_t *bin, struct r_bin_file_t *bf, const R2UnityNativeOptions *options, R2UnityNativeResult *result); /* Companion-file discovery. 
* diff --git a/src/lib/macho.c b/src/lib/macho.c deleted file mode 100644 index f14433a..0000000 --- a/src/lib/macho.c +++ /dev/null @@ -1,253 +0,0 @@ -/* r2unity - MIT - Copyright 2025-2026 - pancake */ - -// Fast Mach-O parser for r2unity -#define R_LOG_ORIGIN "r2unity.macho" -#include "lib.h" -#include - -typedef struct { - char segname[16]; - ut64 vmaddr; - ut64 vmsize; - ut64 fileoff; - ut64 filesize; - ut32 maxprot; -} MachSeg; - -typedef struct { - ut8 *file; - ut64 vm_base; // lowest VM address among segments (usually __TEXT vmaddr) - ut64 filesize; - ut64 base; - ut32 ncmds; - ut64 cmd_off; - MachSeg segs[128]; - int nsegs; -} MachO; - -static bool macho_load(const char *path, MachO *mo) { - memset (mo, 0, sizeof (*mo)); - size_t sz = 0; - mo->file = (ut8 *)r_file_slurp (path, &sz); - if (!mo->file || sz == 0) { - R_FREE (mo->file); - return false; - } - mo->filesize = (ut64)sz; - ut32 magic = r_read_be32 (mo->file); - ut64 off = 0; - if (magic == 0xcafebabe) { - ut32 nfat = r_read_be32 (mo->file + 4); - ut32 best = 0; - for (ut32 i = 0; i < nfat; i++) { - const ut8 *fa = mo->file + 8 + i * 20; - ut32 cputype = r_read_be32 (fa + 0); - if (cputype == 0x0100000c) { - best = i; - break; - } - } - const ut8 *fa = mo->file + 8 + best * 20; - off = r_read_be32 (fa + 8); - } - mo->base = off; - const ut8 *p = mo->file + off; - ut32 mh_magic = r_read_le32 (p); - if (mh_magic != 0xFEEDFACF) { - R_FREE (mo->file); - return false; - } - ut32 ncmds = r_read_le32 (p + 0x10); - mo->ncmds = ncmds; - mo->cmd_off = off + 0x20; - ut64 co = mo->cmd_off; - for (ut32 i = 0; i < ncmds && (co + 8 <= mo->filesize); i++) { - ut32 cmd = r_read_le32 (mo->file + co); - ut32 cmdsize = r_read_le32 (mo->file + co + 4); - if (cmd == 0x19 && cmdsize >= 72) { - if (mo->nsegs < (int) (sizeof (mo->segs) / sizeof (mo->segs[0]))) { - MachSeg *s = &mo->segs[mo->nsegs++]; - const ut8 *sp = mo->file + co + 8; - memcpy (s->segname, sp, 16); - s->vmaddr = r_read_le64 (sp + 16); - s->vmsize = 
r_read_le64 (sp + 24); - s->fileoff = r_read_le64 (sp + 32); - s->filesize = r_read_le64 (sp + 40); - s->maxprot = r_read_le32 (sp + 48); - if (!mo->vm_base || s->vmaddr < mo->vm_base) { - mo->vm_base = s->vmaddr; - } - } - } - co += cmdsize? cmdsize: 8; - } - return true; -} - -static void macho_free(MachO *mo) { - R_FREE (mo->file); -} - -static inline bool macho_vm_in_text(MachO *mo, ut64 addr, ut64 *text_lo, ut64 *text_hi) { - ut64 lo = UT64_MAX, hi = 0; - for (int i = 0; i < mo->nsegs; i++) { - MachSeg *s = &mo->segs[i]; - bool is_text = (s->maxprot & 0x4) || !strncmp (s->segname, "__TEXT", 6); - if (is_text) { - if (s->vmaddr < lo) { - lo = s->vmaddr; - } - if (s->vmaddr + s->vmsize > hi) { - hi = s->vmaddr + s->vmsize; - } - } - } - // Fallback: if no exec segment detected, use min/max of all segments - if (lo == UT64_MAX || hi <= lo) { - lo = UT64_MAX; - hi = 0; - for (int i = 0; i < mo->nsegs; i++) { - MachSeg *s = &mo->segs[i]; - if (s->vmaddr < lo) { - lo = s->vmaddr; - } - if (s->vmaddr + s->vmsize > hi) { - hi = s->vmaddr + s->vmsize; - } - } - } - if (text_lo) { - *text_lo = lo; - } - if (text_hi) { - *text_hi = hi; - } - return (addr >= lo && addr < hi); -} - -static inline const ut8 *macho_vm_to_ptr(MachO *mo, ut64 vmaddr) { - for (int i = 0; i < mo->nsegs; i++) { - MachSeg *s = &mo->segs[i]; - if (vmaddr >= s->vmaddr && vmaddr < s->vmaddr + s->vmsize) { - ut64 delta = vmaddr - s->vmaddr; - if (delta < s->filesize) { - return mo->file + mo->base + s->fileoff + delta; - } - return NULL; - } - } - return NULL; -} - -/* Evaluate the table at VA `arrptr` with `count` 8-byte pointers. Entries - * below text_lo are retried with vm_base added (RVA→VA fallback). 
*/ -static bool macho_probe_table(MachO *mo, ut64 arrptr, ut32 count, ut64 text_lo, ut64 text_hi, ut32 min_seen, ut32 min_good, size_t method_count, ut64 *candidates) { - ut32 sample = R_MIN ((ut32)128, count); - ut32 good = 0, seen = 0; - for (ut32 k = 0; k < sample; k++) { - const ut8 *p = macho_vm_to_ptr (mo, arrptr + (ut64)k * 8); - if (!p && mo->vm_base) { - p = macho_vm_to_ptr (mo, mo->vm_base + arrptr + (ut64)k * 8); - } - if (!p) { - return false; - } - ut64 val = r_read_le64 (p); - if (val && val < text_lo && mo->vm_base && (val + mo->vm_base) >= text_lo && (val + mo->vm_base) < text_hi) { - val += mo->vm_base; - } - if (val) { - seen++; - } - if (val >= text_lo && val < text_hi) { - good++; - } - } - if (seen < min_seen || good < min_good) { - return false; - } - memset (candidates, 0, method_count * sizeof (ut64)); - size_t tocopy = R_MIN ((size_t)count, method_count); - for (size_t m = 0; m < tocopy; m++) { - const ut8 *p = macho_vm_to_ptr (mo, arrptr + (ut64)m * 8); - if (!p && mo->vm_base) { - p = macho_vm_to_ptr (mo, mo->vm_base + arrptr + (ut64)m * 8); - } - if (!p) { - break; - } - ut64 val = r_read_le64 (p); - if (val && val < text_lo && mo->vm_base && (val + mo->vm_base) >= text_lo && (val + mo->vm_base) < text_hi) { - val += mo->vm_base; - } - if (val >= text_lo && val < text_hi) { - candidates[m] = val; - } - } - return true; -} - -R_API bool r2unity_find_method_pointers_macho(R2UnityMetadata *meta, const char *macho_path, ut64 **out_ptrs) { - R_RETURN_VAL_IF_FAIL (meta && macho_path && out_ptrs, false); - *out_ptrs = NULL; - MachO mo; - if (!macho_load (macho_path, &mo)) { - return false; - } - size_t method_count = (ut64)meta->methodsSize / sizeof (Il2CppMethodDefinition); - if (!method_count) { - macho_free (&mo); - return false; - } - ut64 text_lo = 0, text_hi = 0; - macho_vm_in_text (&mo, 0, &text_lo, &text_hi); - ut64 *candidates = R_NEWS (ut64, method_count); - if (!candidates) { - macho_free (&mo); - return false; - } - bool found = 
false; - int ptrsz = 8; - ut32 expected = (ut32)R_MAX ((ut64)64, (ut64)method_count); - (void)ptrsz; /* Mach-O fast path is 64-bit only. */ - /* Pass 0: CodeRegistration-shaped pair {count32, pad, ptr64}. - * Pass 1: generic {count32, ptr64} with expected-count bounds and - * stricter sample-fraction thresholds. */ - for (int pass = 0; pass < 2 && !found; pass++) { - const ut32 min_count = (pass == 0)? 32: 64; - for (int i = 0; i < mo.nsegs && !found; i++) { - MachSeg *s = &mo.segs[i]; - bool is_data = (s->maxprot & 0x1) && ! (s->maxprot & 0x4); - if (!is_data || s->filesize < 16) { - continue; - } - const ut8 *buf = mo.file + mo.base + s->fileoff; - for (ut64 off = 0; off + 16 <= s->filesize; off += 4) { - ut32 cnt = r_read_le32 (buf + off); - if (cnt < min_count) { - continue; - } - if (pass == 1 && expected && (cnt > expected * 2 || cnt < expected / 2)) { - continue; - } - ut64 arrptr = r_read_le64 (buf + off + 8); - ut32 sample = R_MIN ((ut32)128, cnt); - ut32 min_seen = (pass == 0)? 8: sample / 2; - ut32 min_good = (pass == 0)? 8: (sample * 3) / 4; - if (macho_probe_table (&mo, arrptr, cnt, text_lo, text_hi, min_seen, min_good, method_count, candidates)) { - R_LOG_DEBUG ("[macho] pass=%d arrptr=0x%" PFMT64x " cnt=%u", pass, arrptr, cnt); - found = true; - break; - } - } - } - } - if (!found) { - R_FREE (candidates); - macho_free (&mo); - return false; - } - *out_ptrs = candidates; - macho_free (&mo); - return true; -} diff --git a/src/lib/paths.c b/src/lib/paths.c index 8a2a090..a5d3f96 100644 --- a/src/lib/paths.c +++ b/src/lib/paths.c @@ -3,17 +3,8 @@ #define R_LOG_ORIGIN "r2unity.paths" #include "lib.h" -#include #include -static bool str_ieq(const char *a, const char *b) { - return a && b && !strcasecmp (a, b); -} - -static char *pjoin(const char *a, const char *b) { - return r_str_newf ("%s/%s", a, b); -} - /* Take ownership of `candidate` if it exists on disk; otherwise free it. 
*/ static bool take_if_exists(char **out, char *candidate) { if (r_file_exists (candidate)) { @@ -49,7 +40,7 @@ static const char *il2cpp_basename_for(const char *platform) { static bool is_il2cpp_basename(const char *base) { for (size_t i = 0; i < sizeof (il2cpp_map) / sizeof (il2cpp_map[0]); i++) { - if (str_ieq (base, il2cpp_map[i].basename)) { + if (!r_str_casecmp (base, il2cpp_map[i].basename)) { return true; } } @@ -59,7 +50,7 @@ static bool is_il2cpp_basename(const char *base) { static char *find_il2cpp_sibling(const char *dir) { char *found = NULL; for (size_t i = 0; i < sizeof (il2cpp_map) / sizeof (il2cpp_map[0]); i++) { - if (take_if_exists (&found, pjoin (dir, il2cpp_map[i].basename))) { + if (take_if_exists (&found, r_file_new (dir, il2cpp_map[i].basename, NULL))) { return found; } } @@ -69,7 +60,7 @@ static char *find_il2cpp_sibling(const char *dir) { static char *strip_ext_icase(const char *name, const char *ext) { size_t nlen = strlen (name); size_t elen = strlen (ext); - if (nlen > elen && !strcasecmp (name + nlen - elen, ext)) { + if (nlen > elen && !r_str_casecmp (name + nlen - elen, ext)) { return r_str_ndup (name, (int) (nlen - elen)); } return strdup (name); @@ -112,7 +103,7 @@ static bool try_macos(R2UnityPaths *p, const char *abs, const char *dir, const c char *contents = NULL; if (r_str_endswith (dir, "/Contents/MacOS")) { contents = r_file_dirname (dir); - } else if (r_str_endswith (dir, "/Contents/Frameworks") && str_ieq (base, "GameAssembly.dylib")) { + } else if (r_str_endswith (dir, "/Contents/Frameworks") && !r_str_casecmp (base, "GameAssembly.dylib")) { contents = r_file_dirname (dir); } else { return false; @@ -128,7 +119,7 @@ static bool try_macos(R2UnityPaths *p, const char *abs, const char *dir, const c p->platform = strdup ("macos"); p->metadata = metadata; p->data_dir = data_dir; - if (str_ieq (base, "GameAssembly.dylib")) { + if (!r_str_casecmp (base, "GameAssembly.dylib")) { p->il2cpp_binary = strdup (abs); } else { 
take_if_exists (&p->il2cpp_binary, r_file_new (contents, "Frameworks", "GameAssembly.dylib", NULL)); @@ -145,7 +136,7 @@ static char *find_ios_metadata(const char *app_dir) { }; char *metadata = NULL; for (int i = 0; metas[i]; i++) { - if (take_if_exists (&metadata, pjoin (app_dir, metas[i]))) { + if (take_if_exists (&metadata, r_file_new (app_dir, metas[i], NULL))) { return metadata; } } @@ -159,7 +150,7 @@ static bool try_ios(R2UnityPaths *p, const char *abs, const char *dir, const cha * Metadata: Game.app/Data/Managed/Metadata/global-metadata.dat * Game.app/Data/Raw/Managed/Metadata/global-metadata.dat (newer) */ char *app_dir = NULL; - if (r_str_endswith (dir, ".framework") && str_ieq (base, "UnityFramework")) { + if (r_str_endswith (dir, ".framework") && !r_str_casecmp (base, "UnityFramework")) { char *frameworks = r_file_dirname (dir); app_dir = r_file_dirname (frameworks); free (frameworks); @@ -175,12 +166,12 @@ static bool try_ios(R2UnityPaths *p, const char *abs, const char *dir, const cha } p->platform = strdup ("ios"); p->metadata = metadata; - p->data_dir = pjoin (app_dir, "Data"); - if (str_ieq (base, "UnityFramework")) { + p->data_dir = r_file_new (app_dir, "Data", NULL); + if (!r_str_casecmp (base, "UnityFramework")) { p->il2cpp_binary = strdup (abs); } else if (!take_if_exists (&p->il2cpp_binary, r_file_new (app_dir, "Frameworks", "UnityFramework.framework", "UnityFramework", NULL))) { - if (!take_if_exists (&p->il2cpp_binary, pjoin (app_dir, "UnityFramework"))) { + if (!take_if_exists (&p->il2cpp_binary, r_file_new (app_dir, "UnityFramework", NULL))) { p->il2cpp_binary = strdup (abs); } } @@ -193,8 +184,8 @@ static bool try_winlin_standalone(R2UnityPaths *p, const char *abs, const char * * Linux: Game (ELF) + GameAssembly.so + Game_Data/il2cpp_data/Metadata/global-metadata.dat * * `base` is either the main exe name ("Game"/"Game.exe") or the IL2CPP lib itself. 
*/ - const bool is_dll = str_ieq (base, "GameAssembly.dll"); - const bool is_so = str_ieq (base, "GameAssembly.so"); + const bool is_dll = !r_str_casecmp (base, "GameAssembly.dll"); + const bool is_so = !r_str_casecmp (base, "GameAssembly.so"); const bool is_il2cpp = is_dll || is_so; char *data_dir = NULL; @@ -221,8 +212,8 @@ static bool try_winlin_standalone(R2UnityPaths *p, const char *abs, const char * p->il2cpp_binary = strdup (abs); p->platform = strdup (is_dll? "windows": "linux"); } else { - char *dll = pjoin (dir, "GameAssembly.dll"); - char *so = pjoin (dir, "GameAssembly.so"); + char *dll = r_file_new (dir, "GameAssembly.dll", NULL); + char *so = r_file_new (dir, "GameAssembly.so", NULL); if (take_if_exists (&p->il2cpp_binary, dll)) { p->platform = strdup ("windows"); free (so); @@ -240,7 +231,7 @@ static bool try_winlin_standalone(R2UnityPaths *p, const char *abs, const char * static bool try_android_apk(R2UnityPaths *p, const char *abs, const char *dir, const char *base) { /* Extracted APK: /lib//libil2cpp.so * /assets/bin/Data/Managed/Metadata/global-metadata.dat */ - if (!str_ieq (base, "libil2cpp.so")) { + if (r_str_casecmp (base, "libil2cpp.so")) { return false; } char *abi_parent = r_file_dirname (dir); @@ -255,10 +246,10 @@ static bool try_android_apk(R2UnityPaths *p, const char *abs, const char *dir, c NULL }; for (int i = 0; metas[i]; i++) { - if (take_if_exists (&p->metadata, pjoin (root, metas[i]))) { + if (take_if_exists (&p->metadata, r_file_new (root, metas[i], NULL))) { p->platform = strdup ("android"); p->il2cpp_binary = strdup (abs); - p->data_dir = pjoin (root, "assets/bin/Data"); + p->data_dir = r_file_new (root, "assets/bin/Data", NULL); free (root); return true; } @@ -271,7 +262,7 @@ static bool try_fixture(R2UnityPaths *p, const char *abs, const char *dir, const /* Flat fixture: main exe + il2cpp binary + global-metadata.dat all in the * same directory. Used by users dumping files * manually for quick triage. 
*/ - char *metadata = pjoin (dir, "global-metadata.dat"); + char *metadata = r_file_new (dir, "global-metadata.dat", NULL); if (!r_file_exists (metadata)) { free (metadata); return false; @@ -286,7 +277,7 @@ static bool try_fixture(R2UnityPaths *p, const char *abs, const char *dir, const /* Drill into a macOS .app bundle's Contents/MacOS and return the first regular * file there; falls back to iOS framework/flat layouts. */ static char *expand_app_bundle(const char *dir) { - char *macos_dir = pjoin (dir, "Contents/MacOS"); + char *macos_dir = r_file_new (dir, "Contents/MacOS", NULL); if (r_file_is_directory (macos_dir)) { RList *entries = r_sys_dir (macos_dir); if (entries) { @@ -296,7 +287,7 @@ static char *expand_app_bundle(const char *dir) { if (*name == '.') { continue; } - char *f = pjoin (macos_dir, name); + char *f = r_file_new (macos_dir, name, NULL); if (r_file_is_regular (f)) { r_list_free (entries); free (macos_dir); @@ -313,7 +304,7 @@ static char *expand_app_bundle(const char *dir) { if (take_if_exists (&out, ios_fw)) { return out; } - if (take_if_exists (&out, pjoin (dir, "UnityFramework"))) { + if (take_if_exists (&out, r_file_new (dir, "UnityFramework", NULL))) { return out; } return NULL; @@ -332,7 +323,7 @@ static char *expand_ios_root_dir(const char *dir) { if (take_if_exists (&out, r_file_new (dir, "Frameworks", "UnityFramework.framework", "UnityFramework", NULL))) { return out; } - if (take_if_exists (&out, pjoin (dir, "UnityFramework"))) { + if (take_if_exists (&out, r_file_new (dir, "UnityFramework", NULL))) { return out; } @@ -350,7 +341,7 @@ static char *expand_ios_root_dir(const char *dir) { if (!strcmp (name, "Data") || !strcmp (name, "Info.plist") || !strcmp (name, "PkgInfo") || !strcmp (name, "global-metadata.dat") || !strcmp (name, "embedded.mobileprovision")) { continue; } - char *f = pjoin (dir, name); + char *f = r_file_new (dir, name, NULL); if (r_file_is_regular (f)) { if (r_file_is_executable (f)) { r_list_free (entries); @@ 
-389,7 +380,7 @@ static char *expand_winlin_dir(const char *dir) { free (stem); break; } - if (take_if_exists (&found, pjoin (dir, stem))) { + if (take_if_exists (&found, r_file_new (dir, stem, NULL))) { free (stem); break; } @@ -401,7 +392,7 @@ static char *expand_winlin_dir(const char *dir) { /* Android extracted APK: lib//libil2cpp.so */ static char *expand_apk_dir(const char *dir) { - char *lib_dir = pjoin (dir, "lib"); + char *lib_dir = r_file_new (dir, "lib", NULL); if (!r_file_is_directory (lib_dir)) { free (lib_dir); return NULL; @@ -454,7 +445,7 @@ static char *expand_dir_input(const char *dir) { return apk; } char *out = NULL; - take_if_exists (&out, pjoin (dir, "global-metadata.dat")); + take_if_exists (&out, r_file_new (dir, "global-metadata.dat", NULL)); return out; } diff --git a/src/lib/pe.c b/src/lib/pe.c deleted file mode 100644 index 69cea04..0000000 --- a/src/lib/pe.c +++ /dev/null @@ -1,266 +0,0 @@ -/* r2unity - MIT - Copyright 2025-2026 - pancake */ - -// Minimal PE parser for r2unity (GameAssembly.dll on Windows, GameAssembly.dll on UWP). -// PE .dll images are already relocated at link time (preferred image base is -// concrete), so no relocation fixup is required — pointers in .data / .rdata -// are absolute VAs relative to the ImageBase. 
-#define R_LOG_ORIGIN "r2unity.pe" -#include "lib.h" -#include - -typedef struct { - char name[9]; - ut64 vaddr; - ut64 vsize; - ut64 fileoff; - ut64 filesize; - ut32 chars; -} PeSec; - -typedef struct { - ut8 *file; - ut64 filesize; - int is64; - ut64 image_base; - PeSec secs[128]; - int nsecs; -} PeImg; - -static bool pe_load(const char *path, PeImg *pe) { - memset (pe, 0, sizeof (*pe)); - size_t sz = 0; - pe->file = (ut8 *)r_file_slurp (path, &sz); - if (!pe->file || sz < 0x40) { - R_FREE (pe->file); - return false; - } - pe->filesize = (ut64)sz; - if (pe->file[0] != 'M' || pe->file[1] != 'Z') { - R_FREE (pe->file); - return false; - } - ut32 e_lfanew = r_read_le32 (pe->file + 0x3C); - if ((ut64)e_lfanew + 24 > pe->filesize) { - R_FREE (pe->file); - return false; - } - const ut8 *nt = pe->file + e_lfanew; - if (r_read_le32 (nt) != 0x00004550) { /* "PE\0\0" */ - R_FREE (pe->file); - return false; - } - /* COFF file header immediately follows the 4-byte PE signature */ - ut16 nsec = r_read_le16 (nt + 4 + 2); - ut16 sizeof_opt = r_read_le16 (nt + 4 + 16); - ut64 opt_off = (ut64)e_lfanew + 4 + 20; - if (opt_off + sizeof_opt > pe->filesize) { - R_FREE (pe->file); - return false; - } - const ut8 *opt = pe->file + opt_off; - ut16 mag = r_read_le16 (opt); - if (mag == 0x20b) { - pe->is64 = 1; - pe->image_base = r_read_le64 (opt + 24); - } else if (mag == 0x10b) { - pe->is64 = 0; - pe->image_base = (ut64)r_read_le32 (opt + 28); - } else { - R_FREE (pe->file); - return false; - } - ut64 sh_off = opt_off + sizeof_opt; - for (ut16 i = 0; i < nsec && pe->nsecs < (int) (sizeof (pe->secs) / sizeof (pe->secs[0])); i++) { - ut64 off = sh_off + (ut64)i * 40; - if (off + 40 > pe->filesize) { - break; - } - const ut8 *sh = pe->file + off; - PeSec *s = &pe->secs[pe->nsecs++]; - memcpy (s->name, sh, 8); - s->name[8] = 0; - s->vsize = r_read_le32 (sh + 8); - ut32 rva = r_read_le32 (sh + 12); - s->vaddr = pe->image_base + (ut64)rva; - s->filesize = r_read_le32 (sh + 16); - s->fileoff = 
r_read_le32 (sh + 20); - s->chars = r_read_le32 (sh + 36); - } - return true; -} - -static void pe_free(PeImg *pe) { - R_FREE (pe->file); -} - -static inline const ut8 *pe_vm_to_ptr(PeImg *pe, ut64 vaddr) { - for (int i = 0; i < pe->nsecs; i++) { - PeSec *s = &pe->secs[i]; - if (vaddr >= s->vaddr && vaddr < s->vaddr + s->vsize) { - ut64 delta = vaddr - s->vaddr; - if (delta < s->filesize && s->fileoff + delta < pe->filesize) { - return pe->file + s->fileoff + delta; - } - return NULL; - } - } - return NULL; -} - -static inline void pe_text_range(PeImg *pe, ut64 *text_lo, ut64 *text_hi) { - ut64 lo = UT64_MAX, hi = 0; - for (int i = 0; i < pe->nsecs; i++) { - PeSec *s = &pe->secs[i]; - /* IMAGE_SCN_MEM_EXECUTE = 0x20000000 */ - bool is_text = (s->chars & 0x20000000) || !strncmp (s->name, ".text", 5); - if (is_text) { - if (s->vaddr < lo) { - lo = s->vaddr; - } - if (s->vaddr + s->vsize > hi) { - hi = s->vaddr + s->vsize; - } - } - } - if (lo == UT64_MAX || hi <= lo) { - lo = UT64_MAX; - hi = 0; - for (int i = 0; i < pe->nsecs; i++) { - PeSec *s = &pe->secs[i]; - if (s->vaddr < lo) { - lo = s->vaddr; - } - if (s->vaddr + s->vsize > hi) { - hi = s->vaddr + s->vsize; - } - } - } - *text_lo = lo; - *text_hi = hi; -} - -static inline bool pe_is_data_section(const PeSec *s) { - /* Skip executable sections; keep initialized-data / .data / .rdata */ - if (s->chars & 0x20000000) { - return false; - } - if (!strncmp (s->name, ".data", 5)) { - return true; - } - if (!strncmp (s->name, ".rdata", 6)) { - return true; - } - /* IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040 */ - return (s->chars & 0x40) != 0; -} - -/* Validate the table at VA `arrptr` (`count` pointers): require at least - * `min_seen` non-zero entries and `min_good` entries landing in [text_lo, text_hi) - * within a 128-entry sample. On match, copy the first method_count entries - * into candidates (clamping to count) and return true. 
*/ -static bool pe_probe_table(PeImg *pe, ut64 arrptr, ut32 count, int ptrsz, ut64 text_lo, ut64 text_hi, ut32 min_seen, ut32 min_good, size_t method_count, ut64 *candidates) { - ut32 sample = R_MIN ((ut32)128, count); - ut32 good = 0, seen = 0; - for (ut32 k = 0; k < sample; k++) { - const ut8 *p = pe_vm_to_ptr (pe, arrptr + (ut64)k *(ut64)ptrsz); - if (!p) { - return false; - } - ut64 val = (ptrsz == 8)? r_read_le64 (p): (ut64)r_read_le32 (p); - if (val) { - seen++; - } - if (val >= text_lo && val < text_hi) { - good++; - } - } - if (seen < min_seen || good < min_good) { - return false; - } - memset (candidates, 0, method_count * sizeof (ut64)); - size_t tocopy = R_MIN ((size_t)count, method_count); - size_t in_text = 0; - for (size_t m = 0; m < tocopy; m++) { - const ut8 *p = pe_vm_to_ptr (pe, arrptr + (ut64)m *(ut64)ptrsz); - if (!p) { - break; - } - ut64 val = (ptrsz == 8)? r_read_le64 (p): (ut64)r_read_le32 (p); - if (val >= text_lo && val < text_hi) { - candidates[m] = val; - in_text++; - } - } - return in_text >= 8; -} - -R_API bool r2unity_find_method_pointers_pe(R2UnityMetadata *meta, const char *pe_path, ut64 **out_ptrs) { - R_RETURN_VAL_IF_FAIL (meta && pe_path && out_ptrs, false); - *out_ptrs = NULL; - PeImg pe; - if (!pe_load (pe_path, &pe)) { - return false; - } - size_t method_count = (ut64)meta->methodsSize / sizeof (Il2CppMethodDefinition); - if (!method_count) { - pe_free (&pe); - return false; - } - ut64 text_lo = 0, text_hi = 0; - pe_text_range (&pe, &text_lo, &text_hi); - if (text_lo >= text_hi) { - pe_free (&pe); - return false; - } - ut64 *candidates = R_NEWS (ut64, method_count); - if (!candidates) { - pe_free (&pe); - return false; - } - bool found = false; - int ptrsz = pe.is64? 8: 4; - ut32 expected = (ut32)R_MAX ((ut64)64, (ut64) ((ut64)meta->methodsSize / sizeof (Il2CppMethodDefinition))); - - /* Pass 1: CodeRegistration-shaped pair {count32, pad?, ptr}, loose - * thresholds (small absolute floor). 
Pass 2: generic {count32, ptr} - * fallback with stricter sample-fraction thresholds. */ - for (int pass = 0; pass < 2 && !found; pass++) { - const ut32 min_count = (pass == 0)? 32: 64; - for (int i = 0; i < pe.nsecs && !found; i++) { - PeSec *s = &pe.secs[i]; - if (!pe_is_data_section (s) || s->filesize < (ut64) (8 + ptrsz)) { - continue; - } - const ut8 *buf = pe.file + s->fileoff; - for (ut64 off = 0; off + (ut64) (8 + ptrsz) <= s->filesize; off += 4) { - ut32 cnt = r_read_le32 (buf + off); - if (cnt < min_count) { - continue; - } - if (pass == 1 && expected && (cnt > expected * 2 || cnt < expected / 2)) { - continue; - } - ut64 arrptr = (ptrsz == 8) - ? r_read_le64 (buf + off + 8) - : (ut64)r_read_le32 (buf + off + 4); - ut32 sample = R_MIN ((ut32)128, cnt); - ut32 min_seen = (pass == 0)? 8: sample / 2; - ut32 min_good = (pass == 0)? 8: (sample * 3) / 4; - if (pe_probe_table (&pe, arrptr, cnt, ptrsz, text_lo, text_hi, min_seen, min_good, method_count, candidates)) { - R_LOG_DEBUG ("[pe] pass=%d arrptr=0x%" PFMT64x " cnt=%u", pass, arrptr, cnt); - found = true; - break; - } - } - } - } - - if (!found) { - R_FREE (candidates); - pe_free (&pe); - return false; - } - *out_ptrs = candidates; - pe_free (&pe); - return true; -} diff --git a/src/main.c b/src/main.c index 3ed9b66..889372b 100644 --- a/src/main.c +++ b/src/main.c @@ -9,6 +9,20 @@ #include #include "lib/lib.h" +typedef struct { + R2UnitySymbolOverride *items; + size_t count; +} CliSymbolOverrides; + +typedef struct { + bool as_json; + bool as_r2; + bool fast; + bool quiet; + long limit; + const R2UnityNativeOptions *native; +} CliEmitOptions; + /* System.Reflection.MethodAttributes subset. Returns an owned string *(possibly empty) describing the visibility/flags for a managed method. 
*/ static char *method_attrs(unsigned flags) { @@ -62,10 +76,12 @@ static void print_usage(FILE *out, const char *prog_name) { " -a 0xADDR Read the method pointer table starting at virtual address 0xADDR\n" " -c Enumerate classes, inheritance, methods, and fields\n" " -D Detect companion files from the given executable path and exit\n" - " -f Fast path: auto-detect ELF/Mach-O/PE and scan method pointers\n" + " -f Recover native method pointers through symbols/sections\n" + " -H Force section-scan fallback instead of CodeRegistration\n" " -h Show this help and exit\n" " -j One-line JSON status, or JSON output with -c/-P/-R/-S\n" " -l N Limit emitted entries to N\n" + " -O N=A Override a native symbol address, e.g. g_CodeRegistration=0x1234\n" " -P Enumerate P/Invoke (managed -> native) methods\n" " -q Quiet mode: omit banner and informational comments\n" " -r Emit r2 script commands; pairs with -c/-P/-R\n" @@ -94,6 +110,57 @@ static void print_usage(FILE *out, const char *prog_name) { prog_name); } +static bool add_symbol_override(CliSymbolOverrides *overrides, const char *arg) { + if (!overrides || !arg) { + return false; + } + const char *eq = strchr (arg, '='); + if (!eq || eq == arg || !eq[1]) { + return false; + } + R2UnitySymbolOverride *items = realloc (overrides->items, (overrides->count + 1) * sizeof (*items)); + if (!items) { + return false; + } + overrides->items = items; + char *name = r_str_ndup (arg, (int)(eq - arg)); + if (!name) { + return false; + } + overrides->items[overrides->count].name = name; + overrides->items[overrides->count].va = (ut64)strtoull (eq + 1, NULL, 0); + overrides->count++; + return true; +} + +static void free_symbol_overrides(CliSymbolOverrides *overrides) { + if (!overrides) { + return; + } + for (size_t i = 0; i < overrides->count; i++) { + free ((char *)overrides->items[i].name); + } + R_FREE (overrides->items); + overrides->count = 0; +} + +static void pj_native_result(PJ *pj, const R2UnityNativeResult *native) { + pj_ks 
(pj, "native_source", native? r2unity_native_source_name (native->source): "none"); + pj_kn (pj, "code_registration", native? native->code_registration_va: 0); + pj_kn (pj, "metadata_registration", native? native->metadata_registration_va: 0); + pj_kn (pj, "method_pointers", native? native->method_pointers_va: 0); + pj_kn (pj, "code_gen_modules", native? native->code_gen_modules_va: 0); +} + +static void print_native_comment(const R2UnityNativeResult *native) { + printf ("# native_source=%s code_registration=0x%" PFMT64x " metadata_registration=0x%" PFMT64x " method_pointers=0x%" PFMT64x " code_gen_modules=0x%" PFMT64x "\n", + native? r2unity_native_source_name (native->source): "none", + native? native->code_registration_va: 0, + native? native->metadata_registration_va: 0, + native? native->method_pointers_va: 0, + native? native->code_gen_modules_va: 0); +} + static void json_escape(FILE *f, const char *s) { for (; s && *s; s++) { unsigned char c = (unsigned char)*s; @@ -409,31 +476,6 @@ static int emit_detected_paths(const char *input, bool as_json) { return 0; } -/* Sniff the exe magic and dispatch to the matching fast finder. */ -static bool find_method_pointers_fast(R2UnityMetadata *meta, const char *path, ut64 **out_ptrs) { - ut8 magic[4] = { 0 }; - FILE *fp = fopen (path, "rb"); - if (fp) { - if (fread (magic, 1, sizeof (magic), fp) != sizeof (magic)) { - memset (magic, 0, sizeof (magic)); - } - fclose (fp); - } - if (!memcmp (magic, "\x7f" - "ELF", - 4)) { - return r2unity_find_method_pointers_elf (meta, path, out_ptrs); - } - ut32 m = r_read_le32 (magic); - if (m == 0xfeedfacf || m == 0xcffaedfe || m == 0xcafebabe || m == 0xbebafeca) { - return r2unity_find_method_pointers_macho (meta, path, out_ptrs); - } - if (magic[0] == 'M' && magic[1] == 'Z') { - return r2unity_find_method_pointers_pe (meta, path, out_ptrs); - } - return false; -} - /* Emit one method as r2 script commands. 
Methods without a plausible native * address (addr <= 0x1000) are skipped entirely and do not count against the * emit limit. Returns true when output was produced. */ @@ -528,48 +570,27 @@ static char *method_name_or_fallback(R2UnityMetadata *meta, const Il2CppMethodDe return r_str_newf ("method.%zu", index); } -static void sanitize_ic_name(char *s) { - if (!s) { - return; +static char *r2_ic_name(const char *prefix, const char *name, size_t index) { + char *out = R_STR_ISNOTEMPTY (name)? r_str_newf ("%s", name): r_str_newf ("%s_%zu", prefix, index); + if (out) { + r_name_filter (out, -1); } - r_name_filter (s, -1); - for (char *p = s; *p; p++) { + for (char *p = out; p && *p; p++) { if (*p == '.' || *p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') { *p = '_'; } } -} - -static char *r2_class_name(const char *name, size_t index) { - char *out = (name && *name)? strdup (name): r_str_newf ("type_%zu", index); - sanitize_ic_name (out); - if (!out || !*out) { - free (out); - out = r_str_newf ("type_%zu", index); - } - return out; -} - -static char *r2_method_name(const char *name, size_t index) { - char *out = (name && *name)? strdup (name): r_str_newf ("method_%zu", index); - sanitize_ic_name (out); - if (!out || !*out) { - free (out); - out = r_str_newf ("method_%zu", index); + if (R_STR_ISEMPTY (out)) { + R_FREE (out); + return r_str_newf ("%s_%zu", prefix, index); } return out; } static void pj_hex(PJ *pj, const char *key, ut64 value, int width) { - RStrBuf *sb = r_strbuf_new (""); - if (!sb) { - pj_knull (pj, key); - return; - } - r_strbuf_appendf (sb, "0x%0*" PFMT64x, width, value); - char *s = r_strbuf_drain (sb); - pj_ks (pj, key, s? 
s: ""); - free (s); + char hex[64]; + snprintf (hex, sizeof (hex), "0x%0*" PFMT64x, width, value); + pj_ks (pj, key, hex); } static void pj_string_or_null(PJ *pj, const char *key, const char *value) { @@ -749,7 +770,7 @@ static void emit_class_r2(R2UnityMetadata *meta, bool quiet) { const Il2CppTypeDefinition *td = &types[type_index]; char *name = r2unity_type_fullname (meta, td, type_index, R2U_NAME_FALLBACK_TYPE); - char *r2klass = r2_class_name (name, type_index); + char *r2klass = r2_ic_name ("type", name, type_index); if (!quiet) { char *base = type_name_from_index (meta, types, type_count, td->parentIndex, false); if (base) { @@ -769,7 +790,7 @@ static void emit_class_r2(R2UnityMetadata *meta, continue; } char *mname = method_name_or_fallback (meta, &methods[mi], mi); - char *r2meth = r2_method_name (mname, mi); + char *r2meth = r2_ic_name ("method", mname, mi); printf ("ic+%s.%s @ 0x%" PFMT64x "\n", r2klass, r2meth, addr); free (r2meth); free (mname); @@ -795,12 +816,12 @@ static void emit_class_r2(R2UnityMetadata *meta, free (name); } -static int emit_classes(R2UnityMetadata *meta, const char *exe_path, const char *metadata_path, bool as_json, bool as_r2, bool fast, bool quiet, long limit) { - if (as_json && as_r2) { +static int emit_classes(R2UnityMetadata *meta, const char *exe_path, const char *metadata_path, const CliEmitOptions *opts) { + if (opts->as_json && opts->as_r2) { R_LOG_ERROR ("-j and -r are mutually exclusive with -c"); return 1; } - if (fast && (!exe_path || !*exe_path)) { + if (opts->fast && (!exe_path || !*exe_path)) { R_LOG_ERROR ("-c -f requires both executable and metadata paths"); return 1; } @@ -818,26 +839,20 @@ static int emit_classes(R2UnityMetadata *meta, const char *exe_path, const char size_t interface_count = 0; int32_t *interfaces = r2unity_get_type_index_table (meta, R2U_SEC_INTERFACES, &interface_count); + R2UnityNativeResult native_result = { 0 }; ut64 *method_ptrs = NULL; bool has_ptrs = false; - if (fast) { - has_ptrs = 
find_method_pointers_fast (meta, exe_path, &method_ptrs); - } - if (method_ptrs && !has_ptrs) { - for (size_t k = 0; k < method_count; k++) { - if (method_ptrs[k]) { - has_ptrs = true; - break; - } - } + if (opts->fast) { + has_ptrs = r2unity_find_method_pointers (meta, exe_path, opts->native, &native_result); + method_ptrs = native_result.method_ptrs; } size_t max = type_count; - if (limit >= 0 && (size_t)limit < max) { - max = (size_t)limit; + if (opts->limit >= 0 && (size_t)opts->limit < max) { + max = (size_t)opts->limit; } - if (as_json) { + if (opts->as_json) { PJ *pj = pj_new (); if (!pj) { R_LOG_ERROR ("unable to allocate JSON builder"); @@ -853,6 +868,7 @@ static int emit_classes(R2UnityMetadata *meta, const char *exe_path, const char pj_ki (pj, "version", meta->version); pj_ks (pj, "unity_range", r2unity_unity_range_from_wire (meta->version)); pj_kb (pj, "has_ptrs", has_ptrs); + pj_native_result (pj, &native_result); pj_kn (pj, "types", (ut64)type_count); pj_kn (pj, "methods", (ut64)method_count); pj_kn (pj, "fields", (ut64)field_count); @@ -867,19 +883,20 @@ static int emit_classes(R2UnityMetadata *meta, const char *exe_path, const char puts (out); free (out); } - } else if (as_r2) { - if (!quiet) { + } else if (opts->as_r2) { + if (!opts->quiet) { printf ("# r2 script generated by r2unity -c\n"); printf ("# Input file: %s\n", metadata_path && *metadata_path? metadata_path: "-"); + print_native_comment (&native_result); if (!has_ptrs) { printf ("# Method ic+ entries need native addresses; pass -f with an executable to recover them.\n"); } } for (size_t i = 0; i < max; i++) { - emit_class_r2 (meta, types, type_count, methods, method_count, fields, field_count, method_ptrs, has_ptrs, i, quiet); + emit_class_r2 (meta, types, type_count, methods, method_count, fields, field_count, method_ptrs, has_ptrs, i, opts->quiet); } } else { - if (!quiet) { + if (!opts->quiet) { printf ("# classes from %s\n", metadata_path && *metadata_path? 
metadata_path: "-"); printf ("# wire_version=%d (%s) types=%zu methods=%zu fields=%zu\n", meta->version, @@ -893,7 +910,7 @@ static int emit_classes(R2UnityMetadata *meta, const char *exe_path, const char } } - R_FREE (method_ptrs); + r2unity_native_result_fini (&native_result); R_FREE (interfaces); R_FREE (fields); R_FREE (methods); @@ -915,84 +932,116 @@ int main(int argc, char *argv[]) { bool string_literals = false; bool detect_paths = false; bool classes = false; + R2UnityNativeOptions native_options = { 0 }; + CliSymbolOverrides symbol_overrides = { 0 }; int opt; - while ((opt = getopt (argc, argv, "chjrqfVvSPRDzl:a:")) != -1) { + RGetopt go; + r_getopt_init (&go, argc, (const char **)argv, "chjrqfHVvSPRDzl:a:O:"); + while ((opt = r_getopt_next (&go)) != -1) { switch (opt) { case 'c': classes = true; break; case 'j': json_one_line = true; break; case 'r': r2_script = true; break; case 'q': quiet = true; break; case 'f': fast = true; break; + case 'H': + fast = true; + native_options.force_heuristic = true; + break; case 'V': debug = true; break; case 'v': printf ("r2unity %s\n", R2UNITY_VERSION); + free_symbol_overrides (&symbol_overrides); return 0; case 'S': sbom = true; break; case 'P': pinvokes = true; break; case 'R': reverse_pinvokes = true; break; case 'D': detect_paths = true; break; case 'z': string_literals = true; break; - case 'l': limit = strtol (optarg, NULL, 0); break; - case 'a': gmp_addr = (ut64)strtoull (optarg, NULL, 0); break; + case 'l': limit = strtol (go.arg, NULL, 0); break; + case 'a': gmp_addr = (ut64)strtoull (go.arg, NULL, 0); break; + case 'O': + if (!add_symbol_override (&symbol_overrides, go.arg)) { + R_LOG_ERROR ("invalid -O argument; expected name=addr"); + free_symbol_overrides (&symbol_overrides); + return 1; + } + fast = true; + break; case 'h': print_usage (stdout, argv[0]); + free_symbol_overrides (&symbol_overrides); return 0; default: print_usage (stderr, argv[0]); + free_symbol_overrides (&symbol_overrides); return 1; 
} } + native_options.symbols = symbol_overrides.items; + native_options.symbols_count = symbol_overrides.count; if (pinvokes && reverse_pinvokes) { R_LOG_ERROR ("-P and -R are mutually exclusive"); + free_symbol_overrides (&symbol_overrides); return 1; } if (detect_paths) { - if (argc - optind != 1) { + if (argc - go.ind != 1) { print_usage (stderr, argv[0]); + free_symbol_overrides (&symbol_overrides); return 1; } - return emit_detected_paths (argv[optind], json_one_line); + int rc = emit_detected_paths (argv[go.ind], json_one_line); + free_symbol_overrides (&symbol_overrides); + return rc; } if (classes) { if (sbom || pinvokes || reverse_pinvokes || string_literals) { R_LOG_ERROR ("-c cannot be combined with -S, -P, -R or -z"); + free_symbol_overrides (&symbol_overrides); return 1; } - if (argc - optind != 1 && argc - optind != 2) { + if (argc - go.ind != 1 && argc - go.ind != 2) { print_usage (stderr, argv[0]); + free_symbol_overrides (&symbol_overrides); return 1; } - if (fast && argc - optind != 2) { + if (fast && argc - go.ind != 2) { R_LOG_ERROR ("-c -f requires both executable and metadata paths"); + free_symbol_overrides (&symbol_overrides); return 1; } } else if (string_literals) { if (json_one_line || fast || gmp_addr || sbom || pinvokes || reverse_pinvokes) { R_LOG_ERROR ("-z cannot be combined with -j, -f, -a, -S, -P or -R"); + free_symbol_overrides (&symbol_overrides); return 1; } - if (argc - optind != 1 && argc - optind != 2) { + if (argc - go.ind != 1 && argc - go.ind != 2) { print_usage (stderr, argv[0]); + free_symbol_overrides (&symbol_overrides); return 1; } - } else if (argc - optind != 2) { + } else if (argc - go.ind != 2) { print_usage (stderr, argv[0]); + free_symbol_overrides (&symbol_overrides); return 1; } const char *exe_path = NULL; const char *metadata_path = NULL; - if ((string_literals || classes) && argc - optind == 1) { + if ((string_literals || classes) && argc - go.ind == 1) { exe_path = ""; - metadata_path = argv[optind]; + 
metadata_path = argv[go.ind]; } else { - exe_path = argv[optind]; - metadata_path = argv[optind + 1]; + exe_path = argv[go.ind]; + metadata_path = argv[go.ind + 1]; } RBuffer *buf = r_buf_new_file (metadata_path, O_RDONLY, 0); if (!buf) { perror ("Error opening file"); + free_symbol_overrides (&symbol_overrides); return 1; } @@ -1003,6 +1052,7 @@ int main(int argc, char *argv[]) { if (!meta) { R_LOG_ERROR ("Failed to parse metadata"); r_unref (buf); + free_symbol_overrides (&symbol_overrides); return 1; } @@ -1017,6 +1067,7 @@ int main(int argc, char *argv[]) { } r2unity_free_metadata (meta); r_unref (buf); + free_symbol_overrides (&symbol_overrides); return rc; } @@ -1025,6 +1076,7 @@ int main(int argc, char *argv[]) { R_LOG_ERROR ("-j and -r are mutually exclusive"); r2unity_free_metadata (meta); r_unref (buf); + free_symbol_overrides (&symbol_overrides); return 1; } int rc = reverse_pinvokes @@ -1032,13 +1084,23 @@ int main(int argc, char *argv[]) { : emit_pinvokes (meta, exe_path, json_one_line, r2_script, quiet); r2unity_free_metadata (meta); r_unref (buf); + free_symbol_overrides (&symbol_overrides); return rc; } if (classes) { - int rc = emit_classes (meta, exe_path, metadata_path, json_one_line, r2_script, fast, quiet, limit); + CliEmitOptions emit_opts = { + .as_json = json_one_line, + .as_r2 = r2_script, + .fast = fast, + .quiet = quiet, + .limit = limit, + .native = &native_options + }; + int rc = emit_classes (meta, exe_path, metadata_path, &emit_opts); r2unity_free_metadata (meta); r_unref (buf); + free_symbol_overrides (&symbol_overrides); return rc; } @@ -1046,6 +1108,7 @@ int main(int argc, char *argv[]) { int rc = emit_string_literals (meta, metadata_path, quiet, limit); r2unity_free_metadata (meta); r_unref (buf); + free_symbol_overrides (&symbol_overrides); return rc; } @@ -1055,42 +1118,45 @@ int main(int argc, char *argv[]) { size_t method_count = 0; Il2CppMethodDefinition *methods = r2unity_get_method_definitions (meta, &method_count); + 
R2UnityNativeResult native_result = { 0 }; ut64 *method_ptrs = NULL; bool has_ptrs = false; if (gmp_addr) { R_LOG_WARN ("Manual method-pointer table reading (-a) is not implemented"); } else if (fast) { - has_ptrs = find_method_pointers_fast (meta, exe_path, &method_ptrs); - } - /* Fast-path may prealloc an all-zero table; upgrade has_ptrs only if any - * non-zero entry survived. */ - if (method_ptrs && !has_ptrs) { - for (size_t k = 0; k < method_count; k++) { - if (method_ptrs[k]) { - has_ptrs = true; - break; - } - } + has_ptrs = r2unity_find_method_pointers (meta, exe_path, &native_options, &native_result); + method_ptrs = native_result.method_ptrs; } if (json_one_line) { - // Output a single stable JSON line - printf ("{\"ok\":true,\"version\":%d,\"types\":%u,\"methods\":%u,\"has_ptrs\":%s}\n", - meta->version, - (unsigned)type_count, - (unsigned)method_count, - has_ptrs? "true": "false"); - R_FREE (method_ptrs); + PJ *pj = pj_new (); + pj_o (pj); + pj_kb (pj, "ok", true); + pj_ki (pj, "version", meta->version); + pj_kn (pj, "types", (ut64)type_count); + pj_kn (pj, "methods", (ut64)method_count); + pj_kb (pj, "has_ptrs", has_ptrs); + pj_native_result (pj, &native_result); + pj_end (pj); + char *out = pj_drain (pj); + if (out) { + puts (out); + free (out); + } + r2unity_native_result_fini (&native_result); R_FREE (methods); R_FREE (types); r2unity_free_metadata (meta); r_unref (buf); + free_symbol_overrides (&symbol_overrides); return 0; } if (!quiet) { printf ("# r2 script generated by r2unity\n"); - printf ("# Input file: %s\n\n", metadata_path); + printf ("# Input file: %s\n", metadata_path); + print_native_comment (&native_result); + printf ("\n"); } size_t img_count = 0; @@ -1122,7 +1188,7 @@ int main(int argc, char *argv[]) { } } - R_FREE (method_ptrs); + r2unity_native_result_fini (&native_result); R_FREE (methods); R_FREE (types); R_FREE (images); @@ -1130,6 +1196,7 @@ int main(int argc, char *argv[]) { r2unity_free_metadata (meta); r_unref (buf); + 
free_symbol_overrides (&symbol_overrides); return 0; } diff --git a/src/r2/core_r2unity.c b/src/r2/core_r2unity.c index 329e0a2..24ab9b5 100644 --- a/src/r2/core_r2unity.c +++ b/src/r2/core_r2unity.c @@ -21,6 +21,9 @@ static const char *g_help_msg[] = { "r2unity-S", "[j]", "emit managed-assembly SBOM (text or JSON)", "Variables:", "", "", "r2unity.metadata", "", "path to global-metadata.dat", "r2unity.library", "", "path to IL2CPP native library", + "r2unity.code_registration", "", "Il2CppCodeRegistration VA override/resolved VA", + "r2unity.metadata_registration", "", "Il2CppMetadataRegistration VA override/resolved VA", + "r2unity.force_heuristic", "", "force section-scan fallback", NULL }; // clang-format on @@ -75,16 +78,108 @@ static const char *current_binary_path(RCore *core) { return NULL; } -static const char *cfg_get_nonempty(RConfig *cfg, const char *key) { - const char *v = r_config_get (cfg, key); - return (v && *v)? v: NULL; +static ut64 flag_addr_prefixed(RCore *core, const char *prefix, const char *name) { + char buf[256]; + int n = snprintf (buf, sizeof (buf), "%s%s", prefix, name); + if (n > 0 && n < (int)sizeof (buf)) { + RFlagItem *fi = r_flag_get (core->flags, buf); + if (fi && fi->addr) { + return fi->addr; + } + } + if (*name != '_') { + n = snprintf (buf, sizeof (buf), "%s_%s", prefix, name); + if (n > 0 && n < (int)sizeof (buf)) { + RFlagItem *fi = r_flag_get (core->flags, buf); + return fi? 
fi->addr: 0; + } + } + return 0; +} + +static ut64 flag_addr_native_alias(RCore *core, const char *const *names) { + static const char *const prefixes[] = { + "", + "sym.", + "obj.", + NULL + }; + if (!core || !core->flags || !names) { + return 0; + } + for (size_t i = 0; names[i]; i++) { + for (size_t j = 0; prefixes[j]; j++) { + ut64 addr = flag_addr_prefixed (core, prefixes[j], names[i]); + if (addr) { + return addr; + } + } + } + return 0; +} + +static bool current_binary_matches_path(RCore *core, const char *path) { + if (!path || !*path) { + return true; + } + const char *cur = current_binary_path (core); + if (!cur || !*cur) { + return false; + } + if (!strcmp (cur, path)) { + return true; + } + return !strcmp (r_file_basename (cur), r_file_basename (path)); +} + +static void native_options_from_core(RCore *core, R2UnityNativeOptions *opts) { + memset (opts, 0, sizeof (*opts)); + opts->force_heuristic = r_config_get_b (core->config, "r2unity.force_heuristic"); + opts->code_registration_va = r_config_get_i (core->config, "r2unity.code_registration"); + opts->metadata_registration_va = r_config_get_i (core->config, "r2unity.metadata_registration"); + if (!opts->code_registration_va) { + opts->code_registration_va = flag_addr_native_alias (core, r2unity_native_code_registration_names ()); + } + if (!opts->metadata_registration_va) { + opts->metadata_registration_va = flag_addr_native_alias (core, r2unity_native_metadata_registration_names ()); + } +} + +static void native_result_to_config(RCore *core, const R2UnityNativeResult *result) { + if (!core || !result) { + return; + } + if (result->code_registration_va) { + r_config_set_i (core->config, "r2unity.code_registration", result->code_registration_va); + } + if (result->metadata_registration_va) { + r_config_set_i (core->config, "r2unity.metadata_registration", result->metadata_registration_va); + } +} + +static void pj_native_result(PJ *pj, const R2UnityNativeResult *result) { + pj_ks (pj, "native_source", 
result? r2unity_native_source_name (result->source): "none"); + pj_kn (pj, "code_registration", result? result->code_registration_va: 0); + pj_kn (pj, "metadata_registration", result? result->metadata_registration_va: 0); + pj_kn (pj, "method_pointers", result? result->method_pointers_va: 0); + pj_kn (pj, "code_gen_modules", result? result->code_gen_modules_va: 0); +} + +static void print_native_result(RCore *core, const R2UnityNativeResult *result) { + r_cons_printf (core->cons, + "# native_source=%s code_registration=0x%" PFMT64x " metadata_registration=0x%" PFMT64x " method_pointers=0x%" PFMT64x " code_gen_modules=0x%" PFMT64x "\n", + result? r2unity_native_source_name (result->source): "none", + result? result->code_registration_va: 0, + result? result->metadata_registration_va: 0, + result? result->method_pointers_va: 0, + result? result->code_gen_modules_va: 0); } /* Resolve (and on first use, cache into the eval vars) the metadata path for * the current session. Returns a pointer owned by the RConfig, or NULL. */ static const char *resolve_metadata_path(RCore *core) { - const char *v = cfg_get_nonempty (core->config, "r2unity.metadata"); - if (v) { + const char *v = r_config_get (core->config, "r2unity.metadata"); + if (R_STR_ISNOTEMPTY (v)) { return v; } const char *bin = current_binary_path (core); @@ -102,12 +197,13 @@ static const char *resolve_metadata_path(RCore *core) { r_config_set (core->config, "r2unity.library", p->il2cpp_binary); } r2unity_free_paths (p); - return cfg_get_nonempty (core->config, "r2unity.metadata"); + v = r_config_get (core->config, "r2unity.metadata"); + return R_STR_ISNOTEMPTY (v)? 
v: NULL; } static const char *resolve_library_path(RCore *core) { - const char *v = cfg_get_nonempty (core->config, "r2unity.library"); - if (v) { + const char *v = r_config_get (core->config, "r2unity.library"); + if (R_STR_ISNOTEMPTY (v)) { return v; } const char *bin = current_binary_path (core); @@ -125,7 +221,8 @@ static const char *resolve_library_path(RCore *core) { r_config_set (core->config, "r2unity.metadata", p->metadata); } r2unity_free_paths (p); - return cfg_get_nonempty (core->config, "r2unity.library"); + v = r_config_get (core->config, "r2unity.library"); + return R_STR_ISNOTEMPTY (v)? v: NULL; } static R2UnityMetadata *open_metadata(RCore *core, RBuffer **out_buf) { @@ -158,7 +255,7 @@ static void close_metadata(R2UnityMetadata *meta, RBuffer *buf) { } } -static void json_escape_cons(PJ *pj, const char *key, const char *value) { +static void pj_string_or_null(PJ *pj, const char *key, const char *value) { if (value) { pj_ks (pj, key, value); } else { @@ -198,11 +295,11 @@ static int cmd_detect(RCore *core, bool as_json) { PJ *pj = pj_new (); pj_o (pj); pj_kb (pj, "ok", true); - json_escape_cons (pj, "platform", p->platform); - json_escape_cons (pj, "main_executable", p->main_executable); - json_escape_cons (pj, "il2cpp_binary", p->il2cpp_binary); - json_escape_cons (pj, "metadata", p->metadata); - json_escape_cons (pj, "data_dir", p->data_dir); + pj_string_or_null (pj, "platform", p->platform); + pj_string_or_null (pj, "main_executable", p->main_executable); + pj_string_or_null (pj, "il2cpp_binary", p->il2cpp_binary); + pj_string_or_null (pj, "metadata", p->metadata); + pj_string_or_null (pj, "data_dir", p->data_dir); pj_end (pj); r_cons_println (core->cons, pj_string (pj)); pj_free (pj); @@ -262,27 +359,18 @@ static int cmd_info(RCore *core, bool as_json) { return 0; } -/* Sniff the exe magic and dispatch to the matching fast finder. 
*/ -static bool find_method_pointers(R2UnityMetadata *meta, const char *path, ut64 **out_ptrs) { - ut8 magic[4] = { 0 }; - FILE *fp = fopen (path, "rb"); - if (fp) { - (void)fread (magic, 1, 4, fp); - fclose (fp); - } - if (!memcmp (magic, "\x7f" - "ELF", - 4)) { - return r2unity_find_method_pointers_elf (meta, path, out_ptrs); - } - ut32 m = r_read_le32 (magic); - if (m == 0xfeedfacf || m == 0xcffaedfe || m == 0xcafebabe || m == 0xbebafeca) { - return r2unity_find_method_pointers_macho (meta, path, out_ptrs); +static bool find_method_pointers(RCore *core, R2UnityMetadata *meta, const char *path, R2UnityNativeResult *result) { + R2UnityNativeOptions opts = { 0 }; + native_options_from_core (core, &opts); + bool ok = false; + if (core && core->bin && r_bin_cur (core->bin) && current_binary_matches_path (core, path)) { + ok = r2unity_find_method_pointers_rbin (meta, core->bin, r_bin_cur (core->bin), &opts, result); } - if (magic[0] == 'M' && magic[1] == 'Z') { - return r2unity_find_method_pointers_pe (meta, path, out_ptrs); + if (!ok && path && *path) { + ok = r2unity_find_method_pointers_simple (meta, path, &opts, result); } - return false; + native_result_to_config (core, result); + return ok; } static int type_definition_for_type_index(const Il2CppTypeDefinition *types, size_t type_count, int32_t type_index) { @@ -325,44 +413,21 @@ static char *method_name_or_fallback(R2UnityMetadata *meta, const Il2CppMethodDe return r_str_newf ("method.%zu", index); } -static void sanitize_ic_name(char *s) { - if (!s) { - return; +static char *r2_ic_name(const char *prefix, const char *name, size_t index, bool append_index) { + char *out = R_STR_ISNOTEMPTY (name) + ? (append_index? r_str_newf ("%s_%zu", name, index): r_str_newf ("%s", name)) + : r_str_newf ("%s_%zu", prefix, index); + if (out) { + r_name_filter (out, -1); } - r_name_filter (s, -1); - for (char *p = s; *p; p++) { + for (char *p = out; p && *p; p++) { if (*p == '.' 
|| *p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') { *p = '_'; } } -} - -static char *r2_class_name(const char *name, size_t index) { - char *out = (name && *name)? strdup (name): r_str_newf ("type_%zu", index); - sanitize_ic_name (out); - if (!out || !*out) { - free (out); - out = r_str_newf ("type_%zu", index); - } - return out; -} - -static char *r2_method_name(const char *name, size_t index) { - char *out = (name && *name)? r_str_newf ("%s_%zu", name, index): r_str_newf ("method_%zu", index); - sanitize_ic_name (out); - if (!out || !*out) { - free (out); - out = r_str_newf ("method_%zu", index); - } - return out; -} - -static char *r2_field_name(const char *name, size_t index) { - char *out = (name && *name)? strdup (name): r_str_newf ("field_%zu", index); - sanitize_ic_name (out); - if (!out || !*out) { - free (out); - out = r_str_newf ("field_%zu", index); + if (R_STR_ISEMPTY (out)) { + R_FREE (out); + return r_str_newf ("%s_%zu", prefix, index); } return out; } @@ -373,14 +438,6 @@ static void pj_hex(PJ *pj, const char *key, ut64 value, int width) { pj_ks (pj, key, hex); } -static void pj_string_or_null(PJ *pj, const char *key, const char *value) { - if (value) { - pj_ks (pj, key, value); - } else { - pj_knull (pj, key); - } -} - static void emit_class_text(RCore *core, R2UnityMetadata *meta, const Il2CppTypeDefinition *types, @@ -553,7 +610,7 @@ static void emit_class_r2(RCore *core, size_t type_index) { const Il2CppTypeDefinition *td = &types[type_index]; char *name = r2unity_type_fullname (meta, td, type_index, R2U_NAME_FALLBACK_TYPE); - char *r2klass = r2_class_name (name, type_index); + char *r2klass = r2_ic_name ("type", name, type_index, false); char *base = type_name_from_index (meta, types, type_count, td->parentIndex, false); if (base) { r_cons_printf (core->cons, "# class %s : %s\n", name? 
name: r2klass, base); @@ -562,7 +619,7 @@ static void emit_class_r2(RCore *core, } r_cons_printf (core->cons, "ic+%s @ 0\n", r2klass); if (base) { - char *r2base = r2_class_name (base, 0); + char *r2base = r2_ic_name ("type", base, 0, false); r_cons_printf (core->cons, "ic+%s:%s @ 0\n", r2klass, r2base); free (r2base); } @@ -571,7 +628,7 @@ static void emit_class_r2(RCore *core, for (size_t k = 0; k < td->interfaces_count && (size_t)(td->interfacesStart + k) < interface_count; k++) { char *iname = type_name_from_index (meta, types, type_count, interfaces[td->interfacesStart + k], false); if (iname) { - char *r2iface = r2_class_name (iname, 0); + char *r2iface = r2_ic_name ("type", iname, 0, false); r_cons_printf (core->cons, "ic+%s:%s @ 0\n", r2klass, r2iface); free (r2iface); } @@ -585,7 +642,7 @@ static void emit_class_r2(RCore *core, ut64 addr = (has_ptrs && method_ptrs)? method_ptrs[mi]: 0; addr = addr > 0x1000? addr: 0; char *mname = method_name_or_fallback (meta, &methods[mi], mi); - char *r2meth = r2_method_name (mname, mi); + char *r2meth = r2_ic_name ("method", mname, mi, true); r_cons_printf (core->cons, "ic+%s.%s @ 0x%" PFMT64x "\n", r2klass, r2meth, addr); free (r2meth); free (mname); @@ -595,7 +652,7 @@ static void emit_class_r2(RCore *core, for (int k = 0; fields && k < td->field_count && fstart >= 0 && (size_t)(fstart + k) < field_count; k++) { size_t fi = (size_t)(fstart + k); char *fname = field_name_or_fallback (meta, &fields[fi], fi); - char *r2field = r2_field_name (fname, fi); + char *r2field = r2_ic_name ("field", fname, fi, false); char *ftype = type_name_from_index (meta, types, type_count, fields[fi].typeIndex, true); r_cons_printf (core->cons, "ic+%s..%s %s @ 0\n", r2klass, @@ -630,19 +687,13 @@ static int cmd_classes(RCore *core, char mode) { size_t interface_count = 0; int32_t *interfaces = r2unity_get_type_index_table (meta, R2U_SEC_INTERFACES, &interface_count); + R2UnityNativeResult native_result = { 0 }; ut64 *method_ptrs = NULL; 
bool has_ptrs = false; const char *lib = resolve_library_path (core); if (lib && *lib) { - has_ptrs = find_method_pointers (meta, lib, &method_ptrs); - } - if (method_ptrs && !has_ptrs) { - for (size_t k = 0; k < method_count; k++) { - if (method_ptrs[k]) { - has_ptrs = true; - break; - } - } + has_ptrs = find_method_pointers (core, meta, lib, &native_result); + method_ptrs = native_result.method_ptrs; } if (mode == 'j') { @@ -652,6 +703,7 @@ static int cmd_classes(RCore *core, char mode) { pj_ki (pj, "version", meta->version); pj_ks (pj, "unity_range", r2unity_unity_range_from_wire (meta->version)); pj_kb (pj, "has_ptrs", has_ptrs); + pj_native_result (pj, &native_result); pj_kn (pj, "types", (ut64)type_count); pj_kn (pj, "methods", (ut64)method_count); pj_kn (pj, "fields", (ut64)field_count); @@ -666,6 +718,7 @@ static int cmd_classes(RCore *core, char mode) { } else if (mode == '*') { r_cons_println (core->cons, "# r2 script generated by r2unity-c"); r_cons_printf (core->cons, "# Input file: %s\n", metadata_path && *metadata_path? 
metadata_path: "-"); + print_native_result (core, &native_result); if (!has_ptrs) { r_cons_println (core->cons, "# Method addresses default to 0; run r2unity-D or set r2unity.library to recover native addresses."); } @@ -685,7 +738,7 @@ static int cmd_classes(RCore *core, char mode) { } } - R_FREE (method_ptrs); + r2unity_native_result_fini (&native_result); R_FREE (interfaces); R_FREE (fields); R_FREE (methods); @@ -714,16 +767,9 @@ static int cmd_symbols(RCore *core, char mode) { size_t img_count = 0; Il2CppImageDefinition *images = r2unity_get_images (meta, &img_count); - ut64 *method_ptrs = NULL; - bool has_ptrs = find_method_pointers (meta, lib, &method_ptrs); - if (method_ptrs && !has_ptrs) { - for (size_t k = 0; k < method_count; k++) { - if (method_ptrs[k]) { - has_ptrs = true; - break; - } - } - } + R2UnityNativeResult native_result = { 0 }; + bool has_ptrs = find_method_pointers (core, meta, lib, &native_result); + ut64 *method_ptrs = native_result.method_ptrs; int *type2img = r2unity_build_type_image_map (images, img_count, type_count); @@ -734,7 +780,10 @@ static int cmd_symbols(RCore *core, char mode) { pj_kb (pj, "ok", true); pj_ki (pj, "version", meta->version); pj_kb (pj, "has_ptrs", has_ptrs); + pj_native_result (pj, &native_result); pj_ka (pj, "methods"); + } else if (mode == '*') { + print_native_result (core, &native_result); } ut64 applied = 0, listed = 0; @@ -833,7 +882,7 @@ static int cmd_symbols(RCore *core, char mode) { lib); } - R_FREE (method_ptrs); + r2unity_native_result_fini (&native_result); R_FREE (type2img); R_FREE (images); R_FREE (methods); @@ -1124,6 +1173,15 @@ static bool r2unity_init(RCorePluginSession *cps) { r_config_node_desc ( r_config_set (cfg, "r2unity.library", ""), "path to IL2CPP native library (empty = auto-detect)"); + r_config_node_desc ( + r_config_set (cfg, "r2unity.code_registration", ""), + "Il2CppCodeRegistration VA override (empty = flags/RBin symbols)"); + r_config_node_desc ( + r_config_set (cfg, 
"r2unity.metadata_registration", ""), + "Il2CppMetadataRegistration VA override (empty = flags/RBin symbols)"); + r_config_node_desc ( + r_config_set_b (cfg, "r2unity.force_heuristic", false), + "force section-scan fallback instead of CodeRegistration parsing"); r_config_lock (cfg, true); return true; }