def get_arg(self, cpu, idx, convention='default'):
    '''
    Fetch argument [idx] under the given calling [convention].

    This only works right as the guest is calling or has called a function,
    before the argument values are clobbered.

    Argument locations named stack_0, stack_1... are read from guest memory
    relative to the stack pointer; every other name is read as a register.
    This allows mixed conventions where some args are in registers and
    others are on the stack (i.e., mips32 syscalls).

    A stack-based read may raise a ValueError if the memory read fails
    (i.e., paged out, invalid address).

    Note for syscalls we define arg[0] as syscall number and then 1-index the actual args
    '''
    location = self._get_arg_loc(idx, convention)
    # Pick the accessor that matches where this argument lives
    reader = self._read_stack if self._is_stack_loc(location) else self.get_reg
    return reader(cpu, location)


@staticmethod
def _is_stack_loc(argloc):
    '''
    Report whether a location name returned by self._get_arg_loc
    names a stack offset (it carries the "stack_" prefix).
    '''
    return argloc.startswith("stack_")


def _read_stack(self, cpu, argloc):
    '''
    Resolve a name like stack_X to the X-th value on the stack,
    read that value out of guest memory and return it as an int.

    May raise a ValueError if the memory read fails
    '''
    assert self._is_stack_loc(argloc), f"Can't get stack offset of {argloc}"
    slot = int(argloc.split("stack_")[1])
    sp = self.get_reg(cpu, self.reg_sp)
    word_bytes = self.panda.bits // 8
    # Slot 0 sits one guest word above the stack pointer
    address = sp + word_bytes * (slot + 1)
    return self.panda.virtual_memory_read(cpu, address, word_bytes, fmt='int')
"V0", @@ -452,7 +485,7 @@ def get_call_return(self, cpu): ''' .. Deprecated:: use get_return_address ''' - return self.get_return_addess(cpu) + return self.get_return_address(cpu) def get_return_address(self,cpu): ''' @@ -481,6 +514,33 @@ def set_retval(self, cpu, val, convention='default', failure=False): return super().set_retval(cpu, val, convention) +class Mips64Arch(MipsArch): + ''' + Register names and accessors for MIPS64. Inherits from MipsArch for everything + except the register name and call conventions. + ''' + + def __init__(self, panda): + super().__init__(panda) + regnames = ["zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra"] + + self.reg_sp = regnames.index('sp') + self.reg_retaddr = regnames.index("ra") + # Default syscall/args are for mips 64/n32 - note the registers are different than 32 + self.call_conventions = {"mips": ["A0", "A1", "A2", "A3"], # XXX Unsure? 
+ "syscall": ["V0", "A0", "A1", "A2", "A3", "A4", "A5"]} + self.call_conventions['default'] = self.call_conventions['mips'] + + self.reg_retval = {"default": "V0", + "syscall": 'V0'} + + + # note names must be stored uppercase for get/set reg to work case-insensitively + self.registers = {regnames[idx].upper(): idx for idx in range(len(regnames)) } + class X86Arch(PandaArch): ''' Register names and accessors for x86 @@ -496,9 +556,9 @@ def __init__(self, panda): self.reg_retval = {"default": "EAX", "syscall": "EAX"} - self.call_conventions = {"stack": "stack", + self.call_conventions = {"cdecl": ["stack_{x}" for x in range(20)], # 20: arbitrary but big "syscall": ["EAX", "EBX", "ECX", "EDX", "ESI", "EDI", "EBP"]} - self.call_conventions['default'] = self.call_conventions['stack'] + self.call_conventions['default'] = self.call_conventions['cdecl'] self.reg_sp = regnames.index('ESP') self.registers = {regnames[idx]: idx for idx in range(len(regnames)) } @@ -541,14 +601,6 @@ def get_return_address(self,cpu): esp = self.get_reg(cpu,"ESP") return self.panda.virtual_memory_read(cpu,esp,4,fmt='int') - # we need this because X86 is stack based - def get_arg_stack(self, cpu, num, kernel=False): - ''' - Gets arguments based on the number. Supports kernel and usermode. 
def to_unsigned_guest(self, x, bits=None):
    '''
    Convert a signed python int to an unsigned int32/unsigned int64 depending on guest bit-size

    Args:
        x (int): Python integer
        bits (int): Number of bits to treat this value as. If unset, uses architecture default

    Returns:
        int: Python integer representing x as an unsigned value of the selected width.

    Raises:
        ValueError: if the selected width is neither 32 nor 64
    '''
    import ctypes

    width = self.bits if bits is None else bits
    if width == 32:
        unsigned_type = ctypes.c_uint32
    elif width == 64:
        unsigned_type = ctypes.c_uint64
    else:
        raise ValueError(f"Unsupported number of bits {width}")
    # ctypes integer types wrap out-of-range values instead of raising
    return unsigned_type(x).value
@panda.ppp("syscalls2", "on_all_sys_enter2")
def all_sys(cpu, pc, call, rp):
    '''
    Print every syscall on entry as `name(arg=value, ...)`.

    Arguments whose type code is one of the pointer codes are read as guest
    strings. When such a read fails, a page fault is injected for that address
    through fault() and this callback returns immediately (the rest of the
    argument list is not printed). If the address is the one that was last
    faulted on and it still can't be read, it is counted as a failure instead
    of faulting again.

    Updates the module-level counters faulted, faulted_then_resolved and
    faulted_then_failed.
    '''
    global faulted, faulted_then_resolved, faulted_then_failed

    if call == panda.ffi.NULL:
        print("CALL ISNULL")
        return

    args = panda.ffi.cast("target_ulong**", rp.args)
    asid = panda.current_asid(cpu)

    sc_name = panda.ffi.string(call.name).decode() if call.name != panda.ffi.NULL else 'err'
    print(f"{pc:#08x} {asid:#08x} (from block starting at {panda.current_pc(cpu):#08x}): {sc_name}(", end="")
    if call.nargs == 0:
        print(")", end="")

    # argt codes this script treats as pointer types
    pointer_types = (0x20, 0x21, 0x22)

    # call.argn / call.argt / args are C arrays, so they are walked by index
    for i in range(call.nargs):
        print(f"{panda.ffi.string(call.argn[i]).decode()}=", end="")
        sep = ", " if i != call.nargs-1 else ")"
        val = int(panda.ffi.cast("unsigned int", args[i]))

        if call.argt[i] not in pointer_types:
            print(hex(val), end=sep)
            continue

        if val < 0xFFFF:
            # Probably not a pointer?
            print(hex(val), end=sep)
            continue

        try:
            s = panda.read_str(cpu, val)
        except ValueError:
            # This argument can't be read
            if last_fault != val:
                # New address - raise a fault on it and hope it's gonna work
                faulted += 1
                print(f"{val:#x} => Can't read - INJECT PANDA PAGE FAULT, faulted is now {faulted}") # newline
                fault(panda, cpu, val, pc)
                return
            # Already faulted on this address and it is still unreadable
            faulted_then_failed += 1
            s = f"still can't read (faulted_then_failed now {faulted_then_failed})"
        else:
            if val == last_fault:  # faulted before, then resolved
                faulted_then_resolved += 1
                s += f"(PANDA: read now works faulted_then_resolved now {faulted_then_resolved})"

        print(f"{val:#x} => {repr(s)}", end=sep)


@panda.ppp("syscalls2", "on_all_sys_return2")
def all_ret(cpu, pc, call, rp):
    '''
    Print the syscall's return value on the line started by all_sys.
    '''
    rv = panda.arch.get_return_value(cpu)
    print(f"\t\t==> {rv:#x}")


# XXX: with TB chaining there's a gap between when the syscall happens and our injected PF is handled
#panda.disable_tb_chaining()
panda.run()
#!/usr/bin/env python3

'''
Reformat the syscalls2 prototypes to a mapping from callno to syscall name
and store the results in syscalls.json in the following format:
    {
      'linux_arm':
        {
            '0': 'some_syscall',
            '1': 'another_syscall'
        },
      'linux_mips':
        {
            ...
        }
    }

Unfortunately syscall number keys are stored as strings
'''


import glob
import json
from os import path

base = path.join(path.dirname(__file__), "../plugins/syscalls2/generated-in/")


def gen_syscalls(os_arch, base_dir=None):
    '''
    Parse `<os_arch>_prototypes.txt` into a {syscall number: syscall name} dict.

    Each useful line has the shape `num type name(args`. Lines that do not
    start with an integer, or that have fewer than three fields before the
    opening parenthesis, are skipped. Every occurrence of "sys_" is removed
    from the name, and a leading "do_" is then stripped.

    Args:
        os_arch (str): prototype file prefix, e.g. 'linux_arm'
        base_dir (str): directory holding the prototype files. If unset,
            the module-level `base` directory is used.

    Returns:
        dict: int syscall number -> str syscall name

    Raises:
        ValueError: if no prototype file exists for os_arch
    '''
    if base_dir is None:
        base_dir = base

    target = path.join(base_dir, f"{os_arch}_prototypes.txt")
    if not path.isfile(target):
        raise ValueError(f"Unsupported os_arch: {os_arch} - could not find {target}")

    num_sc = {}
    with open(target) as f:
        for line in f:
            # num type name(args  -> only the part before '(' matters
            fields = line.split("(", 1)[0].split()
            if len(fields) < 3:
                continue  # blank, comment or malformed line

            try:
                sys_no = int(fields[0])
            except ValueError:
                continue  # not a prototype line

            sys_name = fields[-1].replace("sys_", "")
            if sys_name.startswith("do_"):
                # str methods return a new string; the result must be kept
                sys_name = sys_name[len("do_"):]
            num_sc[sys_no] = sys_name

    return num_sc


if __name__ == '__main__':
    results = {}
    for arch in ['linux_arm', 'linux_mips', 'linux_x64', 'linux_x86']:
        results[arch] = gen_syscalls(arch)
    with open("syscalls.json", 'w') as f:
        json.dump(results, f)
execution to retaddr +void panda_page_fault(CPUState* cpu, target_ulong address, target_ulong retaddr) { + // Update the CPUArchstate so the PC is the desired return address, then call + // tlb_fill. This ensures that we always go back to retaddr + +#if defined(TARGET_I386) //|| defined(TARGET_X86_64) + CPUX86State *env = cpu->env_ptr; + + // We want to set up CPU state with x86_cpu_handle_mmu_fault + // and then interrupt the cpu-exec loop with raise_exception_err_ra + // Explicitly set EIP to retaddr so after the exception is handled, we resume from retaddr + // instead of restarting the block + env->eip = retaddr; + tlb_fill(cpu, address, MMU_DATA_LOAD, 0, retaddr); +#elif defined (TARGET_ARM) + CPUARMState *env = cpu->env_ptr; + if (is_a64(env)) { + env->pc = retaddr; // PC for aarch64 + } else { + env->regs[15] = retaddr; // PC for arm32 + } + tlb_fill(cpu, address, MMU_DATA_LOAD, 0, retaddr); + +#elif defined(TARGET_MIPS) + CPUMIPSState *env = cpu->env_ptr; + env->active_tc.PC = retaddr; + tlb_fill(cpu, address, MMU_DATA_LOAD, 0, retaddr); +#else + printf("\n\nnERROR: Unsupported architecture for panda_page_fault!!\n\n"); + assert(0); +#endif +}