Skip to content
2 changes: 2 additions & 0 deletions panda/include/panda/panda_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ CPUState* get_cpu(void);

unsigned long garray_len(GArray *list);
void panda_cleanup_record(void);

void panda_page_fault(CPUState *cpu, target_ulong address, target_ulong retaddr);
// END_PYPANDA_NEEDS_THIS -- do not delete this comment!

// don't expose to API because we don't want to add siginfo_t understanding
Expand Down
92 changes: 72 additions & 20 deletions panda/python/core/pandare/arch.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_pc(self, cpu):
else:
raise RuntimeError(f"get_pc unsupported for {self.panda.arch_name}")

def _get_arg_reg(self, idx, convention):
def _get_arg_loc(self, idx, convention):
'''
return the name of the argument [idx] for the given arch with calling [convention]
'''
Expand All @@ -137,23 +137,55 @@ def set_arg(self, cpu, idx, val, convention='default'):

Note for syscalls we define arg[0] as syscall number and then 1-index the actual args
'''
reg = self._get_arg_reg(idx, convention)
reg = self._get_arg_loc(idx, convention)
return self.set_reg(cpu, reg, val)

def get_arg(self, cpu, idx, convention='default'):
    '''
    Return arg [idx] for given calling convention. This only works right as the guest
    is calling or has called a function before register values are clobbered.

    The location of the argument is looked up with self._get_arg_loc. If arg[idx]
    should be stack-based, name it stack_0, stack_1... this allows mixed
    conventions where some args are in registers and others are on the stack (i.e.,
    mips32 syscalls).

    When doing a stack-based read, this function may raise a ValueError if the memory
    read fails (i.e., paged out, invalid address).

    Note for syscalls we define arg[0] as syscall number and then 1-index the actual args
    '''
    argloc = self._get_arg_loc(idx, convention)

    # Names of the form stack_X are read from guest memory relative to the
    # stack pointer; anything else is treated as a register name.
    if self._is_stack_loc(argloc):
        return self._read_stack(cpu, argloc)
    return self.get_reg(cpu, argloc)

@staticmethod
def _is_stack_loc(argloc):
    '''
    Report whether a location name (as returned by self._get_arg_loc)
    names a stack slot, i.e. begins with "stack_", rather than a register.
    '''
    stack_prefix = "stack_"
    return argloc.startswith(stack_prefix)

def _read_stack(self, cpu, argloc):
    '''
    Read one guest-word-sized value from the stack slot named by argloc.

    argloc must look like stack_X. Slot X is located at
    stack pointer + word size * (X + 1), so stack_0 is the word just
    above the one the stack pointer refers to.

    May raise a ValueError if the memory read fails
    '''
    assert(self._is_stack_loc(argloc)), f"Can't get stack offset of {argloc}"

    # Parse the slot number out of the name before touching guest state
    slot = int(argloc.split("stack_")[1])
    sp_value = self.get_reg(cpu, self.reg_sp)
    word_bytes = self.panda.bits // 8

    read_addr = sp_value + word_bytes * (slot + 1)
    return self.panda.virtual_memory_read(cpu, read_addr, word_bytes, fmt='int')

def set_retval(self, cpu, val, convention='default', failure=False):
'''
Expand Down Expand Up @@ -362,7 +394,7 @@ def get_return_address(self, cpu):

class MipsArch(PandaArch):
'''
Register names and accessors for MIPS
Register names and accessors for 32-bit MIPS
'''

# Registers are:
Expand Down Expand Up @@ -391,8 +423,9 @@ def __init__(self, panda):

self.reg_sp = regnames.index('sp')
self.reg_retaddr = regnames.index("ra")
# Default syscall/args are for mips o32
self.call_conventions = {"mips": ["A0", "A1", "A2", "A3"],
"syscall": ["V0", "A0", "A1", "A2", "A3"]}
"syscall": ["V0", "A0", "A1", "A2", "A3", "stack_1", "stack_2", "stack_3", "stack_4"]}
self.call_conventions['default'] = self.call_conventions['mips']

self.reg_retval = {"default": "V0",
Expand Down Expand Up @@ -452,7 +485,7 @@ def get_call_return(self, cpu):
'''
.. Deprecated:: use get_return_address
'''
return self.get_return_addess(cpu)
return self.get_return_address(cpu)

def get_return_address(self,cpu):
'''
Expand Down Expand Up @@ -481,6 +514,33 @@ def set_retval(self, cpu, val, convention='default', failure=False):

return super().set_retval(cpu, val, convention)

class Mips64Arch(MipsArch):
    '''
    MIPS64 register names and accessors. Everything is inherited from MipsArch
    except the register name table and the call conventions, which are
    replaced after the parent constructor has run.
    '''

    def __init__(self, panda):
        super().__init__(panda)
        regnames = ("zero at v0 v1 a0 a1 a2 a3 "
                    "a4 a5 a6 a7 t0 t1 t2 t3 "
                    "s0 s1 s2 s3 s4 s5 s6 s7 "
                    "t8 t9 k0 k1 gp sp s8 ra").split()

        self.reg_sp = regnames.index('sp')
        self.reg_retaddr = regnames.index("ra")

        # Default syscall/args are for mips 64/n32 - note the registers are different than 32
        mips_args = ["A0", "A1", "A2", "A3"]  # XXX Unsure?
        self.call_conventions = {
            "mips": mips_args,
            "syscall": ["V0", "A0", "A1", "A2", "A3", "A4", "A5"],
            # 'default' shares the very same list object as 'mips'
            "default": mips_args,
        }

        self.reg_retval = {"default": "V0",
                           "syscall": 'V0'}

        # note names must be stored uppercase for get/set reg to work case-insensitively
        self.registers = {name.upper(): number for number, name in enumerate(regnames)}

class X86Arch(PandaArch):
'''
Register names and accessors for x86
Expand All @@ -496,9 +556,9 @@ def __init__(self, panda):
self.reg_retval = {"default": "EAX",
"syscall": "EAX"}

self.call_conventions = {"stack": "stack",
self.call_conventions = {"cdecl": ["stack_{x}" for x in range(20)], # 20: arbitrary but big
"syscall": ["EAX", "EBX", "ECX", "EDX", "ESI", "EDI", "EBP"]}
self.call_conventions['default'] = self.call_conventions['stack']
self.call_conventions['default'] = self.call_conventions['cdecl']

self.reg_sp = regnames.index('ESP')
self.registers = {regnames[idx]: idx for idx in range(len(regnames)) }
Expand Down Expand Up @@ -541,14 +601,6 @@ def get_return_address(self,cpu):
esp = self.get_reg(cpu,"ESP")
return self.panda.virtual_memory_read(cpu,esp,4,fmt='int')

# we need this because X86 is stack based
def get_arg_stack(self, cpu, num, kernel=False):
    '''
    Read the 4-byte stack argument number [num], located at ESP + 4 * (num + 1).

    The kernel parameter is accepted but is not consulted by this implementation.
    '''
    word = 4
    stack_ptr = self.get_reg(cpu, "ESP")
    arg_addr = stack_ptr + (word * (num + 1))
    return self.panda.virtual_memory_read(cpu, arg_addr, word, fmt='int')

class X86_64Arch(PandaArch):
'''
Register names and accessors for x86_64
Expand Down
15 changes: 9 additions & 6 deletions panda/python/core/pandare/panda.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from .panda_expect import Expect
from .asyncthread import AsyncThread
from .qcows import Qcows
from .arch import ArmArch, Aarch64Arch, MipsArch, X86Arch, X86_64Arch
from .arch import ArmArch, Aarch64Arch, MipsArch, Mips64Arch, X86Arch, X86_64Arch

# Might be worth importing and auto-initilizing a PLogReader
# object within Panda for the current architecture?
Expand Down Expand Up @@ -139,7 +139,7 @@ def __init__(self, arch="i386", mem="128M",
elif self.arch_name in ["mips", "mipsel"]:
self.arch = MipsArch(self)
elif self.arch_name in ["mips64"]:
self.arch = MipsArch(self) # XXX: We probably need a different class?
self.arch = Mips64Arch(self)
else:
raise ValueError(f"Unsupported architecture {self.arch_name}")
self.bits, self.endianness, self.register_size = self.arch._determine_bits()
Expand Down Expand Up @@ -1001,24 +1001,27 @@ def read_str(self, cpu, ptr, max_length=None):
idx += 1
return r.decode("utf8", "ignore")

def to_unsigned_guest(self, x, bits=None):
    '''
    Convert a signed python int to an unsigned int32/unsigned int64
    depending on guest bit-size

    Args:
        x (int): Python integer
        bits (int): Number of bits to treat this value as. If unset, uses architecture default

    Returns:
        int: Python integer representing x as an unsigned value of the selected width.

    Raises:
        ValueError: if the selected width is neither 32 nor 64 bits.
    '''
    import ctypes
    if bits is None:
        # Fall back to the guest's own word size
        bits = self.bits
    if bits == 32:
        return ctypes.c_uint32(x).value
    elif bits == 64:
        return ctypes.c_uint64(x).value
    else:
        raise ValueError(f"Unsupported number of bits {bits}")

def from_unsigned_guest(self, x):
'''
Expand Down
4 changes: 4 additions & 0 deletions panda/python/core/pandare/pypluginmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ def load_all(self, plugin_file, args=None, template_dir=None):
'''
import inspect, importlib
spec = importlib.util.spec_from_file_location("plugin_file", plugin_file)
if spec is None:
# Likely an invalid path
raise ValueError(f"Unable to load {plugin_file}")

module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

Expand Down
95 changes: 95 additions & 0 deletions panda/python/examples/fault.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from pandare import Panda

# Guest under analysis. The commented-out lines are alternative generic
# images that can be swapped in instead of arm.
panda = Panda(generic="arm")
#panda = Panda(generic="i386")
#panda = Panda(generic="x86_64") # Infinite faults
#panda = Panda(generic="mips64")

# syscalls2 provides the on_all_sys_enter2 / on_all_sys_return2 callbacks used below
panda.load_plugin("syscalls2", {"load-info": True})

# Global counters. The last three are updated by the syscall-enter callback
# and reported in the summary printed after panda.run() returns.
paged_in = 0  # not updated anywhere in this script
faulted = 0  # page faults injected for unreadable pointer arguments
faulted_then_resolved = 0  # reads that succeeded at the most recently faulted address
faulted_then_failed = 0  # reads that still failed at the most recently faulted address

@panda.queue_blocking
def drive():
    '''
    Queued driver for the guest: call revert_sync('root'), run a shell
    command line over the serial console and print its output (or the
    exception it raised), then end the analysis.
    '''
    panda.revert_sync('root')
    guest_cmd = "md5sum $(which whoami); find /etc/ | md5sum; timeout 10s apt-get update -yy"
    try:
        output = panda.run_serial_cmd(guest_cmd)
        print(output)
    except Exception as err:
        # Best effort: report the failure and still shut the analysis down
        print("EXN:", err)
    panda.end_analysis()

# Address handed to the most recent fault() call (None until the first one)
last_fault = None
def fault(panda, cpu, addr, pc):
    '''
    Ask PANDA to raise a page fault for guest address addr, passing pc along
    as the return address argument of panda_page_fault.

    Raises MemoryError when addr equals the address of the previous call,
    so the same address is never faulted twice in a row.
    '''
    global last_fault
    if last_fault == addr:
        raise MemoryError(f"Double fault of {addr:x}")

    # Remember the address before injecting the fault
    last_fault = addr
    inject = panda.libpanda.panda_page_fault
    inject(cpu, addr, pc)


@panda.ppp("syscalls2", "on_all_sys_enter2")
def all_sys(cpu, pc, call, rp):
    '''
    syscalls2 on_all_sys_enter2 callback: print the syscall and its arguments.

    Arguments whose type code marks them as pointers are read as strings.
    When such a read fails, a page fault is injected for that address via
    fault() and the callback returns early; if the read fails again for the
    address that was just faulted, the failure is counted and reported
    instead of faulting a second time.

    Args:
        cpu: CPU state handed to the callback
        pc: program counter reported by syscalls2
        call: syscall info (name, nargs, argn, argt); may be NULL
        rp: syscall context whose args field holds the argument values
    '''
    global faulted, faulted_then_resolved, faulted_then_failed

    if call == panda.ffi.NULL:
        print("CALL ISNULL")
        return
    args = panda.ffi.cast("target_ulong**", rp.args)
    asid = panda.current_asid(cpu)

    sc_name = panda.ffi.string(call.name).decode() if call.name != panda.ffi.NULL else 'err'
    print(f"{pc:#08x} {asid:#08x} (from block starting at {panda.current_pc(cpu):#08x}): {sc_name}(", end="")
    if call.nargs == 0:
        print(")", end="")

    # Argument type codes that are treated as pointers
    pointer_types = [0x20, 0x21, 0x22]

    for i in range(call.nargs):
        print(f"{panda.ffi.string(call.argn[i]).decode()}=", end="")
        sep = ", " if i != call.nargs-1 else ")"

        # NOTE(review): the casts to "unsigned int" presumably truncate
        # values on 64-bit guests - confirm
        if call.argt[i] not in pointer_types:
            val = int(panda.ffi.cast("unsigned int", args[i]))
            print(hex(val), end=sep)
            continue

        # It's a pointer type
        addr = int(panda.ffi.cast("unsigned int", args[i]))
        if addr < 0xFFFF:
            # Probably not a pointer?
            print(hex(addr), end="")
        else:
            try:
                s = panda.read_str(cpu, addr)
            except ValueError:
                # This argument can't be read - let's raise a fault on it
                if last_fault != addr: # fault on new address
                    print(f"{addr:#x} => Can't read - INJECT PANDA PAGE FAULT, faulted is now {faulted+1}") # newline
                    faulted += 1
                    fault(panda, cpu, addr, pc)
                    return # Raised a fault, hope it's gonna work

                # faulted then failed: report the count after incrementing it
                faulted_then_failed += 1
                s = f"still can't read (faulted_then_failed now {faulted_then_failed})"
            else:
                if addr == last_fault: # faulted before, then resolved
                    s += f"(PANDA: read now works faulted_then_resolved now {faulted_then_resolved+1})"
                    faulted_then_resolved += 1

            # No fault
            print(f"{addr:#x} => {repr(s)}", end="")

        print(sep, end="") # , or )

@panda.ppp("syscalls2", "on_all_sys_return2")
def all_ret(cpu, pc, call, rp):
    '''
    syscalls2 on_all_sys_return2 callback: print the value reported by
    panda.arch.get_return_value for this cpu, in hex, on its own line.
    '''
    retval = panda.arch.get_return_value(cpu)
    message = f"\t\t==> {retval:#x}"
    print(message)


# XXX: with TB chaining there's a gap between when the syscall happens and our injected PF is handled
# (uncomment the next line to turn TB chaining off)
#panda.disable_tb_chaining()

# Start the guest; the queued drive() function calls panda.end_analysis() when it is done
panda.run()

# Summary of the global fault counters maintained by the syscall-enter callback
print(f"\nFINISHED!\nTotal of {faulted} faults seen\n\t{faulted_then_resolved} resolved\n\t{faulted_then_failed} failed")
63 changes: 63 additions & 0 deletions panda/scripts/scgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python3

'''
Reformat the syscalls2 prototypes to a mapping from callno to syscall name
and store the results in syscalls.json in the following format:
{
'linux_arm':
{
'0': 'some_syscall',
'1': 'another_syscall'
},
'linux_mips':
{
...
}
}

Unfortunately syscall number keys are stored as strings
'''


import glob
import json
from os import path

base = path.join(path.dirname(__file__), "../plugins/syscalls2/generated-in/")

def gen_syscalls(os_arch, base_dir=None):
    '''
    Build a mapping from syscall number to syscall name for one os_arch.

    Reads "<os_arch>_prototypes.txt", whose useful lines look like
    "num type name(args". Lines that do not start with an integer, or that
    have no type and name before the opening parenthesis, are skipped.
    A "sys_" marker is removed from the name, and so is a leading "do_".

    Args:
        os_arch (str): prototype file prefix, e.g. 'linux_arm'
        base_dir (str): directory holding the prototype files. If unset,
            the module-level default directory (base) is used.

    Returns:
        dict: syscall number (int) -> syscall name (str)

    Raises:
        ValueError: if the prototype file for os_arch does not exist.
    '''
    if base_dir is None:
        base_dir = base

    target = path.join(base_dir, f"{os_arch}_prototypes.txt")
    if not path.isfile(target):
        raise ValueError(f"Unsupported os_arch: {os_arch} - could not find {target}")

    num_sc = {}
    with open(target) as f:
        for line in f:
            # num type name(args
            fields = line.split(None, 1)
            if len(fields) < 2:
                continue  # blank line, or nothing after the first token

            try:
                sys_no = int(fields[0])
            except ValueError:
                continue  # comment or other non-prototype line

            # Everything before the first "(" is "type name"; the name is the
            # last token, which keeps multi-word types ("unsigned long") working
            declarator = fields[1].split("(", 1)[0].split()
            if len(declarator) < 2:
                continue
            sys_name = declarator[-1].lstrip("*")

            sys_name = sys_name.replace("sys_", "")
            if sys_name.startswith("do_"):
                # str is immutable: the stripped name has to be assigned back
                sys_name = sys_name[len("do_"):]
            num_sc[sys_no] = sys_name

    return num_sc

if __name__ == '__main__':
    # One number->name table per supported os_arch, written as a single JSON
    # object to syscalls.json in the current working directory
    arch_names = ['linux_arm', 'linux_mips', 'linux_x64', 'linux_x86']
    results = {arch: gen_syscalls(arch) for arch in arch_names}
    with open("syscalls.json", 'w') as f:
        json.dump(results, f)
Loading