diff --git a/bindiff b/bindiff index 300822c..477dac1 100755 --- a/bindiff +++ b/bindiff @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import binascii import difflib @@ -40,7 +40,7 @@ if __name__ == '__main__': backtrack = True if len(sys.argv) != 3: - print 'Usage: %s [--backtrack] ' % sys.argv[0] + print('Usage: %s [--backtrack] ' % sys.argv[0]) sys.exit(1) bina, binb = sys.argv[1:3] diff --git a/core/arch.py b/core/arch.py index 7d1b56e..705ed56 100644 --- a/core/arch.py +++ b/core/arch.py @@ -1,7 +1,14 @@ +import binascii import re from capstone import * from keystone import * +""" +from keystone import * +ks = Ks(KS_ARCH_X86, KS_MODE_64) + + +""" class Arch: def __init__(self): self.cs = Cs(*self._cs) @@ -12,17 +19,54 @@ def asm(self, asm, addr=0, att_syntax=False): if not asm: return '' # asm start label for use with relative offsets - asm = '_PKST_:;' + asm + asm = '_PKST_:;\n' + asm saved = self.ks.syntax if att_syntax: self.ks.syntax = KS_OPT_SYNTAX_ATT - tmp, _ = self.ks.asm(asm, addr=addr) + + #Keystone doesn't support this instruction + asm = asm.replace('endbr64', '') + + newasm = '' + for line in asm.split('\n'): + if '.long' in line: + x = line.split('\t') + if '-' in x[1]: + vals = x[1].split('-') + new_line = f'{x[0]}\t 0x{vals[0].strip()} - 0x{vals[1].strip()} \n' + newasm += new_line + continue + if re.match(r'^\d+:', line): + continue + newasm += f'{line}\n' + + #print('------------') + #import keystone + #for line in newasm.split('\n'): + # print(f'checking line: {line}') + # try: + # tmp, _ = self.ks.asm(line) + # print(tmp) + # except keystone.keystone.KsError as e: + # print(e) + #print(newasm) + + # Problematic instructions: + # https://github.com/keystone-engine/keystone/issues/546 + # leal -48(%rax,%rdx), %eax + # movb (%rcx,%rdx), %dl + tmp, _ = self.ks.asm(newasm, addr=addr) self.ks.syntax = saved - return ''.join(map(chr, tmp)) + return ''.join(map(chr, tmp)).encode('latin') def dis(self, raw, addr=0): - return list(self.cs.disasm(str(raw), addr)) + if isinstance(raw, bytearray): + return list(self.cs.disasm(raw, addr)) + elif isinstance(raw, str): + return list(self.cs.disasm((raw.encode()), addr)) + else: + return list(self.cs.disasm(raw, addr)) def jmp(self, dst): raise NotImplementedError @@ -41,7 +85,12 @@ class x86(Arch): _ks = KS_ARCH_X86, KS_MODE_32 def call(self, dst): return 'call 0x%x;' % dst - def jmp(self, dst): return 'jmp 0x%x;' % dst + def jmp(self, dst): + print(f'debugging jmp: dst:{dst}') + if isinstance(dst, str): + return f'jmp {dst}' + else: + return 'jmp 0x%x;' % dst def ret(self): return 'ret;' def nop(self): return 'nop;' diff --git a/core/binary.py b/core/binary.py index ce1f5f4..dfaaed3 100644 --- a/core/binary.py +++ b/core/binary.py @@ -73,7 +73,7 @@ def next_alloc(self, target='patch'): def alloc(self, size, target='patch'): ph = self._seg(target) tmp = self.next_alloc(target) - ph.data += '\0' * size + ph.data += b'\0' * size ph.memsz += size ph.filesz += size return tmp @@ -87,12 +87,12 @@ def onasm(self, cb): def save(self, path): self.nxpatch.flags &= ~1 - print '[+] Saving binary to: %s' % path + print('[+] Saving binary to: %s' % path) # hooking the entry point is a special case that generates a more efficient call table if self.entry_hooks: with self.collect() as pt: # call each hook addr then jump to original entry point - calls = map(pt.arch.call, self.entry_hooks) + [pt.arch.jmp(pt.entry)] + calls = list(map(pt.arch.call, self.entry_hooks)) + [pt.arch.jmp(pt.entry)] addr = pt.inject(asm=';'.join(calls), internal=True) pt.entry = addr @@ -105,4 +105,4 @@ def save(self, path): self.elf.progs.remove(prog) self.elf.save(path) - os.chmod(path, 0755) + os.chmod(path, 0o755) diff --git a/core/compiler.py b/core/compiler.py index d4cceda..947efe1 100644 --- a/core/compiler.py +++ b/core/compiler.py @@ -40,7 +40,7 @@ def clean(asm): elif section.startswith(('.text', '__TEXT')): cur = text else: - print 'unknown section', section + print('unknown section', section) continue if line.startswith('.text'): @@ -78,7 +78,7 @@ def clean(asm): ''' if line.startswith('.') and not line.endswith(':'): if not line.startswith(('.long', '.byte')): - print line + print(line) ''' cur.append(line) @@ -92,18 +92,17 @@ def compile(code, linker, syms=()): if compiler_version is None: compiler_version = subprocess.check_output(['gcc', '--version']) - if 'gcc' in compiler_version and not 'clang' in compiler_version: + if b'gcc' in compiler_version and not b'clang' in compiler_version: cflags += ['-fleading-underscore', '-fno-toplevel-reorder'] cflags += linker.cflags code = linker.pre(code, syms=syms) p = subprocess.Popen(['gcc', '-xc', '-S', '-o-', '-'] + cflags, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - asm, err = p.communicate(code) - if 'error:' in err.lower(): + asm, err = p.communicate(code.encode('latin')) + if b'error:' in err.lower(): raise BuildError(err) elif err: - print err - - asm = linker.post(asm, syms=syms) + print(err) + asm = linker.post(asm.decode(), syms=syms) asm = clean(asm) return asm diff --git a/core/context.py b/core/context.py index e432f21..a2369a7 100644 --- a/core/context.py +++ b/core/context.py @@ -1,3 +1,6 @@ +import codecs +import sys + import capstone import binascii @@ -76,14 +79,14 @@ def info(self, *args, **kwargs): if self.current_func: if self.func_printed != self.current_func: if self.func_printed is not None: - print + print() func = self.current_func - print indent + '[FUNC] @0x%x-0x%x' % (func.addr, func.addr + func.size) + print(indent + '[FUNC] @0x%x-0x%x' % (func.addr, func.addr + func.size)) self.func_printed = self.current_func indent += ' ' if kwargs.get('prefix'): indent += kwargs['prefix'] + ' ' - print indent + line + print(indent + line) dis = kwargs.get('dis', None) if dis: @@ -109,17 +112,17 @@ def pdis(self, dis): out = [] nop_start = 0 - nop_bytes = '' + nop_bytes = b'' nops = 0 just = max(len(i.bytes) for i in dis) - pnop = lambda: ('0x%x: %s nop (x%d)' % (nop_start, binascii.hexlify(nop_bytes).ljust(just * 2), nops)) + pnop = lambda: ('0x%x: %s nop (x%d)' % (nop_start, binascii.hexlify(nop_bytes).ljust(just * 2).decode(), nops)) for i in dis: if i.mnemonic == 'nop': if not nops: nop_start = i.address - nop_bytes += str(i.bytes) + nop_bytes += bytes(i.bytes) nops += 1 else: if nops: @@ -127,7 +130,7 @@ def pdis(self, dis): nops = 0 nop_bytes = '' data = binascii.hexlify(i.bytes).ljust(just * 2) - out.append('0x%x: %s %s %s' % (i.address, data, i.mnemonic, i.op_str)) + out.append('0x%x: %s %s %s' % (i.address, data.decode(), i.mnemonic, i.op_str)) if nops: out.append(pnop()) return '\n'.join(out) @@ -202,10 +205,13 @@ def hook(self, src, dst, first=False, noentry=False): # our injected code is guaranteed to be sequential and unaligned # so we can inject twice and call the first one - evicted = '' + evicted = b'' # eh we'll just trust that a call won't be anywhere near 64 bytes - ins = self.dis(src) - for ins in ins: + instructs = self.dis(src) + for ins in instructs: + for b in ins.bytes: + sys.stdout.write(f'{hex(b)[1:]} ') + sys.stdout.flush() evicted += ins.bytes if len(evicted) >= len(call): break @@ -227,7 +233,7 @@ def hook(self, src, dst, first=False, noentry=False): emptyjmp = self.asm(self.arch.jmp(self.binary.next_alloc()), addr=src) jmpoff = src + len(evicted) - jmpevict = str(self.elf.read(jmpoff, len(emptyjmp))) + jmpevict = self.elf.read(jmpoff, len(emptyjmp)) stage0 = evicted + jmpevict # TODO: self.alloc()? @@ -263,7 +269,7 @@ def _lint(self, addr, raw, typ, is_asm=False): if typ == 'asm' or is_asm: dis = self.arch.dis(raw, addr=addr) for ins in dis: - if ins.bytes == 'ebfe'.decode('hex'): + if ins.bytes == codecs.decode('ebfe','hex'): self.warn('JMP 0 emitted!') def _compile(self, addr, **kwargs): @@ -319,7 +325,13 @@ def inject(self, **kwargs): if typ == 'asm' or is_asm: self.debug(dis=self.arch.dis(raw, addr=addr)) else: - self.debug(binascii.hexlify(raw)) + if isinstance(raw, str): + self.debug(binascii.hexlify(raw.encode('utf-8'))) + elif isinstance(raw, bytes): + self.debug(str(raw)) + else: + raise RuntimeError('unsupported type') + addr = self.binary.alloc(len(raw), target=target) if mark_func: diff --git a/core/linker.py b/core/linker.py index 39249cf..3559b6c 100644 --- a/core/linker.py +++ b/core/linker.py @@ -1,3 +1,4 @@ +import sys from contextlib import contextmanager import compiler @@ -37,10 +38,9 @@ def inject(self, linker, sym): else: pt.info('[LINK] %s' % sym) asm = compiler.compile(self.source, linker, syms=self.syms.keys()) - table = '\n'.join([pt.arch.jmp('_' + sym) for sym in self.syms.keys()]) - sep = 'PATCHKITJMPTABLE' - asm += ('\n.ascii "%s"\n__JMPTABLE__:\n' % sep) + table + sep = b'PATCHKITJMPTABLE' + asm += ('\n.ascii "%s"\n__JMPTABLE__:\n' % sep.decode()) + table addr = pt.binary.next_alloc('link') raw = pt.asm(asm, addr=addr, att_syntax=True) raw, jmps = raw.rsplit(sep, 1) @@ -76,7 +76,7 @@ def declare(self, symbols=None, source='', headers=''): if symbols: for sym, desc in symbols.items(): if sym in self.syms: - print 'Warning: duplicate symbol (%s)' % sym + print('Warning: duplicate symbol (%s)' % sym) self.syms[sym] = (desc, decl) @staticmethod diff --git a/core/patcher.py b/core/patcher.py index 1207135..f593e1c 100644 --- a/core/patcher.py +++ b/core/patcher.py @@ -29,7 +29,8 @@ def add(self, path): def debug(self, *args): if not self.silent: - print >>sys.stderr, ' '.join(map(str, args)) + print(' '.join(map(str, args))) + #sys.stderr.write(' '.join(map(str, args))) def patch(self): cwd = os.getcwd() diff --git a/deps.sh b/deps.sh index 02e7f43..d493770 100755 --- a/deps.sh +++ b/deps.sh @@ -6,7 +6,7 @@ echo " https://github.com/unicorn-engine/unicorn/blob/master/docs/COMPILE-NIX.md echo echo "If you're on Ubuntu, you want to do this first:" echo " sudo apt-get update" -echo " sudo apt-get install python-pip build-essential git cmake python-dev libglib2.0-dev" +echo " sudo apt-get install python3-pip build-essential git cmake python-dev libglib2.0-dev" echo echo "If you're on a Mac, do this first:" echo " brew install pkg-config glib cmake" @@ -23,6 +23,11 @@ build="$cwd/build" mkdir build &>/dev/null set -e +echo "[*] Building Unicorn" +cd "$build" +git clone https://github.com/unicorn-engine/unicorn.git +cd unicorn && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release && sudo make -j2 + echo "[*] Building Keystone" cd "$build" git clone https://github.com/keystone-engine/keystone.git @@ -36,21 +41,17 @@ git clone https://github.com/aquynh/capstone.git cd capstone && make -j2 echo -echo "[*] Building Unicorn" -cd "$build" -git clone https://github.com/unicorn-engine/unicorn.git -cd unicorn && ./make.sh - echo echo "[*] Installing projects and Python bindings (using sudo)" +cd "$build/unicorn/build" && sudo make install +cd "$build/unicorn/bindings/python" && sudo make install3 + cd "$build/keystone/build" && sudo make install -cd "$build/keystone/bindings/python" && sudo make install +cd "$build/keystone/bindings/python" && sudo make install3 cd "$build/capstone" && sudo make install -cd "$build/capstone/bindings/python" && sudo make install +cd "$build/capstone/bindings/python" && sudo make install3 -cd "$build/unicorn" && sudo ./make.sh install -cd "$build/unicorn/bindings/python" && sudo make install which ldconfig &>/dev/null && sudo ldconfig @@ -58,4 +59,4 @@ echo echo "All done!" echo echo -n "Testing Python import: " -python -c "import capstone, keystone, unicorn; capstone.CS_ARCH_X86, unicorn.UC_ARCH_X86, keystone.KS_ARCH_X86; print 'works.'" +python3 -c "import capstone, keystone, unicorn; capstone.CS_ARCH_X86, unicorn.UC_ARCH_X86, keystone.KS_ARCH_X86; print('works.')" diff --git a/explore b/explore index 4e3bbbb..af1b088 100755 --- a/explore +++ b/explore @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import binascii import difflib @@ -15,7 +15,7 @@ if __name__ == '__main__': backtrack = True if len(sys.argv) != 2: - print 'Usage: %s [--backtrack] ' % sys.argv[0] + print('Usage: %s [--backtrack] ' % sys.argv[0]) sys.exit(1) bina = sys.argv[1] diff --git a/patch b/patch index f4a7ee4..6ab7f3e 100755 --- a/patch +++ b/patch @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import sys @@ -19,7 +19,7 @@ if __name__ == '__main__': parser.print_help() sys.exit(1) - args = map(os.path.abspath, args) + args = list(map(os.path.abspath, args)) patchdirs = args[1:] if options.new: diff --git a/run b/run index e30526b..77eacbb 100755 --- a/run +++ b/run @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import sys @@ -16,7 +16,7 @@ if __name__ == '__main__': parser.print_help() sys.exit(1) - args = map(os.path.abspath, args) + args = list(map(os.path.abspath, args)) patchdirs = args[1:] patch = Patcher(args[0], verbose=options.verbose, silent=not options.verbose) diff --git a/samples/x86/fuzzing/01_cmp_split.py b/samples/x86/fuzzing/01_cmp_split.py index 44a9995..e478aa9 100644 --- a/samples/x86/fuzzing/01_cmp_split.py +++ b/samples/x86/fuzzing/01_cmp_split.py @@ -17,7 +17,7 @@ def _decompose(ins, jmps): asm.append('%s _ret' % (invert[jmp.mnemonic])) asm.append('jmp local_%d' % (i + 1)) asm.append('local_4:\n_ret: ret') - print '\n'.join(asm) + print('\n'.join(asm)) return '\n'.join(asm) # try to split word-wise cmp into byte-wise cmp so AFL can path through it diff --git a/samples/x86/hello/hello32.py b/samples/x86/hello/hello32.py index ac57ca2..32d11bd 100644 --- a/samples/x86/hello/hello32.py +++ b/samples/x86/hello/hello32.py @@ -1,5 +1,5 @@ def patch(pt): - hello, size = pt.inject(raw='hello world\n', size=True) + hello, size = pt.inject(raw=b'hello world\n', size=True) addr = pt.inject(asm=r''' push eax diff --git a/samples/x86/hello/hello64.py b/samples/x86/hello/hello64.py index 3bd1505..e78fdea 100644 --- a/samples/x86/hello/hello64.py +++ b/samples/x86/hello/hello64.py @@ -1,16 +1,17 @@ def patch(pt): - hello, size = pt.inject(raw='hello world\n', size=True) + hello, size = pt.inject(raw=b'hello world\n', size=True) + base = pt.binary.next_alloc() addr = pt.inject(asm=r''' push rax push rdi push rsi push rdx - mov rax, 1 # SYS_write - mov rdi, 1 # fd - mov rsi, %d # buf - mov rdx, %d # size + mov rax, 1 # SYS_write + mov rdi, 1 # fd + lea rsi, [rip - _PKST_ + %d] # buf + mov rdx, %d # size syscall pop rdx @@ -18,5 +19,5 @@ def patch(pt): pop rdi pop rax ret - ''' % (hello, size)) + ''' % (hello - base, size)) pt.hook(pt.entry, addr) diff --git a/util/__init__.py b/util/__init__.py index ac3b789..d1ea0b0 100644 --- a/util/__init__.py +++ b/util/__init__.py @@ -2,4 +2,4 @@ def read(name): path = os.path.join(os.path.dirname(__file__), name) - return open(path, 'rb').read() + return open(path, 'r').read() diff --git a/util/backdoor/backdoor_poc.py b/util/backdoor/backdoor_poc.py index c64002b..a7206c5 100644 --- a/util/backdoor/backdoor_poc.py +++ b/util/backdoor/backdoor_poc.py @@ -10,7 +10,7 @@ for j in xrange(4): key[j] = chr(ord(key[j]) ^ ord(c)) key = ''.join(key) -print 'sending key', repr(key) +print('sending key', repr(key)) p.send(key) # wait for key response @@ -19,7 +19,7 @@ while True: buf += p.recv(1) if buf[-len(ref):] == ref: - print 'got key response' + print('got key response') break nonce = p.recv(8).encode('hex') @@ -29,4 +29,4 @@ p.send(p32(len(sig))) p.send(sig) flag = p.recv(4) -print 'flag', repr(flag) +print('flag', repr(flag)) diff --git a/util/elffile.py b/util/elffile.py index b368ed2..811d74b 100644 --- a/util/elffile.py +++ b/util/elffile.py @@ -60,10 +60,18 @@ def __call__(self, name='', code=0, desc=''): self.bycode[code] = c def __getitem__(self, key): - if isinstance(key, basestring): + #print(f'type(key): {type(key)}') + if isinstance(key, str): return self.byname[key] elif isinstance(key, int): - return self.bycode[key] + try: + #print(f'hex(key): {hex(key)}: self.bycode[key]: {self.bycode[key]}') + #print(f'self.bycode: {len(self.bycode)}, {self.bycode} {hex(key)}') + return self.bycode[key] + except KeyError: + # TODO: Figure out why we're getting a key that we don't have Code section for + ret = Code(self, '', '', '') + return ret else: raise KeyError(key) @@ -891,7 +899,7 @@ def _offsets(self, offset=0): continue p.filesz = len(p.data) # FIXME: repatching a file will spew PHDRs at the end of TEXT - if p.offset is 0: + if 0 == p.offset: p.filesz += phsize x = offset + p.filesz else: @@ -948,12 +956,12 @@ def _regen_section_name_table(self): length = sum([len(s) + 1 for s in strings]) + 1 data = strtab.data = bytearray(length) - data[0] = b'\0' + data[0] = 0 p = 1 for s in strings: data[p:p+len(s)] = s p += len(s) + 1 - data[p - 1] = b'\0' + data[p - 1] = 0 for s in self.sections: s.nameoffset = data.find(s.name + b'\0') diff --git a/util/emu.py b/util/emu.py index 03d1ec3..0f38b39 100644 --- a/util/emu.py +++ b/util/emu.py @@ -130,7 +130,7 @@ def run(self, entry): uc = self.uc self.last_addr = entry - print ' [BACKTRACK] Running once:' + print(' [BACKTRACK] Running once:') self.recv_hist = [] # stop emulator on receive() spam def hook_intr(uc, intno, user): @@ -148,14 +148,14 @@ def hook_intr(uc, intno, user): self.emu.verbose = False self.emu.block_timeout = 1 - print ' [BACKTRACK] Backtracking...' + print(' [BACKTRACK] Backtracking...') last = time.time() finished = 0 while self.targets: now = time.time() if now - last > 1: last = now - print ' [BACKTRACK] %d/%d' % (finished, finished + len(self.targets)) + print(' [BACKTRACK] %d/%d' % (finished, finished + len(self.targets))) addr, transaction = self.targets.pop(-1) transaction.rewind() @@ -167,7 +167,7 @@ def hook_intr(uc, intno, user): except Exception as e: pass finished += 1 - print ' [BACKTRACK] Finished (%d/%d)' % (finished, finished + len(self.targets)) + print(' [BACKTRACK] Finished (%d/%d)' % (finished, finished + len(self.targets))) uc.hook_del(hh) class Emu: diff --git a/util/patch/dis.py b/util/patch/dis.py index ee4d8d0..1abb15c 100644 --- a/util/patch/dis.py +++ b/util/patch/dis.py @@ -212,7 +212,7 @@ def fromins(cls, ins): if opcls: ops.append(opcls.fromop(ins, op)) else: - print 'UNSUPPORTED OP', op, ins.op_str + print('UNSUPPORTED OP', op, ins.op_str) assert(False) c = cls(ins.mnemonic, *ops)