Skip to content

Commit 0036c42

Browse files
kumarak and AkshayK
authored
Handling of Named Type references (#99)
* Handling of named references: fix the handling of the type cache and read bytes from memory * Review changes * Add vector type lifting * Add remill compat header for vector type Co-authored-by: AkshayK <akshay.kumar@octolabs.io>
1 parent 659ac45 commit 0036c42

File tree

2 files changed

+141
-59
lines changed

2 files changed

+141
-59
lines changed

lib/Lift.cpp

Lines changed: 55 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#include <llvm/IR/Module.h>
2525
#include <llvm/Transforms/Scalar.h>
2626
#include <llvm/Transforms/Utils.h>
27+
28+
#include <remill/BC/Compat/VectorType.h>
2729
#include <remill/BC/Util.h>
2830

2931
#include <algorithm>
@@ -510,8 +512,10 @@ namespace {
510512
static llvm::APInt ReadValueFromMemory(const uint64_t addr, const uint64_t size,
511513
const remill::Arch *arch,
512514
const Program &program) {
513-
llvm::APInt result(size, 0);
514-
for (auto i = 0u; i < (size / 8); ++i) {
515+
516+
// Create an arbitrary-precision integer that is size * 8 bits wide
517+
llvm::APInt result(size * 8, 0);
518+
for (auto i = 0u; i < size; ++i) {
515519
auto byte_val = program.FindByte(addr + i).Value();
516520
if (remill::IsError(byte_val)) {
517521
LOG(ERROR) << "Unable to read value of byte at " << std::hex << addr + i
@@ -524,8 +528,8 @@ static llvm::APInt ReadValueFromMemory(const uint64_t addr, const uint64_t size,
524528
}
525529

526530
// NOTE(artem): LLVM's APInt does not handle byteSwap()
527-
// for size 8, leading to a segfault. Guard against it here.
528-
if (arch->MemoryAccessIsLittleEndian() && size > 8) {
531+
// for a size of 1 byte (8 bits), leading to a segfault. Guard against it here.
532+
if (arch->MemoryAccessIsLittleEndian() && size > 1) {
529533
result = result.byteSwap();
530534
}
531535

@@ -540,61 +544,74 @@ CreateConstFromMemory(const uint64_t addr, llvm::Type *type,
540544
llvm::Constant *result{nullptr};
541545
switch (type->getTypeID()) {
542546
case llvm::Type::IntegerTyID: {
543-
const auto size = dl.getTypeSizeInBits(type);
547+
const auto size = dl.getTypeAllocSize(type);
544548
auto val = ReadValueFromMemory(addr, size, arch, program);
545549
result = llvm::ConstantInt::get(type, val);
546550
} break;
547551

548552
case llvm::Type::PointerTyID: {
553+
const auto pointer_type = llvm::dyn_cast<llvm::PointerType>(type);
554+
const auto size = dl.getTypeAllocSize(type);
555+
auto val = ReadValueFromMemory(addr, size, arch, program);
556+
result = llvm::Constant::getIntegerValue(pointer_type, val);
549557
} break;
550558

551559
case llvm::Type::StructTyID: {
560+
552561
// Take apart the structure type, recursing into each element
553562
// so that we can create a constant structure
554-
auto struct_type = llvm::dyn_cast<llvm::StructType>(type);
555-
556-
auto num_elms = struct_type->getNumElements();
557-
auto elm_offset = 0;
563+
const auto struct_type = llvm::dyn_cast<llvm::StructType>(type);
564+
const auto layout = dl.getStructLayout(struct_type);
565+
const auto num_elms = struct_type->getStructNumElements();
566+
std::vector<llvm::Constant *> initializer_list;
567+
initializer_list.reserve(num_elms);
558568

559-
std::vector<llvm::Constant *> const_list;
560-
561-
for (std::uint64_t i = 0U; i < num_elms; ++i) {
562-
auto elm_type = struct_type->getElementType(i);
563-
auto elm_size = dl.getTypeSizeInBits(elm_type);
564-
565-
auto const_elm =
566-
CreateConstFromMemory(addr + elm_offset, elm_type, arch,
567-
program, module);
568-
569-
const_list.push_back(const_elm);
570-
elm_offset += elm_size / 8;
569+
for (auto i = 0u; i < num_elms; ++i) {
570+
const auto elm_type = struct_type->getStructElementType(i);
571+
const auto offset = layout->getElementOffset(i);
572+
auto const_elm = CreateConstFromMemory(addr + offset, elm_type, arch,
573+
program, module);
574+
initializer_list.push_back(const_elm);
571575
}
572-
573-
result = llvm::ConstantStruct::get(struct_type,
574-
llvm::ArrayRef(const_list));
576+
result = llvm::ConstantStruct::get(struct_type, initializer_list);
575577
} break;
576578

577579
case llvm::Type::ArrayTyID: {
580+
581+
// Traverse all the elements of the array and create the initializer
582+
const auto array_type = llvm::dyn_cast<llvm::ArrayType>(type);
578583
const auto elm_type = type->getArrayElementType();
579-
const auto elm_size = dl.getTypeSizeInBits(elm_type);
584+
const auto elm_size = dl.getTypeAllocSize(elm_type);
580585
const auto num_elms = type->getArrayNumElements();
581-
std::string bytes(dl.getTypeSizeInBits(type) / 8, '\0');
586+
std::vector<llvm::Constant *> initializer_list;
587+
initializer_list.reserve(num_elms);
588+
582589
for (auto i = 0u; i < num_elms; ++i) {
583-
const auto elm_offset = i * (elm_size / 8);
584-
const auto src =
585-
ReadValueFromMemory(addr + elm_offset, elm_size, arch, program)
586-
.getRawData();
587-
const auto dst = bytes.data() + elm_offset;
588-
std::memcpy(dst, src, elm_size / 8);
589-
}
590-
if (elm_size == 8) {
591-
result = llvm::ConstantDataArray::getString(module.getContext(), bytes,
592-
/*AddNull=*/false);
593-
} else {
594-
result = llvm::ConstantDataArray::getRaw(bytes, num_elms, elm_type);
590+
const auto elm_offset = i * elm_size;
591+
auto const_elm = CreateConstFromMemory(addr + elm_offset, elm_type,
592+
arch, program, module);
593+
initializer_list.push_back(const_elm);
595594
}
595+
result = llvm::ConstantArray::get(array_type, initializer_list);
596596
} break;
597597

598+
case llvm::GetFixedVectorTypeId(): {
599+
const auto vec_type = llvm::dyn_cast<llvm::FixedVectorType>(type);
600+
const auto num_elms = vec_type->getNumElements();
601+
const auto elm_type = vec_type->getElementType();
602+
const auto elm_size = dl.getTypeAllocSize(elm_type);
603+
std::vector<llvm::Constant *> initializer_list;
604+
initializer_list.reserve(num_elms);
605+
606+
for (auto i = 0u; i < num_elms; ++i) {
607+
const auto elm_offset = i * elm_size;
608+
auto const_elm = CreateConstFromMemory(addr + elm_offset, elm_type,
609+
arch, program, module);
610+
initializer_list.push_back(const_elm);
611+
}
612+
result = llvm::ConstantVector::get(initializer_list);
613+
}break;
614+
598615
default:
599616
LOG(FATAL) << "Unhandled LLVM Type: " << remill::LLVMThingToString(type);
600617
break;

python/anvill/binja.py

Lines changed: 86 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -173,11 +173,66 @@ def _convert_bn_llil_type(
173173
ret = IntegerType(reg_size_bytes, True)
174174
return ret
175175

176+
def _cache_key(tinfo: bn.types.Type) -> str:
177+
""" Convert bn Type instance to cache key"""
178+
return str(tinfo)
179+
180+
def _convert_named_type_reference(
181+
bv, tinfo: bn.types.Type, cache) -> Type:
182+
""" Convert named type references into a `Type` instance"""
183+
if tinfo.type_class != bn.TypeClass.NamedTypeReferenceClass:
184+
return
185+
186+
named_tinfo = tinfo.named_type_reference
187+
if (named_tinfo.type_class
188+
== bn.NamedTypeReferenceClass.StructNamedTypeClass):
189+
# Get the bn struct type and recursively recover the elements
190+
ref_type = bv.get_type_by_name(named_tinfo.name);
191+
struct_type = ref_type.structure
192+
ret = StructureType()
193+
cache[_cache_key(struct_type)] = ret
194+
for elem in struct_type.members:
195+
ret.add_element_type(_convert_bn_type(bv, elem.type, cache))
196+
return ret
197+
198+
elif (named_tinfo.type_class
199+
== bn.NamedTypeReferenceClass.UnionNamedTypeClass):
200+
# Get the union type and recover the member elements
201+
ref_type = bv.get_type_by_name(named_tinfo.name);
202+
struct_type = ref_type.structure
203+
ret = UnionType()
204+
cache[_cache_key(struct_type)] = ret
205+
for elem in struct_type.union.members:
206+
ret.add_element_type(_convert_bn_type(bv, elem.type, cache))
207+
return ret
208+
209+
elif (named_tinfo.type_class
210+
== bn.NamedTypeReferenceClass.TypedefNamedTypeClass):
211+
ref_type = bv.get_type_by_name(named_tinfo.name);
212+
ret = TypedefType()
213+
cache[_cache_key(ref_type)] = ret
214+
ret.set_underlying_type(_convert_bn_type(bv, ref_type, cache))
215+
return ret
176216

177-
def _convert_bn_type(tinfo: bn.types.Type, cache):
217+
elif (named_tinfo.type_class
218+
== bn.NamedTypeReferenceClass.EnumNamedTypeClass):
219+
# Set the underlying type to an integer of size `width`
220+
ref_type = bv.get_type_by_name(named_tinfo.name);
221+
ret = EnumType()
222+
cache[_cache_key(ref_type)] = ret
223+
ret.set_underlying_type(IntegerType(ref_type.width, False))
224+
return ret
225+
226+
else:
227+
DEBUG("WARNING: Unknown named type {} not handled".format(named_tinfo))
228+
229+
230+
def _convert_bn_type(bv, tinfo: bn.types.Type, cache):
178231
"""Convert a bn `Type` instance into a `Type` instance."""
179-
if str(tinfo) in cache:
180-
return cache[str(tinfo)]
232+
233+
cache_key = _cache_key(tinfo)
234+
if cache_key in cache:
235+
return cache[cache_key]
181236

182237
# Void type.
183238
if tinfo.type_class == bn.TypeClass.VoidTypeClass:
@@ -186,17 +241,17 @@ def _convert_bn_type(tinfo: bn.types.Type, cache):
186241
# Pointer, array, or function.
187242
elif tinfo.type_class == bn.TypeClass.PointerTypeClass:
188243
ret = PointerType()
189-
cache[str(tinfo)] = ret
190-
ret.set_element_type(_convert_bn_type(tinfo.element_type, cache))
244+
cache[cache_key] = ret
245+
ret.set_element_type(_convert_bn_type(bv, tinfo.element_type, cache))
191246
return ret
192247

193248
elif tinfo.type_class == bn.TypeClass.FunctionTypeClass:
194249
ret = FunctionType()
195-
cache[str(tinfo)] = ret
196-
ret.set_return_type(_convert_bn_type(tinfo.return_value, cache))
250+
cache[cache_key] = ret
251+
ret.set_return_type(_convert_bn_type(bv, tinfo.return_value, cache))
197252

198253
for var in tinfo.parameters:
199-
ret.add_parameter_type(_convert_bn_type(var.type, cache))
254+
ret.add_parameter_type(_convert_bn_type(bv, var.type, cache))
200255

201256
if tinfo.has_variable_arguments:
202257
ret.set_is_variadic()
@@ -205,19 +260,26 @@ def _convert_bn_type(tinfo: bn.types.Type, cache):
205260

206261
elif tinfo.type_class == bn.TypeClass.ArrayTypeClass:
207262
ret = ArrayType()
208-
cache[str(tinfo)] = ret
209-
ret.set_element_type(_convert_bn_type(tinfo.element_type, cache))
263+
cache[cache_key] = ret
264+
ret.set_element_type(_convert_bn_type(bv, tinfo.element_type, cache))
210265
ret.set_num_elements(tinfo.count)
211266
return ret
212267

213268
elif tinfo.type_class == bn.TypeClass.StructureTypeClass:
214269
ret = StructureType()
215-
cache[str(tinfo)] = ret
270+
cache[cache_key] = ret
271+
272+
for elem in tinfo.structure.members:
273+
ret.add_element_type(_convert_bn_type(bv, elem.type, cache))
274+
216275
return ret
217276

218277
elif tinfo.type_class == bn.TypeClass.EnumerationTypeClass:
278+
# The underlying type of the enum will be an Integer of size
279+
# tinfo.width
219280
ret = EnumType()
220-
cache[str(tinfo)] = ret
281+
cache[cache_key] = ret
282+
ret.set_underlying_type(IntegerType(tinfo.width, False))
221283
return ret
222284

223285
elif tinfo.type_class == bn.TypeClass.BoolTypeClass:
@@ -238,16 +300,18 @@ def _convert_bn_type(tinfo: bn.types.Type, cache):
238300
width = tinfo.width
239301
return FloatingPointType(width)
240302

303+
elif tinfo.type_class == bn.TypeClass.NamedTypeReferenceClass:
304+
ret = _convert_named_type_reference(bv, tinfo, cache)
305+
return ret
306+
241307
elif tinfo.type_class in [
242308
bn.TypeClass.VarArgsTypeClass,
243309
bn.TypeClass.ValueTypeClass,
244-
bn.TypeClass.NamedTypeReferenceClass,
245310
bn.TypeClass.WideCharTypeClass,
246311
]:
247312
err_type_class = {
248313
bn.TypeClass.VarArgsTypeClass : "VarArgsTypeClass",
249314
bn.TypeClass.ValueTypeClass : "ValueTypeClass",
250-
bn.TypeClass.NamedTypeReferenceClass : "NamedTypeReferenceClass",
251315
bn.TypeClass.WideCharTypeClass : "WideCharTypeClass",
252316
}
253317
DEBUG("WARNING: Unhandled type class {}".format(err_type_class[tinfo.type_class]))
@@ -256,7 +320,7 @@ def _convert_bn_type(tinfo: bn.types.Type, cache):
256320
raise UnhandledTypeException("Unhandled type: {}".format(str(tinfo)), tinfo)
257321

258322

259-
def get_type(ty):
323+
def get_type(bv, ty):
260324
"""Type class that gives access to type sizes, printings, etc."""
261325

262326
if isinstance(ty, Type):
@@ -266,7 +330,7 @@ def get_type(ty):
266330
return ty.type()
267331

268332
elif isinstance(ty, bn.Type):
269-
return _convert_bn_type(ty, {})
333+
return _convert_bn_type(bv, ty, {})
270334

271335
if not ty:
272336
return VoidType()
@@ -416,7 +480,7 @@ def _extract_types_mlil(
416480
):
417481
reg_name = bv.arch.get_reg_name(item_or_list.storage)
418482
results.append(
419-
(reg_name, _convert_bn_type(item_or_list.type, {}), None)
483+
(reg_name, _convert_bn_type(bv, item_or_list.type, {}), None)
420484
)
421485
return results
422486

@@ -525,7 +589,8 @@ def get_variable_impl(self, address):
525589

526590
arch = self._arch
527591
bn_var = self._bv.get_data_var_at(address)
528-
var_type = get_type(bn_var.type)
592+
var_type = get_type(self._bv, bn_var.type)
593+
529594
# fall back onto an array of bytes type for variables
530595
# of an unknown (void) type.
531596
if isinstance(var_type, VoidType):
@@ -550,15 +615,15 @@ def get_function_impl(self, address):
550615
"No function defined at or containing address {:x}".format(address)
551616
)
552617

553-
func_type = get_type(bn_func.function_type)
618+
func_type = get_type(self._bv, bn_func.function_type)
554619
calling_conv = CallingConvention(arch, bn_func)
555620

556621
index = 0
557622
param_list = []
558623
for var in bn_func.parameter_vars:
559624
source_type = var.source_type
560625
var_type = var.type
561-
arg_type = get_type(var_type)
626+
arg_type = get_type(self._bv, var_type)
562627

563628
if source_type == bn.VariableSourceType.RegisterVariableSourceType:
564629
if (
@@ -590,7 +655,7 @@ def get_function_impl(self, address):
590655
index += 1
591656

592657
ret_list = []
593-
retTy = get_type(bn_func.return_type)
658+
retTy = get_type(self._bv, bn_func.return_type)
594659
if not isinstance(retTy, VoidType):
595660
for reg in calling_conv.return_regs:
596661
loc = Location()

0 commit comments

Comments
 (0)