diff --git a/.remill_commit_id b/.remill_commit_id index 50ad550a3..e02e319dc 100644 --- a/.remill_commit_id +++ b/.remill_commit_id @@ -1 +1 @@ -99df2e19d4c4af677948a851461e9ffb3c164331 \ No newline at end of file +da2e970a5143aca7977761d7a74f654b42e3d9af \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index e9dcb36ca..9f2314730 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,6 +119,7 @@ add_executable(${MCSEMA_LIFT} mcsema/BC/External.cpp mcsema/BC/Function.cpp mcsema/BC/Instruction.cpp + mcsema/BC/Info.cpp mcsema/BC/Legacy.cpp mcsema/BC/Lift.cpp mcsema/BC/Optimize.cpp diff --git a/mcsema/BC/Function.cpp b/mcsema/BC/Function.cpp index f3881e457..547475864 100644 --- a/mcsema/BC/Function.cpp +++ b/mcsema/BC/Function.cpp @@ -44,6 +44,7 @@ #include "mcsema/Arch/Arch.h" #include "mcsema/BC/Callback.h" #include "mcsema/BC/Instruction.h" +#include "mcsema/BC/Info.h" #include "mcsema/BC/Legacy.h" #include "mcsema/BC/Lift.h" #include "mcsema/BC/Optimize.h" @@ -1452,6 +1453,9 @@ static llvm::Function *LiftFunction(const NativeModule *cfg_module, lifted_func->removeFnAttr(llvm::Attribute::AlwaysInline); lifted_func->removeFnAttr(llvm::Attribute::InlineHint); lifted_func->addFnAttr(llvm::Attribute::NoInline); + + // Annotate the lifted function. + info::Set( { cfg_func->name, cfg_func->ea }, *lifted_func ); if (FLAGS_stack_protector) { lifted_func->addFnAttr(llvm::Attribute::StackProtectReq); diff --git a/mcsema/BC/Info.cpp b/mcsema/BC/Info.cpp new file mode 100644 index 000000000..737d34af3 --- /dev/null +++ b/mcsema/BC/Info.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+
+namespace mcsema::info {
+
+// Records the `ea` and name held by `meta` (each only if set) on `func`.
+void Set(const Info &meta, llvm::Function &func) {
+  if (meta.ea)
+    SetMetadata(func, Kinds::ea_kind, std::to_string(*meta.ea));
+  if (meta.name)
+    SetMetadata(func, Kinds::name_kind, *meta.name);
+}
+
+// Collects whatever name / `ea` metadata is attached to `func`.
+Info Get(llvm::Function &func) {
+  return { Name(func), EA(func) };
+}
+
+// Name stored on `func`, or an empty optional if none was recorded.
+std::optional<std::string> Name(llvm::Function &func) {
+  return GetMetadata(func, Kinds::name_kind);
+}
+
+// `ea` stored on `func`, or an empty optional if none was recorded. Parsed
+// with std::stoull because `unsigned long` is only 32 bits wide on LLP64.
+std::optional<uint64_t> EA(llvm::Function &func) {
+  auto as_str = GetMetadata(func, Kinds::ea_kind);
+  if (!as_str) {
+    return {};
+  }
+  return { std::stoull(*as_str) };
+}
+
+} // namespace mcsema::info
diff --git a/mcsema/BC/Info.h b/mcsema/BC/Info.h
new file mode 100644
index 000000000..761f95cb8
--- /dev/null
+++ b/mcsema/BC/Info.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Trail of Bits, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace llvm {
+class Function;
+} // namespace llvm
+
+namespace mcsema::info {
+
+// Metadata kind names used to tag functions with their `ea` and name.
+struct Kinds {
+  static constexpr const char *ea_kind = "bin.ea";
+  static constexpr const char *name_kind = "bin.name";
+};
+
+// Optional name and `ea` of a function; either field may be absent.
+struct Info {
+  std::optional<std::string> name;
+  std::optional<uint64_t> ea;
+
+  // Streams "0x<hex ea>: <name>\n"; an absent field prints as "(unknown)".
+  template <typename Stream>
+  friend Stream &operator<<(Stream &os, const Info &info) {
+    if (info.ea) {
+      os << "0x" << std::hex << *info.ea << std::dec;
+    } else {
+      os << "(unknown)";
+    }
+    os << ": " << info.name.value_or("(unknown)") << '\n';
+    return os;
+  }
+};
+
+// Store `meta` on `func` as metadata, or read previously stored values back.
+void Set(const Info &meta, llvm::Function &func);
+Info Get(llvm::Function &func);
+
+std::optional<std::string> Name(llvm::Function &func);
+std::optional<uint64_t> EA(llvm::Function &func);
+
+} // namespace mcsema::info
diff --git a/mcsema/BC/Util.cpp b/mcsema/BC/Util.cpp
index b7097aca1..6b29f0840 100644
--- a/mcsema/BC/Util.cpp
+++ b/mcsema/BC/Util.cpp
@@ -222,4 +222,36 @@ llvm::Value *GetTLSBaseAddress(llvm::IRBuilder<> &ir) {
   return nullptr;
 }
 
+// Attaches `val` to `go` as a one-operand string metadata node of kind `kind`.
+// Metadata already present under `kind` is replaced after a warning is logged.
+void SetMetadata(llvm::GlobalObject &go,
+                 const std::string &kind, const std::string &val) {
+  if (go.getMetadata(kind)) {
+    LOG(WARNING) << remill::LLVMThingToString(&go) << " already has metadata of kind: "
+                 << kind;
+  }
+  auto &ctx = go.getContext();
+  auto node = llvm::MDNode::get(ctx, llvm::MDString::get(ctx, val));
+  go.setMetadata(kind, node);
+}
+
+// Returns the string stored under `kind` on `go`, or an empty value when `go`
+// has no such metadata. CHECK-fails unless the node is one MDString operand.
+MetaValue GetMetadata(llvm::GlobalObject &go, const std::string &kind) {
+  auto node = go.getMetadata(kind);
+  if (!node) {
+    return {};
+  }
+
+  CHECK(node->getNumOperands() == 1)
+      << "util::GetMetadata only supports nodes with exactly one operand";
+
+  // Checked conversion: a null or non-string operand is reported, not assumed.
+  auto str = llvm::dyn_cast_or_null<llvm::MDString>(node->getOperand(0).get());
+  CHECK(str != nullptr)
+      << "util::GetMetadata expects the operand to be an MDString";
+
+  return { str->getString().str() };
+}
+
 } // namespace mcsema
diff --git a/mcsema/BC/Util.h b/mcsema/BC/Util.h
index 6d23252e6..6f996cbf8 100644
--- a/mcsema/BC/Util.h
+++ b/mcsema/BC/Util.h
@@ -21,10 +21,16 @@ #include #include +#include #include +#include
+#include + #include "mcsema/CFG/CFG.h" +#include "remill/BC/Annotate.h" + namespace llvm { class BasicBlock;
@@ -43,6 +49,70 @@ struct NativeSegment;
 extern std::shared_ptr gContext;
 extern llvm::IntegerType *gWordType;
 extern std::unique_ptr gModule;
+
+// CRTP mixin with shorthand builders for common LLVM types and constants.
+// `Self` must expose a `context` member accepted by the llvm::Type getters.
+template <typename Self>
+struct LLVMConstants {
+
+  llvm::ConstantInt *i32(int32_t value) {
+    return GetConstantInt(value, 32);
+  }
+
+  llvm::ConstantInt *i64(int64_t value) {
+    return GetConstantInt(value, 64);
+  }
+
+  // Builds a `size`-bit constant; `value` is signed, so it is sign-extended.
+  llvm::ConstantInt *GetConstantInt(int64_t value, int64_t size) {
+    return llvm::ConstantInt::get(
+        llvm::Type::getIntNTy(static_cast<Self &>(*this).context, size),
+        static_cast<uint64_t>(value), /*isSigned=*/true);
+  }
+
+  llvm::Type *i64_t() {
+    return llvm::Type::getInt64Ty(static_cast<Self &>(*this).context);
+  }
+
+  llvm::Type *i64_ptr_t() {
+    return llvm::Type::getInt64PtrTy(static_cast<Self &>(*this).context);
+  }
+
+  llvm::Type *i_n_ptr_t(uint64_t size) {
+    return llvm::Type::getIntNPtrTy(static_cast<Self &>(*this).context, size);
+  }
+
+  llvm::Type *i8_t() {
+    return llvm::Type::getInt8Ty(static_cast<Self &>(*this).context);
+  }
+
+  llvm::Type *i8_ptr_t() {
+    return llvm::Type::getInt8PtrTy(static_cast<Self &>(*this).context);
+  }
+
+  llvm::Type *i_n_ty(uint64_t size) {
+    return llvm::Type::getIntNTy(static_cast<Self &>(*this).context, size);
+  }
+
+  llvm::Value *undef(llvm::Type *type) {
+    return llvm::UndefValue::get(type);
+  }
+
+  llvm::Type *ptr(llvm::Type *type, unsigned addr_space=0) {
+    return llvm::PointerType::get(type, addr_space);
+  }
+
+};
+
+// CRTP mixin for function lookup; `Self` must expose a `module` member.
+template <typename Self>
+struct ModuleUtil {
+  // Precondition: `name` exists in the module; the result is dereferenced unchecked.
+  llvm::Function &function(const std::string &name) {
+    return *static_cast<Self &>(*this).module.getFunction(name);
+  }
+};
+
 extern llvm::Constant *gZero;
 extern uint64_t gWordMask;
 
@@ -55,6 +125,22 @@ llvm::Constant *LiftXrefInCode(uint64_t ea);
 llvm::Constant *LiftXrefInData(const NativeSegment *cfg_seg, uint64_t ea,
                                bool cast_to_int = true);
 
+// Calls `yield(f)` for every function remill's origin query returns for `_module`.
+template <typename Yield>
+void ForEachLifted(llvm::Module &_module, Yield yield) {
+  using funcs = std::vector<llvm::Function *>;
+  for (auto f : remill::GetFunctionsByOrigin<funcs, remill::LiftedFunction>(_module)) {
+    yield(f);
+  }
+}
+
+// Result of a metadata lookup; empty when nothing is stored under the kind.
+using MetaValue = std::optional<std::string>;
+
+// Write / read a single string stored as metadata of kind `kind` on `go`.
+void SetMetadata(llvm::GlobalObject &go, const std::string &kind, const std::string &val);
+MetaValue GetMetadata(llvm::GlobalObject &go, const std::string &kind);
+
 // Create a global register state pointer to pass to lifted functions.
 llvm::Constant *GetStatePointer(void);
 