Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions bolt/include/bolt/Core/MCInstUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define BOLT_CORE_MCINSTUTILS_H

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/MCPlus.h"
#include <map>
#include <variant>

Expand Down Expand Up @@ -175,6 +176,171 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
return Ref.print(OS);
}

/// Instruction-matching helpers operating on a single instruction at a time.
///
/// The idea is to make low-level instruction matching as readable as possible.
/// The classes contained in this namespace are intended to be used as a
/// domain-specific language to match MCInst with the particular opcode and
/// operands.
///
/// The goals of this DSL include
/// * matching a single instruction against the template consisting of the
/// particular target-specific opcode and a pattern of operands
/// * matching operands against the known values (such as 42, AArch64::X1 or
/// "the value of --brk-operand=N command line argument")
/// * capturing operands of an instruction ("whatever is the destination
/// register of AArch64::ADDXri instruction, store it to Xd variable to be
/// queried later")
/// * expressing repeated operands of a single matched instruction (such as
/// "ADDXri Xd, Xd, 42, 0" for an arbitrary register Xd) as well as across
/// multiple calls to matchInst(), which is naturally achieved by sequentially
/// capturing the operands and matching operands against the known values
/// * matching multi-instruction code patterns by sequentially calling
/// matchInst() while passing around already matched operands
///
/// The non-goals (compared to MCPlusBuilder::MCInstMatcher) include
/// * matching an arbitrary tree of instructions in a single matchInst() call
/// * encapsulation of target-specific knowledge ("match an increment of Xm
/// by 42")
///
/// Unlike MCPlusBuilder::MCInstMatcher, this DSL focuses on the use cases when
/// the precise control over the instruction order is important. For example,
/// let's consider a target-specific function that has to match two particular
/// instructions against this pattern (for two different registers Xm and Xn)
///
/// ADDXrs Xm, Xn, Xm, #0
/// BR Xm
///
/// and return the register holding the branch target. Assuming the instructions
/// are available as MaybeAdd and MaybeBr, the following code can be used:
///
/// // Bring the short names into the local scope:
/// using namespace LowLevelInstMatcherDSL;
/// // Declare the registers to capture:
/// Reg Xn, Xm;
/// // Capture the 0th and 1st operands, match the 2nd operand against the
/// // just captured Xm register, match the 3rd operand against literal 0:
/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0))
/// return AArch64::NoRegister;
/// // Match the 0th operand against Xm:
/// if (!matchInst(MaybeBr, AArch64::BR, Xm))
/// return AArch64::NoRegister;
/// // Manually check that Xm and Xn did not match the same register:
/// if (Xm.get() == Xn.get())
/// return AArch64::NoRegister;
/// // Return the matched register:
/// return Xm.get();
///
namespace LowLevelInstMatcherDSL {

// The base class to match an operand of type T.
//
// The subclasses of OpMatcher are intended to be allocated on the stack and
// to only be used by passing them to matchInst() and by calling their get()
// function, thus the peculiar `mutable` specifiers: to make the calling code
// compact and readable, the templated matchInst() function has to accept both
// long-lived Imm/Reg wrappers declared as local variables (intended to capture
// the first operand's value and match the subsequent operands, whether inside
// a single instruction or across multiple instructions), as well as temporary
// wrappers around literal values to match, f.e. Imm(42) or Reg(AArch64::XZR).
template <typename T> class OpMatcher {
mutable std::optional<T> Value;
mutable std::optional<T> SavedValue;

// Remember/restore the last Value - to be called by matchInst.
void remember() const { SavedValue = Value; }
void restore() const { Value = SavedValue; }

template <class... OpMatchers>
friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);

protected:
OpMatcher(std::optional<T> ValueToMatch) : Value(ValueToMatch) {}

bool matchValue(T OpValue) const {
// Check that OpValue does not contradict the existing Value.
bool MatchResult = !Value || *Value == OpValue;
// If MatchResult is false, all matchers will be reset before returning from
// matchInst, including this one, thus no need to assign conditionally.
Value = OpValue;

return MatchResult;
}

public:
/// Returns the captured value.
T get() const {
assert(Value.has_value());
return *Value;
}
};

class Reg : public OpMatcher<MCPhysReg> {
bool matches(const MCOperand &Op) const {
if (!Op.isReg())
return false;

return matchValue(Op.getReg());
}

template <class... OpMatchers>
friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);

public:
Reg(std::optional<MCPhysReg> RegToMatch = std::nullopt)
: OpMatcher<MCPhysReg>(RegToMatch) {}
};

class Imm : public OpMatcher<int64_t> {
bool matches(const MCOperand &Op) const {
if (!Op.isImm())
return false;

return matchValue(Op.getImm());
}

template <class... OpMatchers>
friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);

public:
Imm(std::optional<int64_t> ImmToMatch = std::nullopt)
: OpMatcher<int64_t>(ImmToMatch) {}
};

/// Tries to match Inst and updates Ops on success.
///
/// If Inst has the specified Opcode and its operand list prefix matches Ops,
/// this function returns true and updates Ops, otherwise false is returned and
/// values of Ops are kept as before matchInst was called.
///
/// Please note that while Ops are technically passed by a const reference to
/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
/// fields are marked mutable.
template <class... OpMatchers>
bool matchInst(const MCInst &Inst, unsigned Opcode, const OpMatchers &...Ops) {
if (Inst.getOpcode() != Opcode)
return false;
assert(sizeof...(Ops) <= MCPlus::getNumPrimeOperands(Inst) &&
"Too many operands are matched for the Opcode");

// Ask each matcher to remember its current value in case of rollback.
(Ops.remember(), ...);

// Check if all matchers match the corresponding operands.
auto It = Inst.begin();
auto AllMatched = (Ops.matches(*(It++)) && ... && true);

// If match failed, restore the original captured values.
if (!AllMatched) {
(Ops.restore(), ...);
return false;
}

return true;
}

} // namespace LowLevelInstMatcherDSL

} // namespace bolt
} // namespace llvm

Expand Down
91 changes: 35 additions & 56 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "Utils/AArch64BaseInfo.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCInstUtils.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
Expand Down Expand Up @@ -401,81 +402,59 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {

// Iterate over the instructions of BB in reverse order, matching opcodes
// and operands.
MCPhysReg TestedReg = 0;
MCPhysReg ScratchReg = 0;

auto It = BB.end();
auto StepAndGetOpcode = [&It, &BB]() -> int {
if (It == BB.begin())
return -1;
--It;
return It->getOpcode();
auto StepBack = [&]() {
while (It != BB.begin()) {
--It;
// Skip any CFI instructions, but no other pseudos are expected here.
if (!isCFI(*It))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While virtually identical, you likely want isPseudo().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My intention was to control the instruction sequence as much as possible in ptrauth-related methods. While CFI pseudos were known to occur in the middle of the sequence (and they don't add any real instructions to the sequence being analyzed), I don't expect any other pseudo instructions to occur here. As I'm not absolutely sure any pseudo instruction must expand into exactly zero bytes of code (this is obviously not the case in the backend in general, but it is probably the case in the disassembled code), I kept isCFI so far and added a comment. Please feel free to correct me if I'm wrong.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thanks for the context. In BOLT, at the moment CFIs are the only pseudo instructions widely used. I believe we have an assumption that pseudos don't directly produce any code.

return true;
}
return false;
};

switch (StepAndGetOpcode()) {
default:
// Not matched the branch instruction.
// Step to the last non-CFI instruction.
if (!StepBack())
return std::nullopt;
case AArch64::Bcc:
// Bcc EQ, .Lon_success
if (It->getOperand(0).getImm() != AArch64CC::EQ)
return std::nullopt;
// Not checking .Lon_success (see above).

// SUBSXrs XZR, TestedReg, ScratchReg, 0 (used by "CMP reg, reg" alias)
if (StepAndGetOpcode() != AArch64::SUBSXrs ||
It->getOperand(0).getReg() != AArch64::XZR ||
It->getOperand(3).getImm() != 0)
using namespace llvm::bolt::LowLevelInstMatcherDSL;
Reg TestedReg;
Reg ScratchReg;

if (matchInst(*It, AArch64::Bcc, Imm(AArch64CC::EQ) /*, .Lon_success*/)) {
if (!StepBack() || !matchInst(*It, AArch64::SUBSXrs, Reg(AArch64::XZR),
TestedReg, ScratchReg, Imm(0)))
return std::nullopt;
TestedReg = It->getOperand(1).getReg();
ScratchReg = It->getOperand(2).getReg();

// Either XPAC(I|D) ScratchReg, ScratchReg
// or XPACLRI
switch (StepAndGetOpcode()) {
default:
if (!StepBack())
return std::nullopt;
case AArch64::XPACLRI:
if (matchInst(*It, AArch64::XPACLRI)) {
// No operands to check, but using XPACLRI forces TestedReg to be X30.
if (TestedReg != AArch64::LR)
return std::nullopt;
break;
case AArch64::XPACI:
case AArch64::XPACD:
if (It->getOperand(0).getReg() != ScratchReg ||
It->getOperand(1).getReg() != ScratchReg)
if (TestedReg.get() != AArch64::LR)
return std::nullopt;
break;
} else if (!matchInst(*It, AArch64::XPACI, ScratchReg, ScratchReg) &&
!matchInst(*It, AArch64::XPACD, ScratchReg, ScratchReg)) {
return std::nullopt;
}

// ORRXrs ScratchReg, XZR, TestedReg, 0 (used by "MOV reg, reg" alias)
if (StepAndGetOpcode() != AArch64::ORRXrs)
if (!StepBack() || !matchInst(*It, AArch64::ORRXrs, ScratchReg,
Reg(AArch64::XZR), TestedReg, Imm(0)))
return std::nullopt;
if (It->getOperand(0).getReg() != ScratchReg ||
It->getOperand(1).getReg() != AArch64::XZR ||
It->getOperand(2).getReg() != TestedReg ||
It->getOperand(3).getImm() != 0)
return std::nullopt;

return std::make_pair(TestedReg, &*It);

case AArch64::TBZX:
// TBZX ScratchReg, 62, .Lon_success
ScratchReg = It->getOperand(0).getReg();
if (It->getOperand(1).getImm() != 62)
return std::nullopt;
// Not checking .Lon_success (see above).
return std::make_pair(TestedReg.get(), &*It);
}

// EORXrs ScratchReg, TestedReg, TestedReg, 1
if (StepAndGetOpcode() != AArch64::EORXrs)
return std::nullopt;
TestedReg = It->getOperand(1).getReg();
if (It->getOperand(0).getReg() != ScratchReg ||
It->getOperand(2).getReg() != TestedReg ||
It->getOperand(3).getImm() != 1)
if (matchInst(*It, AArch64::TBZX, ScratchReg, Imm(62) /*, .Lon_success*/)) {
if (!StepBack() || !matchInst(*It, AArch64::EORXrs, ScratchReg, TestedReg,
TestedReg, Imm(1)))
return std::nullopt;

return std::make_pair(TestedReg, &*It);
return std::make_pair(TestedReg.get(), &*It);
}

return std::nullopt;
}

std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst,
Expand Down
Loading