|
| 1 | +//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#ifndef BOLT_CORE_MCINSTUTILS_H |
| 10 | +#define BOLT_CORE_MCINSTUTILS_H |
| 11 | + |
| 12 | +#include "bolt/Core/BinaryBasicBlock.h" |
| 13 | +#include "bolt/Core/MCPlus.h" |
| 14 | +#include <map> |
| 15 | +#include <variant> |
| 16 | + |
| 17 | +namespace llvm { |
| 18 | +class MCCodeEmitter; |
| 19 | +} |
| 20 | + |
| 21 | +namespace llvm { |
| 22 | +namespace bolt { |
| 23 | + |
| 24 | +class BinaryFunction; |
| 25 | + |
| 26 | +/// MCInstReference represents a reference to a constant MCInst as stored either |
| 27 | +/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock |
| 28 | +/// (after a CFG is created). |
| 29 | +/// |
| 30 | +/// The reference may be invalidated when the function containing the referenced |
| 31 | +/// instruction is modified. |
| 32 | +class MCInstReference { |
| 33 | +public: |
| 34 | + using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator; |
| 35 | + |
| 36 | + /// Constructs an empty reference. |
| 37 | + MCInstReference() : Reference(RefInBB(nullptr, /*Index=*/0)) {} |
| 38 | + |
| 39 | + /// Constructs a reference to the instruction inside the basic block. |
| 40 | + MCInstReference(const BinaryBasicBlock &BB, const MCInst &Inst) |
| 41 | + : Reference(RefInBB(&BB, getInstIndexInBB(BB, Inst))) {} |
| 42 | + /// Constructs a reference to the instruction inside the basic block. |
| 43 | + MCInstReference(const BinaryBasicBlock &BB, unsigned Index) |
| 44 | + : Reference(RefInBB(&BB, Index)) {} |
| 45 | + |
| 46 | + /// Constructs a reference to the instruction inside the function without |
| 47 | + /// CFG information. |
| 48 | + MCInstReference(const BinaryFunction &BF, nocfg_const_iterator It) |
| 49 | + : Reference(RefInBF(&BF, It)) {} |
| 50 | + |
| 51 | + /// Locates an instruction inside a function and returns a reference. |
| 52 | + static MCInstReference get(const MCInst &Inst, const BinaryFunction &BF); |
| 53 | + |
| 54 | + bool operator==(const MCInstReference &Other) const { |
| 55 | + return Reference == Other.Reference; |
| 56 | + } |
| 57 | + |
| 58 | + const MCInst &getMCInst() const { |
| 59 | + assert(!empty() && "Empty reference"); |
| 60 | + if (auto *Ref = tryGetRefInBB()) { |
| 61 | + [[maybe_unused]] unsigned NumInstructions = Ref->BB->size(); |
| 62 | + assert(Ref->Index < NumInstructions && "Invalid reference"); |
| 63 | + return Ref->BB->getInstructionAtIndex(Ref->Index); |
| 64 | + } |
| 65 | + return getRefInBF().It->second; |
| 66 | + } |
| 67 | + |
| 68 | + operator const MCInst &() const { return getMCInst(); } |
| 69 | + |
| 70 | + bool empty() const { |
| 71 | + if (auto *Ref = tryGetRefInBB()) |
| 72 | + return Ref->BB == nullptr; |
| 73 | + return getRefInBF().BF == nullptr; |
| 74 | + } |
| 75 | + |
| 76 | + bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; } |
| 77 | + |
| 78 | + const BinaryFunction *getFunction() const { |
| 79 | + assert(!empty() && "Empty reference"); |
| 80 | + if (auto *Ref = tryGetRefInBB()) |
| 81 | + return Ref->BB->getFunction(); |
| 82 | + return getRefInBF().BF; |
| 83 | + } |
| 84 | + |
| 85 | + const BinaryBasicBlock *getBasicBlock() const { |
| 86 | + assert(!empty() && "Empty reference"); |
| 87 | + if (auto *Ref = tryGetRefInBB()) |
| 88 | + return Ref->BB; |
| 89 | + return nullptr; |
| 90 | + } |
| 91 | + |
| 92 | + /// Computes the original address of the instruction (or offset from base |
| 93 | + /// for PIC), assuming the containing function was not modified. |
| 94 | + /// |
| 95 | + /// This function is intended for the use cases like debug printing, as it |
| 96 | + /// is only as precise as BinaryContext::computeCodeSize() is and requires |
| 97 | + /// iterating over the prefix of the basic block (when CFG is available). |
| 98 | + /// |
| 99 | + /// MCCodeEmitter is not thread safe and the default instance from |
| 100 | + /// BinaryContext is used by default, thus pass an instance explicitly if |
| 101 | + /// this function may be called from multithreaded code. |
| 102 | + uint64_t computeAddress(const MCCodeEmitter *Emitter = nullptr) const; |
| 103 | + |
| 104 | + raw_ostream &print(raw_ostream &OS) const; |
| 105 | + |
| 106 | +private: |
| 107 | + static unsigned getInstIndexInBB(const BinaryBasicBlock &BB, |
| 108 | + const MCInst &Inst) { |
| 109 | + // Usage of pointer arithmetic assumes the instructions are stored in a |
| 110 | + // vector, see BasicBlockStorageIsVector in MCInstUtils.cpp. |
| 111 | + const MCInst *FirstInstInBB = &*BB.begin(); |
| 112 | + return &Inst - FirstInstInBB; |
| 113 | + } |
| 114 | + |
| 115 | + // Two cases are possible: |
| 116 | + // * functions with CFG reconstructed - a function stores a collection of |
| 117 | + // basic blocks, each basic block stores a contiguous vector of MCInst |
| 118 | + // * functions without CFG - there are no basic blocks created, |
| 119 | + // the instructions are directly stored in std::map in BinaryFunction |
| 120 | + // |
| 121 | + // In both cases, the direct parent of MCInst is stored together with an |
| 122 | + // index or iterator pointing to the instruction. |
| 123 | + |
| 124 | + // Helper struct: CFG is available, the direct parent is a basic block. |
| 125 | + struct RefInBB { |
| 126 | + RefInBB(const BinaryBasicBlock *BB, unsigned Index) |
| 127 | + : BB(BB), Index(Index) {} |
| 128 | + RefInBB(const RefInBB &Other) = default; |
| 129 | + RefInBB &operator=(const RefInBB &Other) = default; |
| 130 | + |
| 131 | + const BinaryBasicBlock *BB; |
| 132 | + unsigned Index; |
| 133 | + |
| 134 | + bool operator==(const RefInBB &Other) const { |
| 135 | + return BB == Other.BB && Index == Other.Index; |
| 136 | + } |
| 137 | + }; |
| 138 | + |
| 139 | + // Helper struct: CFG is *not* available, the direct parent is a function, |
| 140 | + // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value |
| 141 | + // is an instruction's offset). |
| 142 | + struct RefInBF { |
| 143 | + RefInBF(const BinaryFunction *BF, nocfg_const_iterator It) |
| 144 | + : BF(BF), It(It) {} |
| 145 | + RefInBF(const RefInBF &Other) = default; |
| 146 | + RefInBF &operator=(const RefInBF &Other) = default; |
| 147 | + |
| 148 | + const BinaryFunction *BF; |
| 149 | + nocfg_const_iterator It; |
| 150 | + |
| 151 | + bool operator==(const RefInBF &Other) const { |
| 152 | + return BF == Other.BF && It->first == Other.It->first; |
| 153 | + } |
| 154 | + }; |
| 155 | + |
| 156 | + std::variant<RefInBB, RefInBF> Reference; |
| 157 | + |
| 158 | + // Utility methods to be used like this: |
| 159 | + // |
| 160 | + // if (auto *Ref = tryGetRefInBB()) |
| 161 | + // return Ref->doSomething(...); |
| 162 | + // return getRefInBF().doSomethingElse(...); |
| 163 | + const RefInBB *tryGetRefInBB() const { |
| 164 | + assert(std::get_if<RefInBB>(&Reference) || |
| 165 | + std::get_if<RefInBF>(&Reference)); |
| 166 | + return std::get_if<RefInBB>(&Reference); |
| 167 | + } |
| 168 | + const RefInBF &getRefInBF() const { |
| 169 | + assert(std::get_if<RefInBF>(&Reference)); |
| 170 | + return *std::get_if<RefInBF>(&Reference); |
| 171 | + } |
| 172 | +}; |
| 173 | + |
| 174 | +static inline raw_ostream &operator<<(raw_ostream &OS, |
| 175 | + const MCInstReference &Ref) { |
| 176 | + return Ref.print(OS); |
| 177 | +} |
| 178 | + |
| 179 | +/// Instruction-matching helpers operating on a single instruction at a time. |
| 180 | +/// |
| 181 | +/// The idea is to make low-level instruction matching as readable as possible. |
| 182 | +/// The classes contained in this namespace are intended to be used as a |
| 183 | +/// domain-specific language to match MCInst with the particular opcode and |
| 184 | +/// operands. |
| 185 | +/// |
| 186 | +/// The goals of this DSL include |
| 187 | +/// * matching a single instruction against the template consisting of the |
| 188 | +/// particular target-specific opcode and a pattern of operands |
| 189 | +/// * matching operands against the known values (such as 42, AArch64::X1 or |
| 190 | +/// "the value of --brk-operand=N command line argument") |
| 191 | +/// * capturing operands of an instruction ("whatever is the destination |
| 192 | +/// register of AArch64::ADDXri instruction, store it to Xd variable to be |
| 193 | +/// queried later") |
| 194 | +/// * expressing repeated operands of a single matched instruction (such as |
| 195 | +/// "ADDXri Xd, Xd, 42, 0" for an arbitrary register Xd) as well as across |
| 196 | +/// multiple calls to matchInst(), which is naturally achieved by sequentially |
| 197 | +/// capturing the operands and matching operands against the known values |
| 198 | +/// * matching multi-instruction code patterns by sequentially calling |
| 199 | +/// matchInst() while passing around already matched operands |
| 200 | +/// |
| 201 | +/// The non-goals (compared to MCPlusBuilder::MCInstMatcher) include |
| 202 | +/// * matching an arbitrary tree of instructions in a single matchInst() call |
| 203 | +/// * encapsulation of target-specific knowledge ("match an increment of Xm |
| 204 | +/// by 42") |
| 205 | +/// |
| 206 | +/// Unlike MCPlusBuilder::MCInstMatcher, this DSL focuses on the use cases when |
| 207 | +/// the precise control over the instruction order is important. For example, |
| 208 | +/// let's consider a target-specific function that has to match two particular |
| 209 | +/// instructions against this pattern (for two different registers Xm and Xn) |
| 210 | +/// |
| 211 | +/// ADDXrs Xm, Xn, Xm, #0 |
| 212 | +/// BR Xm |
| 213 | +/// |
| 214 | +/// and return the register holding the branch target. Assuming the instructions |
| 215 | +/// are available as MaybeAdd and MaybeBr, the following code can be used: |
| 216 | +/// |
| 217 | +/// // Bring the short names into the local scope: |
| 218 | +/// using namespace LowLevelInstMatcherDSL; |
| 219 | +/// // Declare the registers to capture: |
| 220 | +/// Reg Xn, Xm; |
| 221 | +/// // Capture the 0th and 1st operands, match the 2nd operand against the |
| 222 | +/// // just captured Xm register, match the 3rd operand against literal 0: |
| 223 | +/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0))) |
| 224 | +/// return AArch64::NoRegister; |
| 225 | +/// // Match the 0th operand against Xm: |
| 226 | +/// if (!matchInst(MaybeBr, AArch64::BR, Xm)) |
| 227 | +/// return AArch64::NoRegister; |
| 228 | +/// // Manually check that Xm and Xn did not match the same register: |
| 229 | +/// if (Xm.get() == Xn.get()) |
| 230 | +/// return AArch64::NoRegister; |
| 231 | +/// // Return the matched register: |
| 232 | +/// return Xm.get(); |
| 233 | +/// |
| 234 | +namespace LowLevelInstMatcherDSL { |
| 235 | + |
| 236 | +// The base class to match an operand of type T. |
| 237 | +// |
| 238 | +// The subclasses of OpMatcher are intended to be allocated on the stack and |
| 239 | +// to only be used by passing them to matchInst() and by calling their get() |
| 240 | +// function, thus the peculiar `mutable` specifiers: to make the calling code |
| 241 | +// compact and readable, the templated matchInst() function has to accept both |
| 242 | +// long-lived Imm/Reg wrappers declared as local variables (intended to capture |
| 243 | +// the first operand's value and match the subsequent operands, whether inside |
| 244 | +// a single instruction or across multiple instructions), as well as temporary |
| 245 | +// wrappers around literal values to match, f.e. Imm(42) or Reg(AArch64::XZR). |
| 246 | +template <typename T> class OpMatcher { |
| 247 | + mutable std::optional<T> Value; |
| 248 | + mutable std::optional<T> SavedValue; |
| 249 | + |
| 250 | + // Remember/restore the last Value - to be called by matchInst. |
| 251 | + void remember() const { SavedValue = Value; } |
| 252 | + void restore() const { Value = SavedValue; } |
| 253 | + |
| 254 | + template <class... OpMatchers> |
| 255 | + friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...); |
| 256 | + |
| 257 | +protected: |
| 258 | + OpMatcher(std::optional<T> ValueToMatch) : Value(ValueToMatch) {} |
| 259 | + |
| 260 | + bool matchValue(T OpValue) const { |
| 261 | + // Check that OpValue does not contradict the existing Value. |
| 262 | + bool MatchResult = !Value || *Value == OpValue; |
| 263 | + // If MatchResult is false, all matchers will be reset before returning from |
| 264 | + // matchInst, including this one, thus no need to assign conditionally. |
| 265 | + Value = OpValue; |
| 266 | + |
| 267 | + return MatchResult; |
| 268 | + } |
| 269 | + |
| 270 | +public: |
| 271 | + /// Returns the captured value. |
| 272 | + T get() const { |
| 273 | + assert(Value.has_value()); |
| 274 | + return *Value; |
| 275 | + } |
| 276 | +}; |
| 277 | + |
| 278 | +class Reg : public OpMatcher<MCPhysReg> { |
| 279 | + bool matches(const MCOperand &Op) const { |
| 280 | + if (!Op.isReg()) |
| 281 | + return false; |
| 282 | + |
| 283 | + return matchValue(Op.getReg()); |
| 284 | + } |
| 285 | + |
| 286 | + template <class... OpMatchers> |
| 287 | + friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...); |
| 288 | + |
| 289 | +public: |
| 290 | + Reg(std::optional<MCPhysReg> RegToMatch = std::nullopt) |
| 291 | + : OpMatcher<MCPhysReg>(RegToMatch) {} |
| 292 | +}; |
| 293 | + |
| 294 | +class Imm : public OpMatcher<int64_t> { |
| 295 | + bool matches(const MCOperand &Op) const { |
| 296 | + if (!Op.isImm()) |
| 297 | + return false; |
| 298 | + |
| 299 | + return matchValue(Op.getImm()); |
| 300 | + } |
| 301 | + |
| 302 | + template <class... OpMatchers> |
| 303 | + friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...); |
| 304 | + |
| 305 | +public: |
| 306 | + Imm(std::optional<int64_t> ImmToMatch = std::nullopt) |
| 307 | + : OpMatcher<int64_t>(ImmToMatch) {} |
| 308 | +}; |
| 309 | + |
| 310 | +/// Tries to match Inst and updates Ops on success. |
| 311 | +/// |
| 312 | +/// If Inst has the specified Opcode and its operand list prefix matches Ops, |
| 313 | +/// this function returns true and updates Ops, otherwise false is returned and |
| 314 | +/// values of Ops are kept as before matchInst was called. |
| 315 | +/// |
| 316 | +/// Please note that while Ops are technically passed by a const reference to |
| 317 | +/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their |
| 318 | +/// fields are marked mutable. |
| 319 | +template <class... OpMatchers> |
| 320 | +bool matchInst(const MCInst &Inst, unsigned Opcode, const OpMatchers &...Ops) { |
| 321 | + if (Inst.getOpcode() != Opcode) |
| 322 | + return false; |
| 323 | + assert(sizeof...(Ops) <= MCPlus::getNumPrimeOperands(Inst) && |
| 324 | + "Too many operands are matched for the Opcode"); |
| 325 | + |
| 326 | + // Ask each matcher to remember its current value in case of rollback. |
| 327 | + (Ops.remember(), ...); |
| 328 | + |
| 329 | + // Check if all matchers match the corresponding operands. |
| 330 | + auto It = Inst.begin(); |
| 331 | + auto AllMatched = (Ops.matches(*(It++)) && ... && true); |
| 332 | + |
| 333 | + // If match failed, restore the original captured values. |
| 334 | + if (!AllMatched) { |
| 335 | + (Ops.restore(), ...); |
| 336 | + return false; |
| 337 | + } |
| 338 | + |
| 339 | + return true; |
| 340 | +} |
| 341 | + |
| 342 | +} // namespace LowLevelInstMatcherDSL |
| 343 | + |
| 344 | +} // namespace bolt |
| 345 | +} // namespace llvm |
| 346 | + |
| 347 | +#endif |
0 commit comments