Skip to content

Commit fdb9bfb

Browse files
committed
Merge remote-tracking branch 'llvm_be_very_careful/main' into corentin/use_normalization_for_satisfaction
2 parents cde4bb7 + 7ccb5c0 commit fdb9bfb

File tree

1,629 files changed

+63877
-42526
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,629 files changed

+63877
-42526
lines changed

.github/workflows/libcxx-build-and-test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ jobs:
215215
- uses: maxim-lobanov/setup-xcode@60606e260d2fc5762a71e64e74b2174e8ea3c8bd # v1.6.0
216216
with:
217217
# https://github.com/actions/runner-images/blob/main/images/macos/macos-15-Readme.md
218-
xcode-version: '16.3'
218+
xcode-version: '26.0'
219219
- uses: seanmiddleditch/gha-setup-ninja@3b1f8f94a2f8254bd26914c4ab9474d4f0015f67 # v6
220220
- name: Build and test
221221
run: |
Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef BOLT_CORE_MCINSTUTILS_H
10+
#define BOLT_CORE_MCINSTUTILS_H
11+
12+
#include "bolt/Core/BinaryBasicBlock.h"
13+
#include "bolt/Core/MCPlus.h"
14+
#include <map>
15+
#include <variant>
16+
17+
namespace llvm {
18+
class MCCodeEmitter;
19+
}
20+
21+
namespace llvm {
22+
namespace bolt {
23+
24+
class BinaryFunction;
25+
26+
/// MCInstReference represents a reference to a constant MCInst as stored either
27+
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
28+
/// (after a CFG is created).
29+
///
30+
/// The reference may be invalidated when the function containing the referenced
31+
/// instruction is modified.
32+
class MCInstReference {
33+
public:
34+
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
35+
36+
/// Constructs an empty reference.
37+
MCInstReference() : Reference(RefInBB(nullptr, /*Index=*/0)) {}
38+
39+
/// Constructs a reference to the instruction inside the basic block.
40+
MCInstReference(const BinaryBasicBlock &BB, const MCInst &Inst)
41+
: Reference(RefInBB(&BB, getInstIndexInBB(BB, Inst))) {}
42+
/// Constructs a reference to the instruction inside the basic block.
43+
MCInstReference(const BinaryBasicBlock &BB, unsigned Index)
44+
: Reference(RefInBB(&BB, Index)) {}
45+
46+
/// Constructs a reference to the instruction inside the function without
47+
/// CFG information.
48+
MCInstReference(const BinaryFunction &BF, nocfg_const_iterator It)
49+
: Reference(RefInBF(&BF, It)) {}
50+
51+
/// Locates an instruction inside a function and returns a reference.
52+
static MCInstReference get(const MCInst &Inst, const BinaryFunction &BF);
53+
54+
bool operator==(const MCInstReference &Other) const {
55+
return Reference == Other.Reference;
56+
}
57+
58+
const MCInst &getMCInst() const {
59+
assert(!empty() && "Empty reference");
60+
if (auto *Ref = tryGetRefInBB()) {
61+
[[maybe_unused]] unsigned NumInstructions = Ref->BB->size();
62+
assert(Ref->Index < NumInstructions && "Invalid reference");
63+
return Ref->BB->getInstructionAtIndex(Ref->Index);
64+
}
65+
return getRefInBF().It->second;
66+
}
67+
68+
operator const MCInst &() const { return getMCInst(); }
69+
70+
bool empty() const {
71+
if (auto *Ref = tryGetRefInBB())
72+
return Ref->BB == nullptr;
73+
return getRefInBF().BF == nullptr;
74+
}
75+
76+
bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; }
77+
78+
const BinaryFunction *getFunction() const {
79+
assert(!empty() && "Empty reference");
80+
if (auto *Ref = tryGetRefInBB())
81+
return Ref->BB->getFunction();
82+
return getRefInBF().BF;
83+
}
84+
85+
const BinaryBasicBlock *getBasicBlock() const {
86+
assert(!empty() && "Empty reference");
87+
if (auto *Ref = tryGetRefInBB())
88+
return Ref->BB;
89+
return nullptr;
90+
}
91+
92+
/// Computes the original address of the instruction (or offset from base
93+
/// for PIC), assuming the containing function was not modified.
94+
///
95+
/// This function is intended for the use cases like debug printing, as it
96+
/// is only as precise as BinaryContext::computeCodeSize() is and requires
97+
/// iterating over the prefix of the basic block (when CFG is available).
98+
///
99+
/// MCCodeEmitter is not thread safe and the default instance from
100+
/// BinaryContext is used by default, thus pass an instance explicitly if
101+
/// this function may be called from multithreaded code.
102+
uint64_t computeAddress(const MCCodeEmitter *Emitter = nullptr) const;
103+
104+
raw_ostream &print(raw_ostream &OS) const;
105+
106+
private:
107+
static unsigned getInstIndexInBB(const BinaryBasicBlock &BB,
108+
const MCInst &Inst) {
109+
// Usage of pointer arithmetic assumes the instructions are stored in a
110+
// vector, see BasicBlockStorageIsVector in MCInstUtils.cpp.
111+
const MCInst *FirstInstInBB = &*BB.begin();
112+
return &Inst - FirstInstInBB;
113+
}
114+
115+
// Two cases are possible:
116+
// * functions with CFG reconstructed - a function stores a collection of
117+
// basic blocks, each basic block stores a contiguous vector of MCInst
118+
// * functions without CFG - there are no basic blocks created,
119+
// the instructions are directly stored in std::map in BinaryFunction
120+
//
121+
// In both cases, the direct parent of MCInst is stored together with an
122+
// index or iterator pointing to the instruction.
123+
124+
// Helper struct: CFG is available, the direct parent is a basic block.
125+
struct RefInBB {
126+
RefInBB(const BinaryBasicBlock *BB, unsigned Index)
127+
: BB(BB), Index(Index) {}
128+
RefInBB(const RefInBB &Other) = default;
129+
RefInBB &operator=(const RefInBB &Other) = default;
130+
131+
const BinaryBasicBlock *BB;
132+
unsigned Index;
133+
134+
bool operator==(const RefInBB &Other) const {
135+
return BB == Other.BB && Index == Other.Index;
136+
}
137+
};
138+
139+
// Helper struct: CFG is *not* available, the direct parent is a function,
140+
// iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
141+
// is an instruction's offset).
142+
struct RefInBF {
143+
RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
144+
: BF(BF), It(It) {}
145+
RefInBF(const RefInBF &Other) = default;
146+
RefInBF &operator=(const RefInBF &Other) = default;
147+
148+
const BinaryFunction *BF;
149+
nocfg_const_iterator It;
150+
151+
bool operator==(const RefInBF &Other) const {
152+
return BF == Other.BF && It->first == Other.It->first;
153+
}
154+
};
155+
156+
std::variant<RefInBB, RefInBF> Reference;
157+
158+
// Utility methods to be used like this:
159+
//
160+
// if (auto *Ref = tryGetRefInBB())
161+
// return Ref->doSomething(...);
162+
// return getRefInBF().doSomethingElse(...);
163+
const RefInBB *tryGetRefInBB() const {
164+
assert(std::get_if<RefInBB>(&Reference) ||
165+
std::get_if<RefInBF>(&Reference));
166+
return std::get_if<RefInBB>(&Reference);
167+
}
168+
const RefInBF &getRefInBF() const {
169+
assert(std::get_if<RefInBF>(&Reference));
170+
return *std::get_if<RefInBF>(&Reference);
171+
}
172+
};
173+
174+
/// Streams \p Ref to \p OS by forwarding to MCInstReference::print().
///
/// Declared `inline` (not `static inline`): the function is defined in a
/// header, and internal linkage would give every including translation unit
/// its own separate copy.
inline raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
  return Ref.print(OS);
}
178+
179+
/// Instruction-matching helpers operating on a single instruction at a time.
180+
///
181+
/// The idea is to make low-level instruction matching as readable as possible.
182+
/// The classes contained in this namespace are intended to be used as a
183+
/// domain-specific language to match MCInst with the particular opcode and
184+
/// operands.
185+
///
186+
/// The goals of this DSL include
187+
/// * matching a single instruction against the template consisting of the
188+
/// particular target-specific opcode and a pattern of operands
189+
/// * matching operands against the known values (such as 42, AArch64::X1 or
190+
/// "the value of --brk-operand=N command line argument")
191+
/// * capturing operands of an instruction ("whatever is the destination
192+
/// register of AArch64::ADDXri instruction, store it to Xd variable to be
193+
/// queried later")
194+
/// * expressing repeated operands of a single matched instruction (such as
195+
/// "ADDXri Xd, Xd, 42, 0" for an arbitrary register Xd) as well as across
196+
/// multiple calls to matchInst(), which is naturally achieved by sequentially
197+
/// capturing the operands and matching operands against the known values
198+
/// * matching multi-instruction code patterns by sequentially calling
199+
/// matchInst() while passing around already matched operands
200+
///
201+
/// The non-goals (compared to MCPlusBuilder::MCInstMatcher) include
202+
/// * matching an arbitrary tree of instructions in a single matchInst() call
203+
/// * encapsulation of target-specific knowledge ("match an increment of Xm
204+
/// by 42")
205+
///
206+
/// Unlike MCPlusBuilder::MCInstMatcher, this DSL focuses on the use cases when
207+
/// the precise control over the instruction order is important. For example,
208+
/// let's consider a target-specific function that has to match two particular
209+
/// instructions against this pattern (for two different registers Xm and Xn)
210+
///
211+
/// ADDXrs Xm, Xn, Xm, #0
212+
/// BR Xm
213+
///
214+
/// and return the register holding the branch target. Assuming the instructions
215+
/// are available as MaybeAdd and MaybeBr, the following code can be used:
216+
///
217+
/// // Bring the short names into the local scope:
218+
/// using namespace LowLevelInstMatcherDSL;
219+
/// // Declare the registers to capture:
220+
/// Reg Xn, Xm;
221+
/// // Capture the 0th and 1st operands, match the 2nd operand against the
222+
/// // just captured Xm register, match the 3rd operand against literal 0:
223+
/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0)))
224+
/// return AArch64::NoRegister;
225+
/// // Match the 0th operand against Xm:
226+
/// if (!matchInst(MaybeBr, AArch64::BR, Xm))
227+
/// return AArch64::NoRegister;
228+
/// // Manually check that Xm and Xn did not match the same register:
229+
/// if (Xm.get() == Xn.get())
230+
/// return AArch64::NoRegister;
231+
/// // Return the matched register:
232+
/// return Xm.get();
233+
///
234+
namespace LowLevelInstMatcherDSL {
235+
236+
// The base class to match an operand of type T.
237+
//
238+
// The subclasses of OpMatcher are intended to be allocated on the stack and
239+
// to only be used by passing them to matchInst() and by calling their get()
240+
// function, thus the peculiar `mutable` specifiers: to make the calling code
241+
// compact and readable, the templated matchInst() function has to accept both
242+
// long-lived Imm/Reg wrappers declared as local variables (intended to capture
243+
// the first operand's value and match the subsequent operands, whether inside
244+
// a single instruction or across multiple instructions), as well as temporary
245+
// wrappers around literal values to match, f.e. Imm(42) or Reg(AArch64::XZR).
246+
template <typename T> class OpMatcher {
247+
mutable std::optional<T> Value;
248+
mutable std::optional<T> SavedValue;
249+
250+
// Remember/restore the last Value - to be called by matchInst.
251+
void remember() const { SavedValue = Value; }
252+
void restore() const { Value = SavedValue; }
253+
254+
template <class... OpMatchers>
255+
friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
256+
257+
protected:
258+
OpMatcher(std::optional<T> ValueToMatch) : Value(ValueToMatch) {}
259+
260+
bool matchValue(T OpValue) const {
261+
// Check that OpValue does not contradict the existing Value.
262+
bool MatchResult = !Value || *Value == OpValue;
263+
// If MatchResult is false, all matchers will be reset before returning from
264+
// matchInst, including this one, thus no need to assign conditionally.
265+
Value = OpValue;
266+
267+
return MatchResult;
268+
}
269+
270+
public:
271+
/// Returns the captured value.
272+
T get() const {
273+
assert(Value.has_value());
274+
return *Value;
275+
}
276+
};
277+
278+
/// Matcher for register operands: captures a register or checks the operand
/// against the register it already holds.
class Reg : public OpMatcher<MCPhysReg> {
  template <class... OpMatchers>
  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);

  // Non-register operands never match; register operands are passed on to
  // matchValue().
  bool matches(const MCOperand &Op) const {
    return Op.isReg() && matchValue(Op.getReg());
  }

public:
  /// With no argument the matcher starts empty and captures the first
  /// register it is matched against; otherwise it matches \p RegToMatch.
  Reg(std::optional<MCPhysReg> RegToMatch = std::nullopt)
      : OpMatcher<MCPhysReg>(RegToMatch) {}
};
293+
294+
/// Matcher for immediate operands: captures an immediate or checks the
/// operand against the immediate it already holds.
class Imm : public OpMatcher<int64_t> {
  template <class... OpMatchers>
  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);

  // Non-immediate operands never match; immediates are passed on to
  // matchValue().
  bool matches(const MCOperand &Op) const {
    return Op.isImm() && matchValue(Op.getImm());
  }

public:
  /// With no argument the matcher starts empty and captures the first
  /// immediate it is matched against; otherwise it matches \p ImmToMatch.
  Imm(std::optional<int64_t> ImmToMatch = std::nullopt)
      : OpMatcher<int64_t>(ImmToMatch) {}
};
309+
310+
/// Tries to match Inst and updates Ops on success.
311+
///
312+
/// If Inst has the specified Opcode and its operand list prefix matches Ops,
313+
/// this function returns true and updates Ops, otherwise false is returned and
314+
/// values of Ops are kept as before matchInst was called.
315+
///
316+
/// Please note that while Ops are technically passed by a const reference to
317+
/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
318+
/// fields are marked mutable.
319+
template <class... OpMatchers>
320+
bool matchInst(const MCInst &Inst, unsigned Opcode, const OpMatchers &...Ops) {
321+
if (Inst.getOpcode() != Opcode)
322+
return false;
323+
assert(sizeof...(Ops) <= MCPlus::getNumPrimeOperands(Inst) &&
324+
"Too many operands are matched for the Opcode");
325+
326+
// Ask each matcher to remember its current value in case of rollback.
327+
(Ops.remember(), ...);
328+
329+
// Check if all matchers match the corresponding operands.
330+
auto It = Inst.begin();
331+
auto AllMatched = (Ops.matches(*(It++)) && ... && true);
332+
333+
// If match failed, restore the original captured values.
334+
if (!AllMatched) {
335+
(Ops.restore(), ...);
336+
return false;
337+
}
338+
339+
return true;
340+
}
341+
342+
} // namespace LowLevelInstMatcherDSL
343+
344+
} // namespace bolt
345+
} // namespace llvm
346+
347+
#endif

0 commit comments

Comments
 (0)