Skip to content

Commit 8b4927a

Browse files
authored
Merge branch 'main' into fixup/alias-analysis-atomic-op
2 parents 4895d8f + 5e4974f commit 8b4927a

File tree

505 files changed

+28188
-5580
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

505 files changed

+28188
-5580
lines changed

.github/CODEOWNERS

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@
9393
/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp @MaheshRavishankar @nicolasvasilache
9494
/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @dcaballe @MaheshRavishankar @nicolasvasilache
9595
/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @MaheshRavishankar @nicolasvasilache
96-
/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @hanhanW @nicolasvasilache
97-
/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @dcaballe @hanhanW @nicolasvasilache
98-
/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @banach-space @dcaballe @hanhanW @nicolasvasilache @Groverkss
96+
/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @nicolasvasilache
97+
/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @dcaballe @nicolasvasilache
98+
/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @banach-space @dcaballe @nicolasvasilache @Groverkss
9999

100100
# MemRef Dialect in MLIR.
101101
/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp @MaheshRavishankar @nicolasvasilache
@@ -112,16 +112,16 @@
112112
/mlir/include/mlir/Dialect/Vector @banach-space @dcaballe @nicolasvasilache @Groverkss
113113
/mlir/include/mlir/Dialect/Vector/IR @kuhar
114114
/mlir/lib/Dialect/Vector @banach-space @dcaballe @nicolasvasilache @Groverkss
115-
/mlir/lib/Dialect/Vector/Transforms/* @banach-space @dcaballe @hanhanW @nicolasvasilache
115+
/mlir/lib/Dialect/Vector/Transforms/* @banach-space @dcaballe @nicolasvasilache
116116
/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @banach-space @dcaballe @MaheshRavishankar @nicolasvasilache
117-
/mlir/**/*EmulateNarrowType* @dcaballe @hanhanW
117+
/mlir/**/*EmulateNarrowType* @dcaballe
118118

119119
# Presburger library in MLIR
120120
/mlir/**/*Presburger* @Groverkss @Superty
121121

122122
# Tensor Dialect in MLIR.
123-
/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @hanhanW @nicolasvasilache
124-
/mlir/lib/Dialect/Tensor/Transforms/* @hanhanW @nicolasvasilache
123+
/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @nicolasvasilache
124+
/mlir/lib/Dialect/Tensor/Transforms/* @nicolasvasilache
125125

126126
# Transform Dialect in MLIR.
127127
/mlir/include/mlir/Dialect/Transform/* @ftynse @nicolasvasilache @rolfmorel

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,7 +1113,7 @@ class BinaryContext {
11131113
return FragmentClasses.isEquivalent(LHS, RHS);
11141114
}
11151115

1116-
/// Add interprocedural reference for \p Function to \p Address
1116+
/// Add interprocedural branch reference from \p Function to \p Address.
11171117
void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
11181118
InterproceduralReferences.push_back({Function, Address});
11191119
}
@@ -1128,7 +1128,8 @@ class BinaryContext {
11281128
/// argument is false.
11291129
bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
11301130

1131-
/// Resolve inter-procedural dependencies from
1131+
/// Resolve inter-procedural branch dependencies discovered during
1132+
/// disassembly.
11321133
void processInterproceduralReferences();
11331134

11341135
/// Skip functions with all parent and child fragments transitively.

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -538,11 +538,6 @@ class MCPlusBuilder {
538538
llvm_unreachable("not implemented");
539539
}
540540

541-
virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
542-
MCContext *Ctx) {
543-
llvm_unreachable("not implemented");
544-
}
545-
546541
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
547542

548543
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {

bolt/lib/Core/BinaryContext.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,8 +1438,6 @@ void BinaryContext::processInterproceduralReferences() {
14381438
continue;
14391439
}
14401440

1441-
// Check if address falls in function padding space - this could be
1442-
// unmarked data in code. In this case adjust the padding space size.
14431441
ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
14441442
assert(Section && "cannot get section for referenced address");
14451443

@@ -1451,7 +1449,7 @@ void BinaryContext::processInterproceduralReferences() {
14511449
if (SectionName == ".plt" || SectionName == ".plt.got")
14521450
continue;
14531451

1454-
// Check if it is aarch64 veneer written at Address
1452+
// Check if it is aarch64 veneer written at Address.
14551453
if (isAArch64() && handleAArch64Veneer(Address))
14561454
continue;
14571455

@@ -1463,6 +1461,8 @@ void BinaryContext::processInterproceduralReferences() {
14631461
exit(1);
14641462
}
14651463

1464+
// Check if the address falls into the function padding space - this could
1465+
// be unmarked data in code. In this case, adjust the padding space size.
14661466
TargetFunction = getBinaryFunctionContainingAddress(Address,
14671467
/*CheckPastEnd=*/false,
14681468
/*UseMaxSize=*/true);

bolt/lib/Passes/Instrumentation.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,9 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
305305
: IndCallHandlerExitBBFunction->getSymbol(),
306306
IndCallSiteID, &*BC.Ctx);
307307

308-
if (!BC.isAArch64()) {
309-
Iter = BB.eraseInstruction(Iter);
310-
Iter = insertInstructions(CounterInstrs, BB, Iter);
311-
--Iter;
312-
} else
313-
Iter = insertInstructions(CounterInstrs, BB, Iter);
308+
Iter = BB.eraseInstruction(Iter);
309+
Iter = insertInstructions(CounterInstrs, BB, Iter);
310+
--Iter;
314311
}
315312

316313
bool Instrumentation::instrumentOneTarget(

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 55 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
4848

4949
namespace {
5050

51-
[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
51+
static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5252
Inst.setOpcode(AArch64::MRS);
5353
Inst.clear();
5454
Inst.addOperand(MCOperand::createReg(RegName));
5555
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
5656
}
5757

58-
[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
58+
static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5959
Inst.setOpcode(AArch64::MSR);
6060
Inst.clear();
6161
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2114,14 +2114,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
21142114
convertJmpToTailCall(Inst);
21152115
}
21162116

2117-
void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
2118-
MCContext *Ctx) override {
2119-
Inst.setOpcode(AArch64::B);
2120-
Inst.clear();
2121-
Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
2122-
Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
2123-
}
2124-
21252117
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
21262118
const MCSymbol *&TBB, const MCSymbol *&FBB,
21272119
MCInst *&CondBranch,
@@ -2479,14 +2471,21 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
24792471
}
24802472

24812473
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
2474+
InstructionListType Insts(5);
24822475
// Code sequence for instrumented indirect call handler:
2483-
// ret
2484-
2485-
InstructionListType Insts;
2486-
2487-
Insts.emplace_back();
2488-
createReturn(Insts.back());
2489-
2476+
// msr nzcv, x1
2477+
// ldp x0, x1, [sp], #16
2478+
// ldr x16, [sp], #16
2479+
// ldp x0, x1, [sp], #16
2480+
// br x16
2481+
setSystemFlag(Insts[0], AArch64::X1);
2482+
createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
2483+
// Here we load the address of the next function which should be called in the
2484+
// original binary to X16 register. Writing to X16 is permitted without
2485+
// needing to restore.
2486+
loadReg(Insts[2], AArch64::X16, AArch64::SP);
2487+
createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
2488+
createIndirectBranch(Insts[4], AArch64::X16, 0);
24902489
return Insts;
24912490
}
24922491

@@ -2562,59 +2561,39 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
25622561
MCSymbol *HandlerFuncAddr,
25632562
int CallSiteID,
25642563
MCContext *Ctx) override {
2564+
InstructionListType Insts;
25652565
// Code sequence used to enter indirect call instrumentation helper:
2566-
// stp x0, x1, [sp, #-16]! createPushRegisters (1)
2567-
// mov target, x0 convertIndirectCallToLoad -> orr x0 target xzr
2566+
// stp x0, x1, [sp, #-16]! createPushRegisters
2567+
// mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
25682568
// mov x1 CallSiteID createLoadImmediate ->
25692569
// movk x1, #0x0, lsl #48
25702570
// movk x1, #0x0, lsl #32
25712571
// movk x1, #0x0, lsl #16
25722572
// movk x1, #0x0
2573-
// stp x0, x30, [sp, #-16]! (2)
2573+
// stp x0, x1, [sp, #-16]!
2574+
// bl *HandlerFuncAddr createIndirectCall ->
25742575
// adr x0 *HandlerFuncAddr -> adrp + add
2575-
// blr x0 (__bolt_instr_ind_call_handler_func)
2576-
// ldp x0, x30, [sp], #16 (2)
2577-
// mov x0, target ; move target address to used register
2578-
// ldp x0, x1, [sp], #16 (1)
2579-
2580-
InstructionListType Insts;
2576+
// blr x0
25812577
Insts.emplace_back();
2582-
createPushRegisters(Insts.back(), getIntArgRegister(0),
2583-
getIntArgRegister(1));
2578+
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
25842579
Insts.emplace_back(CallInst);
2585-
convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
2580+
convertIndirectCallToLoad(Insts.back(), AArch64::X0);
25862581
InstructionListType LoadImm =
25872582
createLoadImmediate(getIntArgRegister(1), CallSiteID);
25882583
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
25892584
Insts.emplace_back();
2590-
createPushRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
2585+
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
25912586
Insts.resize(Insts.size() + 2);
2592-
InstructionListType Addr = materializeAddress(
2593-
HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
2587+
InstructionListType Addr =
2588+
materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
25942589
assert(Addr.size() == 2 && "Invalid Addr size");
25952590
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2596-
25972591
Insts.emplace_back();
2598-
createIndirectCallInst(Insts.back(), false,
2599-
CallInst.getOperand(0).getReg());
2592+
createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
26002593

2601-
Insts.emplace_back();
2602-
createPopRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
2603-
2604-
// move x0 to indirect call register
2605-
Insts.emplace_back();
2606-
Insts.back().setOpcode(AArch64::ORRXrs);
2607-
Insts.back().insert(Insts.back().begin(),
2608-
MCOperand::createReg(CallInst.getOperand(0).getReg()));
2609-
Insts.back().insert(Insts.back().begin() + 1,
2610-
MCOperand::createReg(AArch64::XZR));
2611-
Insts.back().insert(Insts.back().begin() + 2,
2612-
MCOperand::createReg(getIntArgRegister(0)));
2613-
Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0));
2614-
2615-
Insts.emplace_back();
2616-
createPopRegisters(Insts.back(), getIntArgRegister(0),
2617-
getIntArgRegister(1));
2594+
// Carry over metadata including tail call marker if present.
2595+
stripAnnotations(Insts.back());
2596+
moveAnnotations(std::move(CallInst), Insts.back());
26182597

26192598
return Insts;
26202599
}
@@ -2623,53 +2602,43 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
26232602
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
26242603
const MCSymbol *IndCallHandler,
26252604
MCContext *Ctx) override {
2626-
// Code sequence used to check whether InstrTrampoline was initialized
2605+
// Code sequence used to check whether InstrTrampoline was initialized
26272606
// and call it if so, returns via IndCallHandler
2628-
// adrp x0, InstrTrampoline
2629-
// ldr x0, [x0, #lo12:InstrTrampoline]
2607+
// stp x0, x1, [sp, #-16]!
2608+
// mrs x1, nzcv
2609+
// adr x0, InstrTrampoline -> adrp + add
2610+
// ldr x0, [x0]
26302611
// subs x0, x0, #0x0
26312612
// b.eq IndCallHandler
26322613
// str x30, [sp, #-16]!
26332614
// blr x0
26342615
// ldr x30, [sp], #16
26352616
// b IndCallHandler
26362617
InstructionListType Insts;
2637-
2638-
// load handler address
2639-
MCInst InstAdrp;
2640-
InstAdrp.setOpcode(AArch64::ADRP);
2641-
InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2642-
InstAdrp.addOperand(MCOperand::createImm(0));
2643-
setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
2644-
/* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
2645-
Insts.emplace_back(InstAdrp);
2646-
2647-
MCInst InstLoad;
2648-
InstLoad.setOpcode(AArch64::LDRXui);
2649-
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2650-
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2651-
InstLoad.addOperand(MCOperand::createImm(0));
2652-
setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
2653-
/* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
2654-
Insts.emplace_back(InstLoad);
2655-
2656-
InstructionListType CmpJmp =
2657-
createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
2658-
Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
2659-
26602618
Insts.emplace_back();
2661-
storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2662-
2619+
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2620+
Insts.emplace_back();
2621+
getSystemFlag(Insts.back(), getIntArgRegister(1));
2622+
Insts.emplace_back();
2623+
Insts.emplace_back();
2624+
InstructionListType Addr =
2625+
materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
2626+
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2627+
assert(Addr.size() == 2 && "Invalid Addr size");
2628+
Insts.emplace_back();
2629+
loadReg(Insts.back(), AArch64::X0, AArch64::X0);
2630+
InstructionListType cmpJmp =
2631+
createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
2632+
Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
2633+
Insts.emplace_back();
2634+
storeReg(Insts.back(), AArch64::LR, AArch64::SP);
26632635
Insts.emplace_back();
26642636
Insts.back().setOpcode(AArch64::BLR);
2665-
Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
2666-
2637+
Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
26672638
Insts.emplace_back();
2668-
loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2669-
2639+
loadReg(Insts.back(), AArch64::LR, AArch64::SP);
26702640
Insts.emplace_back();
2671-
createDirectBranch(Insts.back(), IndCallHandler, Ctx);
2672-
2641+
createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
26732642
return Insts;
26742643
}
26752644

bolt/runtime/instr.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,12 +1691,9 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
16911691
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
16921692
{
16931693
#if defined(__aarch64__)
1694-
// the target address is placed on stack
1695-
// the identifier of the indirect call site is placed in X1 register
1696-
16971694
// clang-format off
16981695
__asm__ __volatile__(SAVE_ALL
1699-
"ldr x0, [sp, #272]\n"
1696+
"ldp x0, x1, [sp, #288]\n"
17001697
"bl instrumentIndirectCall\n"
17011698
RESTORE_ALL
17021699
"ret\n"
@@ -1731,12 +1728,9 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
17311728
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
17321729
{
17331730
#if defined(__aarch64__)
1734-
// the target address is placed on stack
1735-
// the identifier of the indirect call site is placed in X1 register
1736-
17371731
// clang-format off
17381732
__asm__ __volatile__(SAVE_ALL
1739-
"ldr x0, [sp, #272]\n"
1733+
"ldp x0, x1, [sp, #288]\n"
17401734
"bl instrumentIndirectCall\n"
17411735
RESTORE_ALL
17421736
"ret\n"

bolt/runtime/sys_aarch64.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@
1818
"stp x24, x25, [sp, #-16]!\n" \
1919
"stp x26, x27, [sp, #-16]!\n" \
2020
"stp x28, x29, [sp, #-16]!\n" \
21-
"mrs x29, nzcv\n" \
22-
"stp x29, x30, [sp, #-16]!\n"
21+
"str x30, [sp,#-16]!\n"
2322
// Mirrors SAVE_ALL
2423
#define RESTORE_ALL \
25-
"ldp x29, x30, [sp], #16\n" \
26-
"msr nzcv, x29\n" \
24+
"ldr x30, [sp], #16\n" \
2725
"ldp x28, x29, [sp], #16\n" \
2826
"ldp x26, x27, [sp], #16\n" \
2927
"ldp x24, x25, [sp], #16\n" \

0 commit comments

Comments
 (0)