Skip to content

Commit bc6c8ef

Browse files
yavtukyavtuk
authored andcommitted
[bolt] simplify constant loads for X86 & AArch64
This patch fixed the issue related to load literal for AArch64 (bolt/test/AArch64/materialize-constant.s), address range for literal is limited +/- 1MB, emitCI puts the constants by the end of function and the one is out of available range. SimplifyRODataLoads is enabled by default for X86 & AArch64 Signed-off-by: Moksyakov Alexey <[email protected]>
1 parent 85cc465 commit bc6c8ef

File tree

6 files changed

+142
-41
lines changed

6 files changed

+142
-41
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,13 @@ class MCPlusBuilder {
18721872
return {};
18731873
}
18741874

1875+
virtual InstructionListType materializeConstant(const MCInst &Inst,
1876+
StringRef ConstantData,
1877+
uint64_t Offset) const {
1878+
llvm_unreachable("not implemented");
1879+
return {};
1880+
}
1881+
18751882
/// Creates a new unconditional branch instruction in Inst and set its operand
18761883
/// to TBB.
18771884
virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,7 +1187,8 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
11871187
uint64_t NumDynamicLocalLoadsFound = 0;
11881188

11891189
for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
1190-
for (MCInst &Inst : *BB) {
1190+
for (auto It = BB->begin(); It != BB->end(); ++It) {
1191+
const MCInst &Inst = *It;
11911192
unsigned Opcode = Inst.getOpcode();
11921193
const MCInstrDesc &Desc = BC.MII->get(Opcode);
11931194

@@ -1200,7 +1201,7 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12001201

12011202
if (MIB->hasPCRelOperand(Inst)) {
12021203
// Try to find the symbol that corresponds to the PC-relative operand.
1203-
MCOperand *DispOpI = MIB->getMemOperandDisp(Inst);
1204+
MCOperand *DispOpI = MIB->getMemOperandDisp(const_cast<MCInst &>(Inst));
12041205
assert(DispOpI != Inst.end() && "expected PC-relative displacement");
12051206
assert(DispOpI->isExpr() &&
12061207
"found PC-relative with non-symbolic displacement");
@@ -1226,28 +1227,49 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
12261227
}
12271228

12281229
// Get the contents of the section containing the target address of the
1229-
// memory operand. We are only interested in read-only sections.
1230+
// memory operand. We are only interested in read-only sections for X86,
1231+
// for aarch64 the sections can be read-only or executable.
12301232
ErrorOr<BinarySection &> DataSection =
12311233
BC.getSectionForAddress(TargetAddress);
12321234
if (!DataSection || DataSection->isWritable())
12331235
continue;
12341236

1237+
if (DataSection->isText()) {
1238+
// If data is not part of a function, check if it is part of a global CI
1239+
// Do not proceed if there aren't data markers for CIs
1240+
BinaryFunction *BFTgt =
1241+
BC.getBinaryFunctionContainingAddress(TargetAddress,
1242+
/*CheckPastEnd*/ false,
1243+
/*UseMaxSize*/ true);
1244+
const bool IsInsideFunc =
1245+
BFTgt && BFTgt->isInConstantIsland(TargetAddress);
1246+
1247+
auto CIEndIter = BC.AddressToConstantIslandMap.end();
1248+
auto CIIter = BC.AddressToConstantIslandMap.find(TargetAddress);
1249+
if (!IsInsideFunc && CIIter == CIEndIter)
1250+
continue;
1251+
}
1252+
12351253
if (BC.getRelocationAt(TargetAddress) ||
12361254
BC.getDynamicRelocationAt(TargetAddress))
12371255
continue;
12381256

1239-
uint32_t Offset = TargetAddress - DataSection->getAddress();
1240-
StringRef ConstantData = DataSection->getContents();
1241-
12421257
++NumLocalLoadsFound;
12431258
if (BB->hasProfile())
12441259
NumDynamicLocalLoadsFound += BB->getExecutionCount();
12451260

1246-
if (MIB->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
1247-
++NumLocalLoadsSimplified;
1248-
if (BB->hasProfile())
1249-
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
1250-
}
1261+
uint32_t Offset = TargetAddress - DataSection->getAddress();
1262+
StringRef ConstantData = DataSection->getContents();
1263+
const InstructionListType Instrs =
1264+
MIB->materializeConstant(Inst, ConstantData, Offset);
1265+
if (Instrs.empty())
1266+
continue;
1267+
1268+
It = std::next(BB->replaceInstruction(It, Instrs), Instrs.size() - 1);
1269+
1270+
++NumLocalLoadsSimplified;
1271+
if (BB->hasProfile())
1272+
NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
12511273
}
12521274
}
12531275

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ static cl::opt<bool> SimplifyRODataLoads(
236236
"simplify-rodata-loads",
237237
cl::desc("simplify loads from read-only sections by replacing the memory "
238238
"operand with the constant found in the corresponding section"),
239-
cl::cat(BoltOptCategory));
239+
cl::init(true), cl::cat(BoltOptCategory));
240240

241241
static cl::list<std::string>
242242
SpecializeMemcpy1("memcpy1-spec",
@@ -432,9 +432,11 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
432432
std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
433433
opts::JTFootprintReductionFlag);
434434

435-
Manager.registerPass(
436-
std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
437-
opts::SimplifyRODataLoads);
435+
if (!BC.isRISCV()) {
436+
Manager.registerPass(
437+
std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
438+
opts::SimplifyRODataLoads);
439+
}
438440

439441
Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
440442
opts::RegReAssign);

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2770,6 +2770,54 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
27702770
return Insts;
27712771
}
27722772

2773+
InstructionListType materializeConstant(const MCInst &Inst,
2774+
StringRef ConstantData,
2775+
uint64_t Offset) const override {
2776+
struct InstInfo {
2777+
// Size in bytes that Inst loads from memory.
2778+
uint8_t DataSize;
2779+
// Number of instructions needed to materialize the constant.
2780+
uint8_t numInstrs;
2781+
// Opcode to use for materializing the constant.
2782+
unsigned Opcode;
2783+
};
2784+
2785+
InstInfo I;
2786+
InstructionListType Insts(0);
2787+
switch (Inst.getOpcode()) {
2788+
case AArch64::LDRWl:
2789+
I = {4, 2, AArch64::MOVKWi};
2790+
break;
2791+
case AArch64::LDRXl:
2792+
I = {8, 4, AArch64::MOVKXi};
2793+
break;
2794+
default:
2795+
return Insts;
2796+
}
2797+
2798+
if (ConstantData.size() - Offset < I.DataSize)
2799+
return Insts;
2800+
2801+
const uint64_t ImmVal =
2802+
DataExtractor(ConstantData, BC.AsmInfo->isLittleEndian(),
2803+
BC.AsmInfo->getCodePointerSize())
2804+
.getUnsigned(&Offset, I.DataSize);
2805+
2806+
Insts.resize(I.numInstrs);
2807+
unsigned shift = (Insts.size() - 1) * 16;
2808+
MCPhysReg Reg = Inst.getOperand(0).getReg();
2809+
for (unsigned i = 0; i < Insts.size(); i++, shift -= 16) {
2810+
Insts[i].setOpcode(I.Opcode);
2811+
Insts[i].clear();
2812+
Insts[i].addOperand(MCOperand::createReg(Reg));
2813+
Insts[i].addOperand(MCOperand::createReg(Reg));
2814+
Insts[i].addOperand(MCOperand::createImm((ImmVal >> shift) & 0xFFFF));
2815+
Insts[i].addOperand(MCOperand::createImm(shift));
2816+
}
2817+
2818+
return Insts;
2819+
}
2820+
27732821
std::optional<Relocation>
27742822
createRelocation(const MCFixup &Fixup,
27752823
const MCAsmBackend &MAB) const override {

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1477,6 +1477,24 @@ class X86MCPlusBuilder : public MCPlusBuilder {
14771477
return true;
14781478
}
14791479

1480+
InstructionListType materializeConstant(const MCInst &Inst,
1481+
StringRef ConstantData,
1482+
uint64_t Offset) const override {
1483+
InstructionListType Instrs;
1484+
MCInst InstCopy = Inst;
1485+
1486+
if (!replaceMemOperandWithImm(InstCopy, ConstantData, Offset))
1487+
return Instrs;
1488+
1489+
Instrs.emplace_back();
1490+
Instrs.back().setOpcode(InstCopy.getOpcode());
1491+
Instrs.back().clear();
1492+
for (unsigned i = 0; i < InstCopy.getNumOperands(); ++i)
1493+
Instrs.back().addOperand(InstCopy.getOperand(i));
1494+
1495+
return Instrs;
1496+
}
1497+
14801498
/// TODO: this implementation currently works for the most common opcodes that
14811499
/// load from memory. It can be extended to work with memory store opcodes as
14821500
/// well as more memory load opcodes.

bolt/test/AArch64/materialize-constant.s

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,28 @@
11
// this test checks a load literal instructions changed to movk
22

3-
// REQUIRES: system-linux
3+
# REQUIRES: system-linux
44

5-
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
5+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
6+
# RUN: --defsym CIBIGFUNC=1 %s -o %t.o
7+
# RUN: %clang %cflags %t.o -Wl,-q -o %t.exe
8+
# RUN: llvm-bolt %t.exe -o %t.bolt --lite=0 \
9+
# RUN: --keep-nops --eliminate-unreachable=false \
10+
# RUN: | FileCheck %s --check-prefix=CHECK-LOGS
611

7-
# RUN: link_fdata %s %t.o %t.fdata
8-
# RUN: %clang %cflags -pie %t.o -o %t.exe -Wl,-q -Wl,-z,relro -Wl,-z,now
9-
# RUN: llvm-bolt %t.exe -o %t.bolt -data %t.fdata \
10-
# RUN: --keep-nops --eliminate-unreachable=false
11-
# RUN: llvm-objdump --disassemble-symbols=foo %t.bolt | FileCheck %s
12+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
13+
# RUN: --defsym CIOUTSIDEFUNC=1 %s -o %t.o
14+
# RUN: %clang %cflags %t.o -Wl,-q -o %t.exe
15+
# RUN: llvm-bolt %t.exe -o %t.bolt --lite=0 \
16+
# RUN: --keep-nops --eliminate-unreachable=false \
17+
# RUN: | FileCheck %s --check-prefix=CHECK-LOGS
1218

13-
# CHECK: mov{{.*}} w19, #0
14-
# CHECK-NEXT: mov{{.*}} w22, #0
15-
# CHECK-NEXT: movk{{.*}} w23, #0, lsl #16
16-
# CHECK-NEXT: movk{{.*}} w23, #100
17-
# CHECK-NEXT: movk{{.*}} w24, #0, lsl #16
18-
# CHECK-NEXT: movk{{.*}} w24, #3
19+
# CHECK-LOGS: simplified 2 out of 2 loads
1920

2021
.text
2122
.align 4
2223
.local foo
2324
.type foo, %function
2425
foo:
25-
# FDATA: 1 main 0 1 foo 0 0 10
2626
stp x29, x30, [sp, #-32]!
2727
stp x19, x20, [sp, #16]
2828
mov x29, sp
@@ -31,44 +31,48 @@ foo:
3131
mov w22, #0 // result = 0
3232

3333
ldr w23, .Llimit
34-
ldr w24, .LStep
35-
b .LStub
34+
ldr x24, .LStep
3635

36+
.ifdef CIBIGFUNC
37+
b .LStub
3738
.LConstants:
3839
.Llimit: .word 100
39-
.LStep: .word 3
40-
40+
.LStep: .xword 3
4141
.LStub:
4242
.rep 0x100000
4343
nop
4444
.endr
4545
b .Lmain_loop
46+
.endif
4647

4748
.Lmain_loop:
4849
madd w22, w19, w24, w22 // result += counter * increment
49-
5050
add w19, w19, #1
5151
cmp w19, w23
5252
b.lt .Lmain_loop
53-
5453
mov w0, w22
55-
5654
b .Lreturn_point
57-
5855
.Lreturn_point:
5956
ldp x19, x20, [sp, #16]
6057
ldp x29, x30, [sp], #32
6158
ret
6259
.size foo, .-foo
6360

61+
.ifdef CIOUTSIDEFUNC
62+
.LConstants:
63+
.Llimit: .word 100
64+
.LStep: .xword 3
65+
.endif
66+
6467

6568
.global main
6669
.type main, %function
6770
main:
6871
mov x0, #0
6972
bl foo
70-
mov x0, 0
71-
mov w8, #93
72-
svc #0
73+
mov x0, 0
74+
mov w8, #93
75+
svc #0
76+
77+
.size main, .-main
7378

74-
.size main, .-main

0 commit comments

Comments
 (0)