Skip to content

Commit ea967e2

Browse files
committed
feat(CodeGen): Add PrefetchInsertion pass
1 parent fb647e6 commit ea967e2

File tree

8 files changed

+226
-4
lines changed

8 files changed

+226
-4
lines changed

llvm/include/llvm/CodeGen/MachineBasicBlock.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,7 @@ class MachineBasicBlock
12991299

13001300
MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
13011301

1302-
const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
1302+
const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
13031303
return CallInstSymbols;
13041304
}
13051305

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,9 @@ class MachineInstr
123123
NoUSWrap = 1 << 20, // Instruction supports geps
124124
// no unsigned signed wrap.
125125
SameSign = 1 << 21, // Both operands have the same sign.
126-
InBounds = 1 << 22 // Pointer arithmetic remains inbounds.
126+
InBounds = 1 << 22, // Pointer arithmetic remains inbounds.
127127
// Implies NoUSWrap.
128+
Prefetch = 1 << 23, // Instruction is a prefetch.
128129
};
129130

130131
private:

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include "WasmException.h"
1919
#include "WinCFGuard.h"
2020
#include "WinException.h"
21-
#include "llvm/Support/SMLoc.h"
2221
#include "llvm/ADT/APFloat.h"
2322
#include "llvm/ADT/APInt.h"
2423
#include "llvm/ADT/BitmaskEnum.h"
@@ -120,6 +119,7 @@
120119
#include "llvm/Support/Format.h"
121120
#include "llvm/Support/MathExtras.h"
122121
#include "llvm/Support/Path.h"
122+
#include "llvm/Support/SMLoc.h"
123123
#include "llvm/Support/VCSRevision.h"
124124
#include "llvm/Support/VirtualFileSystem.h"
125125
#include "llvm/Support/raw_ostream.h"

llvm/lib/CodeGen/BasicBlockSections.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ class BasicBlockSections : public MachineFunctionPass {
106106
public:
107107
static char ID;
108108

109-
BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
109+
// BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
110+
// nullptr;
110111

111112
BasicBlockSections() : MachineFunctionPass(ID) {
112113
initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/SmallPtrSet.h"
2323
#include "llvm/ADT/SmallVector.h"
2424
#include "llvm/ADT/Statistic.h"
25+
#include "llvm/ADT/StringExtras.h"
2526
#include "llvm/Analysis/BlockFrequencyInfo.h"
2627
#include "llvm/Analysis/BranchProbabilityInfo.h"
2728
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
//===-- PrefetchInsertion.cpp - Prefetch insertion pass -------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// Prefetch insertion pass implementation.
11+
//===----------------------------------------------------------------------===//
12+
/// Prefetch insertion pass.
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "MCTargetDesc/X86MCTargetDesc.h"
16+
#include "X86.h"
17+
#include "X86InstrBuilder.h"
18+
#include "X86InstrInfo.h"
19+
#include "X86MachineFunctionInfo.h"
20+
#include "X86Subtarget.h"
21+
#include "llvm/ADT/SmallVector.h"
22+
#include "llvm/ADT/StringExtras.h"
23+
#include "llvm/ADT/StringRef.h"
24+
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
25+
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
26+
#include "llvm/CodeGen/MachineBasicBlock.h"
27+
#include "llvm/CodeGen/MachineFunction.h"
28+
#include "llvm/CodeGen/MachineFunctionPass.h"
29+
#include "llvm/CodeGen/MachineOperand.h"
30+
#include "llvm/CodeGen/Passes.h"
31+
#include "llvm/CodeGen/TargetInstrInfo.h"
32+
#include "llvm/InitializePasses.h"
33+
#include "llvm/MC/MCContext.h"
34+
#include "llvm/Target/TargetMachine.h"
35+
#include <map>
36+
37+
using namespace llvm;
38+
#define DEBUG_TYPE "prefetchinsertion"
39+
40+
// Hidden flag, on by default. runOnMachineFunction reads it to pick the opcode
// of the inserted instruction: X86::PREFETCHIT1 when set, X86::PREFETCHT1
// otherwise.
static cl::opt<bool> UseCodePrefetchInstruction(
    "use-code-prefetch-instruction", cl::Hidden, cl::init(true),
    cl::desc("Whether to use the new prefetchit1 instruction."));

// Hidden flag, off by default. When set, the inserted instruction gets an
// immediate displacement of 0 instead of the prefetch-target global address,
// and no memory operand is attached to it.
static cl::opt<bool> PrefetchNextAddress(
    "prefetch-next-address", cl::Hidden, cl::init(false),
    cl::desc(
        "Whether to prefetch the next address instead of the target address."));
49+
50+
namespace {} // end anonymous namespace
51+
52+
namespace llvm {
53+
class PrefetchInsertion : public MachineFunctionPass {
54+
public:
55+
static char ID;
56+
57+
BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
58+
59+
PrefetchInsertion() : MachineFunctionPass(ID) {
60+
initializePrefetchInsertionPass(*PassRegistry::getPassRegistry());
61+
}
62+
63+
StringRef getPassName() const override { return "Prefetch Insertion Pass"; }
64+
65+
void getAnalysisUsage(AnalysisUsage &AU) const override;
66+
67+
/// Identify basic blocks that need separate sections and prepare to emit them
68+
/// accordingly.
69+
bool runOnMachineFunction(MachineFunction &MF) override;
70+
};
71+
72+
} // namespace llvm
73+
74+
char PrefetchInsertion::ID = 0;
75+
INITIALIZE_PASS_BEGIN(
76+
PrefetchInsertion, "prefetch-insertion",
77+
"Applies path clonings for the -basic-block-sections=list option", false,
78+
false)
79+
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
80+
INITIALIZE_PASS_END(
81+
PrefetchInsertion, "prefetch-insertion",
82+
"Applies path clonings for the -basic-block-sections=list option", false,
83+
false)
84+
85+
bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) {
86+
assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
87+
"BB Sections list not enabled!");
88+
if (hasInstrProfHashMismatch(MF))
89+
return false;
90+
// errs() << "Running on " << MF.getName() << "\n";
91+
Function &F = MF.getFunction();
92+
auto PtrTy = PointerType::getUnqual(F.getParent()->getContext());
93+
DenseSet<BBPosition> PrefetchTargets =
94+
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
95+
.getPrefetchTargetsForFunction(MF.getName());
96+
// errs() << "Targets: Function: " << F.getName() << " "
97+
// << PrefetchTargets.size() << "\n";
98+
DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
99+
for (const auto &P : PrefetchTargets)
100+
PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset);
101+
for (auto &[BBID, V] : PrefetchTargetsByBBID)
102+
llvm::sort(V);
103+
for (auto &BB : MF)
104+
BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]);
105+
106+
for (const BBPosition &P : PrefetchTargets) {
107+
SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
108+
PrefetchTargetName += F.getName();
109+
PrefetchTargetName += "_";
110+
PrefetchTargetName += utostr(P.BBID.BaseID);
111+
PrefetchTargetName += "_";
112+
PrefetchTargetName += utostr(P.BBOffset);
113+
F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
114+
}
115+
116+
SmallVector<PrefetchHint> PrefetchHints =
117+
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
118+
.getPrefetchHintsForFunction(MF.getName());
119+
// errs() << "Hints: Function: " << F.getName() << " " << PrefetchHints.size()
120+
// << "\n";
121+
for (const PrefetchHint &H : PrefetchHints) {
122+
SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
123+
PrefetchTargetName += H.TargetFunctionName;
124+
PrefetchTargetName += "_";
125+
PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID);
126+
PrefetchTargetName += "_";
127+
PrefetchTargetName += utostr(H.TargetPosition.BBOffset);
128+
F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
129+
}
130+
131+
DenseMap<UniqueBBID, std::map<unsigned, SmallVector<PrefetchTarget>>>
132+
PrefetchHintsByBBID;
133+
for (const auto &H : PrefetchHints) {
134+
PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back(
135+
PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID,
136+
H.TargetPosition.BBOffset});
137+
}
138+
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
139+
for (auto &BB : MF) {
140+
auto It = PrefetchHintsByBBID.find(*BB.getBBID());
141+
if (It == PrefetchHintsByBBID.end())
142+
continue;
143+
auto BBPrefetchHintIt = It->second.begin();
144+
unsigned NumInsts = 0;
145+
auto E = BB.getFirstTerminator();
146+
unsigned NumCallsites = 0;
147+
for (auto I = BB.instr_begin();;) {
148+
auto Current = I;
149+
if (NumCallsites >= BBPrefetchHintIt->first || Current == E) {
150+
for (const auto &PrefetchTarget : BBPrefetchHintIt->second) {
151+
SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
152+
PrefetchTargetName += PrefetchTarget.TargetFunction;
153+
PrefetchTargetName += "_";
154+
PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID);
155+
PrefetchTargetName += "_";
156+
PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset);
157+
auto *GV =
158+
MF.getFunction().getParent()->getNamedValue(PrefetchTargetName);
159+
// errs() << "Inserting prefetch for " << GV->getName() << " at "
160+
// << MF.getName() << " " << BB.getName() << " " << NumInsts
161+
// << "\n";
162+
MachineInstr *PFetch = MF.CreateMachineInstr(
163+
UseCodePrefetchInstruction ? TII->get(X86::PREFETCHIT1)
164+
: TII->get(X86::PREFETCHT1),
165+
Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(),
166+
true);
167+
PFetch->setFlag(MachineInstr::Prefetch);
168+
MachineInstrBuilder MIB(MF, PFetch);
169+
if (!PrefetchNextAddress) {
170+
MIB.addMemOperand(MF.getMachineMemOperand(
171+
MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
172+
/*base_alignment=*/llvm::Align(1)));
173+
}
174+
MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
175+
if (PrefetchNextAddress)
176+
MIB.addImm(0);
177+
else
178+
MIB.addGlobalAddress(GV);
179+
MIB.addReg(X86::NoRegister);
180+
BB.insert(Current, PFetch);
181+
}
182+
++BBPrefetchHintIt;
183+
if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end())
184+
break;
185+
}
186+
if (Current != E) {
187+
// Print the assembly for the instruction.
188+
if (!Current->isPosition() && !Current->isImplicitDef() &&
189+
!Current->isKill() && !Current->isDebugInstr()) {
190+
++NumInsts;
191+
}
192+
if (Current->isCall())
193+
++NumCallsites;
194+
++I;
195+
}
196+
}
197+
}
198+
return true;
199+
}
200+
201+
/// Requires the basic block sections profile reader, marks every analysis as
/// preserved, and then lets MachineFunctionPass add its own requirements.
void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const {
  // Source of the prefetch targets and hints read in runOnMachineFunction.
  AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
  AU.setPreservesAll();
  // Base-class requirements are appended last.
  MachineFunctionPass::getAnalysisUsage(AU);
}
206+
207+
/// Factory for the prefetch insertion pass; the returned object is allocated
/// with `new` and handed to the caller.
FunctionPass *llvm::createPrefetchInsertionPass() {
  FunctionPass *Pass = new PrefetchInsertion();
  return Pass;
}

llvm/lib/Target/X86/X86.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,13 @@ FunctionPass *createX86IndirectThunksPass();
166166
/// This pass replaces ret instructions with jmp's to __x86_return thunk.
167167
FunctionPass *createX86ReturnThunksPass();
168168

169+
/// This pass ensures instructions featuring a memory operand
170+
/// have distinctive <LineNumber, Discriminator> (with respect to each other)
171+
FunctionPass *createX86DiscriminateMemOpsPass();
172+
173+
/// This pass applies profiling information to insert cache prefetches.
174+
FunctionPass *createX86InsertPrefetchPass();
175+
169176
/// This pass insert wait instruction after X87 instructions which could raise
170177
/// fp exceptions when strict-fp enabled.
171178
FunctionPass *createX86InsertX87waitPass();

llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,9 @@ void X86PassConfig::addPreEmitPass2() {
624624
// after all real instructions have been added to the epilog.
625625
if (TT.isOSWindows() && TT.isX86_64())
626626
addPass(createX86WinEHUnwindV2Pass());
627+
628+
if (TM->getBBSectionsType() == llvm::BasicBlockSection::List)
629+
addPass(createPrefetchInsertionPass());
627630
}
628631

629632
bool X86PassConfig::addPostFastRegAllocRewrite() {

0 commit comments

Comments
 (0)