Skip to content

Commit b0b3fdb

Browse files
Merge branch 'main' into ThreadSet
2 parents 18e880a + b256d0a commit b0b3fdb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1007
-972
lines changed

llvm/docs/ReleaseNotes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ Changes to the Python bindings
146146
Changes to the C API
147147
--------------------
148148

149+
* Add `LLVMGetOrInsertFunction` to get or insert a function, replacing the combination of `LLVMGetNamedFunction` and `LLVMAddFunction`.
150+
149151
Changes to the CodeGen infrastructure
150152
-------------------------------------
151153

llvm/include/llvm-c/Core.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,6 +1206,22 @@ LLVM_C_ABI unsigned LLVMGetDebugLocColumn(LLVMValueRef Val);
12061206
LLVM_C_ABI LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
12071207
LLVMTypeRef FunctionTy);
12081208

1209+
/**
1210+
* Obtain or insert a function into a module.
1211+
*
1212+
* If a function with the specified name already exists in the module, it
1213+
* is returned. Otherwise, a new function is created in the module with the
1214+
* specified name and type and is returned.
1215+
*
1216+
* The returned value corresponds to a llvm::Function instance.
1217+
*
1218+
* @see llvm::Module::getOrInsertFunction()
1219+
*/
1220+
LLVM_C_ABI LLVMValueRef LLVMGetOrInsertFunction(LLVMModuleRef M,
1221+
const char *Name,
1222+
size_t NameLen,
1223+
LLVMTypeRef FunctionTy);
1224+
12091225
/**
12101226
* Obtain a Function value from a Module by its name.
12111227
*

llvm/lib/IR/Core.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2403,6 +2403,14 @@ LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
24032403
GlobalValue::ExternalLinkage, Name, unwrap(M)));
24042404
}
24052405

2406+
LLVMValueRef LLVMGetOrInsertFunction(LLVMModuleRef M, const char *Name,
2407+
size_t NameLen, LLVMTypeRef FunctionTy) {
2408+
return wrap(unwrap(M)
2409+
->getOrInsertFunction(StringRef(Name, NameLen),
2410+
unwrap<FunctionType>(FunctionTy))
2411+
.getCallee());
2412+
}
2413+
24062414
LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) {
24072415
return wrap(unwrap(M)->getFunction(Name));
24082416
}

llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ static cl::opt<double> MaxClonedRate(
129129
cl::Hidden, cl::init(7.5));
130130

131131
namespace {
132-
133132
class SelectInstToUnfold {
134133
SelectInst *SI;
135134
PHINode *SIUse;
@@ -143,10 +142,6 @@ class SelectInstToUnfold {
143142
explicit operator bool() const { return SI && SIUse; }
144143
};
145144

146-
void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
147-
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
148-
std::vector<BasicBlock *> *NewBBs);
149-
150145
class DFAJumpThreading {
151146
public:
152147
DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
@@ -176,16 +171,18 @@ class DFAJumpThreading {
176171
}
177172
}
178173

174+
static void unfold(DomTreeUpdater *DTU, LoopInfo *LI,
175+
SelectInstToUnfold SIToUnfold,
176+
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
177+
std::vector<BasicBlock *> *NewBBs);
178+
179179
AssumptionCache *AC;
180180
DominatorTree *DT;
181181
LoopInfo *LI;
182182
TargetTransformInfo *TTI;
183183
OptimizationRemarkEmitter *ORE;
184184
};
185-
186-
} // end anonymous namespace
187-
188-
namespace {
185+
} // namespace
189186

190187
/// Unfold the select instruction held in \p SIToUnfold by replacing it with
191188
/// control flow.
@@ -194,9 +191,10 @@ namespace {
194191
/// created basic blocks into \p NewBBs.
195192
///
196193
/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
197-
void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
198-
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
199-
std::vector<BasicBlock *> *NewBBs) {
194+
void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI,
195+
SelectInstToUnfold SIToUnfold,
196+
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
197+
std::vector<BasicBlock *> *NewBBs) {
200198
SelectInst *SI = SIToUnfold.getInst();
201199
PHINode *SIUse = SIToUnfold.getUse();
202200
assert(SI->hasOneUse());
@@ -351,10 +349,12 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
351349
SI->eraseFromParent();
352350
}
353351

352+
namespace {
354353
struct ClonedBlock {
355354
BasicBlock *BB;
356355
APInt State; ///< \p State corresponds to the next value of a switch stmnt.
357356
};
357+
} // namespace
358358

359359
typedef std::deque<BasicBlock *> PathType;
360360
typedef std::vector<PathType> PathsType;
@@ -384,6 +384,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
384384
return OS;
385385
}
386386

387+
namespace {
387388
/// ThreadingPath is a path in the control flow of a loop that can be threaded
388389
/// by cloning necessary basic blocks and replacing conditional branches with
389390
/// unconditional ones. A threading path includes a list of basic blocks, the
@@ -1366,6 +1367,7 @@ struct TransformDFA {
13661367
SmallPtrSet<const Value *, 32> EphValues;
13671368
std::vector<ThreadingPath> TPaths;
13681369
};
1370+
} // namespace
13691371

13701372
bool DFAJumpThreading::run(Function &F) {
13711373
LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
@@ -1444,8 +1446,6 @@ bool DFAJumpThreading::run(Function &F) {
14441446
return MadeChanges;
14451447
}
14461448

1447-
} // end anonymous namespace
1448-
14491449
/// Integrate with the new Pass Manager
14501450
PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
14511451
FunctionAnalysisManager &AM) {

llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

Lines changed: 59 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -201,18 +201,27 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
201201
/// unroll count is non-zero.
202202
///
203203
/// This function performs the following:
204-
/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
205-
/// - Create PHI nodes at the unrolling loop exit to combine
206-
/// values that exit the unrolling loop code and jump around it.
204+
/// - Update PHI nodes at the epilog loop exit
205+
/// - Create PHI nodes at the unrolling loop exit and epilog preheader to
206+
/// combine values that exit the unrolling loop code and jump around it.
207207
/// - Update PHI operands in the epilog loop by the new PHI nodes
208-
/// - Branch around the epilog loop if extra iters (ModVal) is zero.
208+
/// - At the unrolling loop exit, branch around the epilog loop if extra iters
209+
/// (ModVal) is zero.
210+
/// - At the epilog preheader, add an llvm.assume call that extra iters is
211+
/// non-zero. If the unrolling loop exit is the predecessor, the above new
212+
/// branch guarantees that assumption. If the unrolling loop preheader is the
213+
/// predecessor, then the required first iteration from the original loop has
214+
/// yet to be executed, so it must be executed in the epilog loop. If we
215+
/// later unroll the epilog loop, that llvm.assume call somehow enables
216+
/// ScalarEvolution to compute an epilog loop maximum trip count, which enables
217+
/// eliminating the branch at the end of the final unrolled epilog iteration.
209218
///
210219
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
211220
BasicBlock *Exit, BasicBlock *PreHeader,
212221
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
213222
ValueToValueMapTy &VMap, DominatorTree *DT,
214223
LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
215-
unsigned Count) {
224+
unsigned Count, AssumptionCache &AC) {
216225
BasicBlock *Latch = L->getLoopLatch();
217226
assert(Latch && "Loop must have a latch");
218227
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -231,7 +240,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
231240
// EpilogLatch
232241
// Exit (EpilogPN)
233242

234-
// Update PHI nodes at NewExit and Exit.
243+
// Update PHI nodes at Exit.
235244
for (PHINode &PN : NewExit->phis()) {
236245
// PN should be used in another PHI located in Exit block as
237246
// Exit was split by SplitBlockPredecessors into Exit and NewExit
@@ -246,15 +255,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
246255
// epilogue edges have already been added.
247256
//
248257
// There is EpilogPreHeader incoming block instead of NewExit as
249-
// NewExit was spilt 1 more time to get EpilogPreHeader.
258+
// NewExit was split 1 more time to get EpilogPreHeader.
250259
assert(PN.hasOneUse() && "The phi should have 1 use");
251260
PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
252261
assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
253262

254-
// Add incoming PreHeader from branch around the Loop
255-
PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader);
256-
SE.forgetValue(&PN);
257-
258263
Value *V = PN.getIncomingValueForBlock(Latch);
259264
Instruction *I = dyn_cast<Instruction>(V);
260265
if (I && L->contains(I))
@@ -271,35 +276,52 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
271276
NewExit);
272277
// Now PHIs should look like:
273278
// NewExit:
274-
// PN = PHI [I, Latch], [poison, PreHeader]
279+
// PN = PHI [I, Latch]
275280
// ...
276281
// Exit:
277282
// EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
278283
}
279284

280-
// Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
281-
// Update corresponding PHI nodes in epilog loop.
285+
// Create PHI nodes at NewExit (from the unrolling loop Latch) and at
286+
// EpilogPreHeader (from PreHeader and NewExit). Update corresponding PHI
287+
// nodes in epilog loop.
282288
for (BasicBlock *Succ : successors(Latch)) {
283289
// Skip this as we already updated phis in exit blocks.
284290
if (!L->contains(Succ))
285291
continue;
292+
293+
// Succ here appears to always be just L->getHeader(). Otherwise, how do we
294+
// know its corresponding epilog block (from VMap) is EpilogHeader and thus
295+
// EpilogPreHeader is the right incoming block for VPN, as set below?
296+
// TODO: Can we thus avoid the enclosing loop over successors?
297+
assert(Succ == L->getHeader() &&
298+
"Expect the only in-loop successor of latch to be the loop header");
299+
286300
for (PHINode &PN : Succ->phis()) {
287-
// Add new PHI nodes to the loop exit block and update epilog
288-
// PHIs with the new PHI values.
289-
PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
290-
NewPN->insertBefore(NewExit->getFirstNonPHIIt());
291-
// Adding a value to the new PHI node from the unrolling loop preheader.
292-
NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
293-
// Adding a value to the new PHI node from the unrolling loop latch.
294-
NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
301+
// Add new PHI nodes to the loop exit block.
302+
PHINode *NewPN0 = PHINode::Create(PN.getType(), /*NumReservedValues=*/1,
303+
PN.getName() + ".unr");
304+
NewPN0->insertBefore(NewExit->getFirstNonPHIIt());
305+
// Add value to the new PHI node from the unrolling loop latch.
306+
NewPN0->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
307+
308+
// Add new PHI nodes to EpilogPreHeader.
309+
PHINode *NewPN1 = PHINode::Create(PN.getType(), /*NumReservedValues=*/2,
310+
PN.getName() + ".epil.init");
311+
NewPN1->insertBefore(EpilogPreHeader->getFirstNonPHIIt());
312+
// Add value to the new PHI node from the unrolling loop preheader.
313+
NewPN1->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
314+
// Add value to the new PHI node from the epilog loop guard.
315+
NewPN1->addIncoming(NewPN0, NewExit);
295316

296317
// Update the existing PHI node operand with the value from the new PHI
297318
// node. Corresponding instruction in epilog loop should be PHI.
298319
PHINode *VPN = cast<PHINode>(VMap[&PN]);
299-
VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
320+
VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN1);
300321
}
301322
}
302323

324+
// In NewExit, branch around the epilog loop if no extra iters.
303325
Instruction *InsertPt = NewExit->getTerminator();
304326
IRBuilder<> B(InsertPt);
305327
Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
@@ -308,7 +330,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
308330
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
309331
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
310332
PreserveLCSSA);
311-
// Add the branch to the exit block (around the unrolling loop)
333+
// Add the branch to the exit block (around the epilog loop)
312334
MDNode *BranchWeights = nullptr;
313335
if (hasBranchWeightMD(*Latch->getTerminator())) {
314336
// Assume equal distribution in interval [0, Count).
@@ -322,10 +344,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
322344
DT->changeImmediateDominator(Exit, NewDom);
323345
}
324346

325-
// Split the main loop exit to maintain canonicalization guarantees.
326-
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
327-
SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
328-
PreserveLCSSA);
347+
// In EpilogPreHeader, assume extra iters is non-zero.
348+
IRBuilder<> B2(EpilogPreHeader, EpilogPreHeader->getFirstNonPHIIt());
349+
Value *ModIsNotNull = B2.CreateIsNotNull(ModVal, "lcmp.mod");
350+
AssumeInst *AI = cast<AssumeInst>(B2.CreateAssumption(ModIsNotNull));
351+
AC.registerAssumption(AI);
329352
}
330353

331354
/// Create a clone of the blocks in a loop and connect them together. A new
@@ -795,7 +818,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
795818
ConstantInt::get(BECount->getType(),
796819
Count - 1)) :
797820
B.CreateIsNotNull(ModVal, "lcmp.mod");
798-
BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
821+
BasicBlock *RemainderLoop =
822+
UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
799823
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
800824
// Branch to either remainder (extra iterations) loop or unrolling loop.
801825
MDNode *BranchWeights = nullptr;
@@ -808,7 +832,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
808832
PreHeaderBR->eraseFromParent();
809833
if (DT) {
810834
if (UseEpilogRemainder)
811-
DT->changeImmediateDominator(NewExit, PreHeader);
835+
DT->changeImmediateDominator(EpilogPreHeader, PreHeader);
812836
else
813837
DT->changeImmediateDominator(PrologExit, PreHeader);
814838
}
@@ -880,7 +904,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
880904
// from both the original loop and the remainder code reaching the exit
881905
// blocks. While the IDom of these exit blocks were from the original loop,
882906
// now the IDom is the preheader (which decides whether the original loop or
883-
// remainder code should run).
907+
// remainder code should run) unless the block still has just the original
908+
// predecessor (such as NewExit in the case of an epilog remainder).
884909
if (DT && !L->getExitingBlock()) {
885910
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
886911
// NB! We have to examine the dom children of all loop blocks, not just
@@ -891,7 +916,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
891916
auto *DomNodeBB = DT->getNode(BB);
892917
for (auto *DomChild : DomNodeBB->children()) {
893918
auto *DomChildBB = DomChild->getBlock();
894-
if (!L->contains(LI->getLoopFor(DomChildBB)))
919+
if (!L->contains(LI->getLoopFor(DomChildBB)) &&
920+
DomChildBB->getUniquePredecessor() != BB)
895921
ChildrenToUpdate.push_back(DomChildBB);
896922
}
897923
}
@@ -930,7 +956,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
930956
// Connect the epilog code to the original loop and update the
931957
// PHI functions.
932958
ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
933-
NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
959+
NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC);
934960

935961
// Update counter in loop for unrolling.
936962
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.

llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
;; Check atoms are remapped for runtime unrolling.
66

77
; CHECK: for.body.epil:
8-
; CHECK-NEXT: store i64 %indvars.iv.unr, ptr %p, align 4, !dbg [[G2R1:!.*]]
8+
; CHECK-NEXT: store i64 %indvars.iv.epil.init, ptr %p, align 4, !dbg [[G2R1:!.*]]
99

1010
; CHECK: for.body.epil.1:
1111
; CHECK-NEXT: store i64 %indvars.iv.next.epil, ptr %p, align 4, !dbg [[G3R1:!.*]]

llvm/test/Transforms/HardwareLoops/ARM/structure.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -321,10 +321,10 @@ for.inc: ; preds = %sw.bb, %sw.bb1, %fo
321321
; CHECK-UNROLL-NOT: dls
322322
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
323323
; CHECK-UNROLL: le lr, [[LOOP]]
324-
; CHECK-UNROLL: wls lr, r12, [[EXIT:.LBB[0-9_]+]]
324+
; CHECK-UNROLL: dls lr, r12
325325
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
326326
; CHECK-UNROLL: le lr, [[EPIL]]
327-
; CHECK-UNROLL-NEXT: [[EXIT]]
327+
; CHECK-UNROLL-NEXT: {{\.LBB[0-9_]+}}: @ %for.cond.cleanup
328328

329329
define void @unroll_inc_int(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
330330
entry:
@@ -357,10 +357,10 @@ for.body:
357357
; CHECK-UNROLL-NOT: dls
358358
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
359359
; CHECK-UNROLL: le lr, [[LOOP]]
360-
; CHECK-UNROLL: wls lr, r12, [[EPIL_EXIT:.LBB[0-9_]+]]
360+
; CHECK-UNROLL: dls lr, r12
361361
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
362362
; CHECK-UNROLL: le lr, [[EPIL]]
363-
; CHECK-UNROLL: [[EPIL_EXIT]]:
363+
; CHECK-UNROLL: {{\.LBB[0-9_]+}}: @ %for.cond.cleanup
364364
; CHECK-UNROLL: pop
365365
define void @unroll_inc_unsigned(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
366366
entry:

0 commit comments

Comments
 (0)