Skip to content

Commit e248d69

Browse files
committed
Recommit "[LAA] Support pointer phis in loop by analyzing each incoming pointer."
SCEV does not look through non-header PHIs inside the loop. Such phis can be analyzed by adding separate accesses for each incoming pointer value. This results in 2 more loops vectorized in SPEC2000/186.crafty and avoids regressions when sinking instructions before vectorizing. Fixes PR50296, PR50288. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D102266
1 parent 5a6dfbb commit e248d69

File tree

5 files changed

+201
-47
lines changed

5 files changed

+201
-47
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,11 @@ class MemoryDepChecker {
177177

178178
/// Register the location (instructions are given increasing numbers)
179179
/// of a write access.
180-
void addAccess(StoreInst *SI) {
181-
Value *Ptr = SI->getPointerOperand();
182-
Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
183-
InstMap.push_back(SI);
184-
++AccessIdx;
185-
}
180+
void addAccess(StoreInst *SI);
186181

187182
/// Register the location (instructions are given increasing numbers)
188183
/// of a read access.
189-
void addAccess(LoadInst *LI) {
190-
Value *Ptr = LI->getPointerOperand();
191-
Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
192-
InstMap.push_back(LI);
193-
++AccessIdx;
194-
}
184+
void addAccess(LoadInst *LI);
195185

196186
/// Check whether the dependencies between the accesses are safe.
197187
///

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,6 +1263,47 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
12631263
return Diff && *Diff == 1;
12641264
}
12651265

1266+
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
1267+
function_ref<void(Value *)> AddPointer) {
1268+
SmallPtrSet<Value *, 8> Visited;
1269+
SmallVector<Value *> WorkList;
1270+
WorkList.push_back(StartPtr);
1271+
1272+
while (!WorkList.empty()) {
1273+
Value *Ptr = WorkList.pop_back_val();
1274+
if (!Visited.insert(Ptr).second)
1275+
continue;
1276+
auto *PN = dyn_cast<PHINode>(Ptr);
1277+
// SCEV does not look through non-header PHIs inside the loop. Such phis
1278+
// can be analyzed by adding separate accesses for each incoming pointer
1279+
// value.
1280+
if (PN && InnermostLoop.contains(PN->getParent()) &&
1281+
PN->getParent() != InnermostLoop.getHeader()) {
1282+
for (const Use &Inc : PN->incoming_values())
1283+
WorkList.push_back(Inc);
1284+
} else
1285+
AddPointer(Ptr);
1286+
}
1287+
}
1288+
1289+
void MemoryDepChecker::addAccess(StoreInst *SI) {
1290+
visitPointers(SI->getPointerOperand(), *InnermostLoop,
1291+
[this, SI](Value *Ptr) {
1292+
Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1293+
InstMap.push_back(SI);
1294+
++AccessIdx;
1295+
});
1296+
}
1297+
1298+
void MemoryDepChecker::addAccess(LoadInst *LI) {
1299+
visitPointers(LI->getPointerOperand(), *InnermostLoop,
1300+
[this, LI](Value *Ptr) {
1301+
Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1302+
InstMap.push_back(LI);
1303+
++AccessIdx;
1304+
});
1305+
}
1306+
12661307
MemoryDepChecker::VectorizationSafetyStatus
12671308
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
12681309
switch (Type) {
@@ -1962,7 +2003,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
19622003
if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
19632004
Loc.AATags.TBAA = nullptr;
19642005

1965-
Accesses.addStore(Loc);
2006+
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2007+
[&Accesses, Loc](Value *Ptr) {
2008+
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2009+
Accesses.addStore(NewLoc);
2010+
});
19662011
}
19672012
}
19682013

@@ -2006,7 +2051,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
20062051
if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
20072052
Loc.AATags.TBAA = nullptr;
20082053

2009-
Accesses.addLoad(Loc, IsReadOnlyPtr);
2054+
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2055+
[&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
2056+
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2057+
Accesses.addLoad(NewLoc, IsReadOnlyPtr);
2058+
});
20102059
}
20112060

20122061
// If we write (or read-write) to a single destination and there are no

llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll

Lines changed: 142 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
define i32 @load_with_pointer_phi_no_runtime_checks(%s1* %data) {
66
; CHECK-LABEL: load_with_pointer_phi_no_runtime_checks
77
; CHECK-NEXT: loop.header:
8-
; CHECK-NEXT: Report: cannot identify array bounds
8+
; CHECK-NEXT: Memory dependences are safe
99
;
1010
entry:
1111
br label %loop.header
@@ -40,7 +40,7 @@ exit: ; preds = %loop.latch
4040
define i32 @store_with_pointer_phi_no_runtime_checks(%s1* %data) {
4141
; CHECK-LABEL: 'store_with_pointer_phi_no_runtime_checks'
4242
; CHECK-NEXT: loop.header:
43-
; CHECK-NEXT: Report: cannot identify array bounds
43+
; CHECK-NEXT: Memory dependences are safe
4444
;
4545
entry:
4646
br label %loop.header
@@ -75,7 +75,23 @@ exit: ; preds = %loop.latch
7575
define i32 @store_with_pointer_phi_runtime_checks(double* %A, double* %B, double* %C) {
7676
; CHECK-LABEL: 'store_with_pointer_phi_runtime_checks'
7777
; CHECK-NEXT: loop.header:
78-
; CHECK-NEXT: Report: cannot identify array bounds
78+
; CHECK-NEXT: Memory dependences are safe with run-time checks
79+
; CHECK: Run-time memory checks:
80+
; CHECK-NEXT: Check 0:
81+
; CHECK-NEXT: Comparing group ([[GROUP_B:.+]]):
82+
; CHECK-NEXT: %gep.1 = getelementptr inbounds double, double* %B, i64 %iv
83+
; CHECK-NEXT: Against group ([[GROUP_C:.+]]):
84+
; CHECK-NEXT: %gep.2 = getelementptr inbounds double, double* %C, i64 %iv
85+
; CHECK-NEXT: Check 1:
86+
; CHECK-NEXT: Comparing group ([[GROUP_B]]):
87+
; CHECK-NEXT: %gep.1 = getelementptr inbounds double, double* %B, i64 %iv
88+
; CHECK-NEXT: Against group ([[GROUP_A:.+]]):
89+
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
90+
; CHECK-NEXT: Check 2:
91+
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
92+
; CHECK-NEXT: %gep.2 = getelementptr inbounds double, double* %C, i64 %iv
93+
; CHECK-NEXT: Against group ([[GROUP_A]]):
94+
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
7995
;
8096
entry:
8197
br label %loop.header
@@ -184,10 +200,41 @@ exit: ; preds = %loop.latch
184200
define i32 @store_with_pointer_phi_incoming_phi(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) {
185201
; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi'
186202
; CHECK-NEXT: loop.header:
187-
; CHECK-NEXT: Report: cannot identify array bounds
188-
; CHECK-NEXT: Dependences:
189-
; CHECK-NEXT: Run-time memory checks:
190-
; CHECK-NEXT: Grouped accesses:
203+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
204+
; CHECK-NEXT: Dependences:
205+
; CHECK-NEXT: Unknown:
206+
; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 ->
207+
; CHECK-NEXT: store double %mul16, double* %ptr.2, align 8
208+
; CHECK-EMPTY:
209+
; CHECK-NEXT: Run-time memory checks:
210+
; CHECK-NEXT: Check 0:
211+
; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]):
212+
; CHECK-NEXT: double* %C
213+
; CHECK-NEXT: Against group ([[GROUP_B:.+]]):
214+
; CHECK-NEXT: double* %B
215+
; CHECK-NEXT: Check 1:
216+
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
217+
; CHECK-NEXT: double* %C
218+
; CHECK-NEXT: Against group ([[GROUP_A:.+]]):
219+
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
220+
; CHECK-NEXT: double* %A
221+
; CHECK-NEXT: Check 2:
222+
; CHECK-NEXT: Comparing group ([[GROUP_B]]):
223+
; CHECK-NEXT: double* %B
224+
; CHECK-NEXT: Against group ([[GROUP_A]]):
225+
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
226+
; CHECK-NEXT: double* %A
227+
; CHECK-NEXT: Grouped accesses:
228+
; CHECK-NEXT: Group [[GROUP_C]]:
229+
; CHECK-NEXT: (Low: %C High: (8 + %C))
230+
; CHECK-NEXT: Member: %C
231+
; CHECK-NEXT: Group [[GROUP_B]]:
232+
; CHECK-NEXT: (Low: %B High: (8 + %B))
233+
; CHECK-NEXT: Member: %B
234+
; CHECK-NEXT: Group [[GROUP_A]]:
235+
; CHECK-NEXT: (Low: %A High: (256000 + %A))
236+
; CHECK-NEXT: Member: {%A,+,8}<nuw><%loop.header>
237+
; CHECK-NEXT: Member: %A
191238
; CHECK-EMPTY:
192239
entry:
193240
br label %loop.header
@@ -229,10 +276,41 @@ exit: ; preds = %loop.latch
229276
define i32 @store_with_pointer_phi_incoming_phi_irreducible_cycle(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) {
230277
; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi_irreducible_cycle'
231278
; CHECK-NEXT: loop.header:
232-
; CHECK-NEXT: Report: cannot identify array bounds
233-
; CHECK-NEXT: Dependences:
234-
; CHECK-NEXT: Run-time memory checks:
235-
; CHECK-NEXT: Grouped accesses:
279+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
280+
; CHECK-NEXT: Dependences:
281+
; CHECK-NEXT: Unknown:
282+
; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 ->
283+
; CHECK-NEXT: store double %mul16, double* %ptr.3, align 8
284+
; CHECK-EMPTY:
285+
; CHECK-NEXT: Run-time memory checks:
286+
; CHECK-NEXT: Check 0:
287+
; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]):
288+
; CHECK-NEXT: double* %C
289+
; CHECK-NEXT: Against group ([[GROUP_B:.+]]):
290+
; CHECK-NEXT: double* %B
291+
; CHECK-NEXT: Check 1:
292+
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
293+
; CHECK-NEXT: double* %C
294+
; CHECK-NEXT: Against group ([[GROUP_A:.+]]):
295+
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
296+
; CHECK-NEXT: double* %A
297+
; CHECK-NEXT: Check 2:
298+
; CHECK-NEXT: Comparing group ([[GROUP_B]]):
299+
; CHECK-NEXT: double* %B
300+
; CHECK-NEXT: Against group ([[GROUP_A]]):
301+
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
302+
; CHECK-NEXT: double* %A
303+
; CHECK-NEXT: Grouped accesses:
304+
; CHECK-NEXT: Group [[GROUP_C]]
305+
; CHECK-NEXT: (Low: %C High: (8 + %C))
306+
; CHECK-NEXT: Member: %C
307+
; CHECK-NEXT: Group [[GROUP_B]]
308+
; CHECK-NEXT: (Low: %B High: (8 + %B))
309+
; CHECK-NEXT: Member: %B
310+
; CHECK-NEXT: Group [[GROUP_A]]
311+
; CHECK-NEXT: (Low: %A High: (256000 + %A))
312+
; CHECK-NEXT: Member: {%A,+,8}<nuw><%loop.header>
313+
; CHECK-NEXT: Member: %A
236314
; CHECK-EMPTY:
237315
entry:
238316
br label %loop.header
@@ -334,10 +412,59 @@ exit: ; preds = %loop.latch
334412
define void @phi_load_store_memdep_check(i1 %c, i16* %A, i16* %B, i16* %C) {
335413
; CHECK-LABEL: Loop access info in function 'phi_load_store_memdep_check':
336414
; CHECK-NEXT: for.body:
337-
; CHECK-NEXT: Report: cannot identify array bounds
338-
; CHECK-NEXT: Dependences:
339-
; CHECK-NEXT: Run-time memory checks:
340-
; CHECK-NEXT: Grouped accesses:
415+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
416+
; CHECK-NEXT: Dependences:
417+
; CHECK-NEXT: Unknown:
418+
; CHECK-NEXT: %lv3 = load i16, i16* %c.sink, align 2 ->
419+
; CHECK-NEXT: store i16 %add, i16* %c.sink, align 1
420+
; CHECK-EMPTY:
421+
; CHECK-NEXT: Unknown:
422+
; CHECK-NEXT: %lv3 = load i16, i16* %c.sink, align 2 ->
423+
; CHECK-NEXT: store i16 %add, i16* %c.sink, align 1
424+
; CHECK-EMPTY:
425+
; CHECK-NEXT: Unknown:
426+
; CHECK-NEXT: %lv = load i16, i16* %A, align 1 ->
427+
; CHECK-NEXT: store i16 %lv, i16* %A, align 1
428+
; CHECK-EMPTY:
429+
; CHECK-NEXT: Unknown:
430+
; CHECK-NEXT: store i16 %lv, i16* %A, align 1 ->
431+
; CHECK-NEXT: %lv2 = load i16, i16* %A, align 1
432+
; CHECK-EMPTY:
433+
; CHECK-NEXT: Run-time memory checks:
434+
; CHECK-NEXT: Check 0:
435+
; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]):
436+
; CHECK-NEXT: i16* %A
437+
; CHECK-NEXT: i16* %A
438+
; CHECK-NEXT: Against group ([[GROUP_C:.+]]):
439+
; CHECK-NEXT: i16* %C
440+
; CHECK-NEXT: i16* %C
441+
; CHECK-NEXT: Check 1:
442+
; CHECK-NEXT: Comparing group ([[GROUP_A]]):
443+
; CHECK-NEXT: i16* %A
444+
; CHECK-NEXT: i16* %A
445+
; CHECK-NEXT: Against group ([[GROUP_B:.+]]):
446+
; CHECK-NEXT: i16* %B
447+
; CHECK-NEXT: i16* %B
448+
; CHECK-NEXT: Check 2:
449+
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
450+
; CHECK-NEXT: i16* %C
451+
; CHECK-NEXT: i16* %C
452+
; CHECK-NEXT: Against group ([[GROUP_B]]):
453+
; CHECK-NEXT: i16* %B
454+
; CHECK-NEXT: i16* %B
455+
; CHECK-NEXT: Grouped accesses:
456+
; CHECK-NEXT: Group [[GROUP_A]]
457+
; CHECK-NEXT: (Low: %A High: (2 + %A))
458+
; CHECK-NEXT: Member: %A
459+
; CHECK-NEXT: Member: %A
460+
; CHECK-NEXT: Group [[GROUP_C]]
461+
; CHECK-NEXT: (Low: %C High: (2 + %C))
462+
; CHECK-NEXT: Member: %C
463+
; CHECK-NEXT: Member: %C
464+
; CHECK-NEXT: Group [[GROUP_B]]
465+
; CHECK-NEXT: (Low: %B High: (2 + %B))
466+
; CHECK-NEXT: Member: %B
467+
; CHECK-NEXT: Member: %B
341468
; CHECK-EMPTY:
342469
;
343470
entry:

llvm/test/Transforms/LoopDistribute/pointer-phi-in-loop.ll

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -55,21 +55,6 @@ for.end.loopexit: ; preds = %if.end
5555
define void @phi_load_distribute(i1 %c, i16* %A, i16* %B, i16* %C) {
5656
; CHECK-LABEL: @phi_load_distribute(
5757
; CHECK-NEXT: entry:
58-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
59-
; CHECK: for.body:
60-
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[IF_END:%.*]] ]
61-
; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[A:%.*]], align 1
62-
; CHECK-NEXT: store i16 [[LV]], i16* [[A]], align 1
63-
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END]]
64-
; CHECK: if.then:
65-
; CHECK-NEXT: [[LV2:%.*]] = load i16, i16* [[A]], align 1
66-
; CHECK-NEXT: br label [[IF_END]]
67-
; CHECK: if.end:
68-
; CHECK-NEXT: [[C_SINK:%.*]] = phi i16* [ [[B:%.*]], [[IF_THEN]] ], [ [[C:%.*]], [[FOR_BODY]] ]
69-
; CHECK-NEXT: [[LV3:%.*]] = load i16, i16* [[C_SINK]], align 2
70-
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV]], 1
71-
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 [[IV_NEXT]], 1000
72-
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
7358
; CHECK: for.end.loopexit:
7459
; CHECK-NEXT: ret void
7560
;

llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
define i32 @load_with_pointer_phi_no_runtime_checks(%s1* %data) {
66
; CHECK-LABEL: @load_with_pointer_phi_no_runtime_checks
7-
; CHECK-NOT: vector.body
7+
; CHECK-NOT: memcheck
8+
; CHECK: vector.body:
89
;
910
entry:
1011
br label %loop.header
@@ -38,7 +39,8 @@ exit: ; preds = %loop.latch
3839

3940
define i32 @store_with_pointer_phi_no_runtime_checks(%s1* %data) {
4041
; CHECK-LABEL: @store_with_pointer_phi_no_runtime_checks
41-
; CHECK-NOT: vector.body
42+
; CHECK-NOT: memcheck
43+
; CHECK: vector.body
4244
;
4345
entry:
4446
br label %loop.header
@@ -72,7 +74,8 @@ exit: ; preds = %loop.latch
7274

7375
define i32 @store_with_pointer_phi_runtime_checks(double* %A, double* %B, double* %C) {
7476
; CHECK-LABEL: @store_with_pointer_phi_runtime_checks
75-
; CHECK-NOT: vector.body
77+
; CHECK: memcheck
78+
; CHECK: vector.body
7679
;
7780
entry:
7881
br label %loop.header

0 commit comments

Comments
 (0)