Skip to content

Commit 9070c25

Browse files
fhahn authored and tru committed
[LAA] Require AddRecs to be in the innermost loop for diff-checks.
The simpler diff-checks require pointers with add-recs from the same innermost loop, but this property wasn't checked completely. Add the missing check to ensure both add-recs are in the innermost loop. Fixes #57315. (cherry picked from commit 9405af1)
1 parent 3367244 commit 9070c25

File tree

3 files changed

+39
-14
lines changed

3 files changed

+39
-14
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ class MemoryDepChecker {
253253
return {};
254254
}
255255

256+
/// Read-only accessor returning the InnermostLoop member.
const Loop *getInnermostLoop() const { return InnermostLoop; }
257+
256258
private:
257259
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
258260
/// applies dynamic knowledge to simplify SCEV expressions and convert them

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,8 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
280280

281281
auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
282282
auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
283-
if (!SrcAR || !SinkAR) {
283+
if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
284+
SinkAR->getLoop() != DC.getInnermostLoop()) {
284285
CanUseDiffCheck = false;
285286
return;
286287
}

llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -172,18 +172,29 @@ exit:
172172
ret void
173173
}
174174

175-
; FIXME: Full no-overlap checks are required instead of difference checks, as
175+
; Full no-overlap checks are required instead of difference checks, as
176176
; one of the add-recs used is invariant in the inner loop.
177177
; Test case for PR57315.
178178
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
179179
; CHECK-LABEL: @nested_loop_outer_iv_addrec_invariant_in_inner1(
180-
; CHECK: entry:
181-
; CHECK-NEXT: [[B:%.*]] = ptrtoint ptr %b to i64
182-
; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr %a to i64
180+
; CHECK: entry:
181+
; CHECK-NEXT: [[N_SHL_2:%.]] = shl i64 %n, 2
182+
; CHECK-NEXT: [[B_GEP_UPPER:%.*]] = getelementptr i8, ptr %b, i64 [[N_SHL_2]]
183+
; CHECK-NEXT: br label %outer
184+
185+
; CHECK: outer.header:
186+
; CHECK: [[OUTER_IV_SHL_2:%.]] = shl i64 %outer.iv, 2
187+
; CHECK-NEXT: [[A_GEP_UPPER:%.*]] = getelementptr i8, ptr %a, i64 [[OUTER_IV_SHL_2]]
188+
; CHECK-NEXT: [[OUTER_IV_4:%.]] = add i64 [[OUTER_IV_SHL_2]], 4
189+
; CHECK-NEXT: [[A_GEP_UPPER_4:%.*]] = getelementptr i8, ptr %a, i64 [[OUTER_IV_4]]
190+
; CHECK: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
191+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
192+
183193
; CHECK: vector.memcheck:
184-
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[A]], [[B]]
185-
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
186-
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
194+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A_GEP_UPPER]], [[B_GEP_UPPER]]
195+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr %b, [[A_GEP_UPPER_4]]
196+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
197+
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
187198
;
188199
entry:
189200
br label %outer.header
@@ -216,13 +227,24 @@ exit:
216227
; sink and source swapped.
217228
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
218229
; CHECK-LABEL: @nested_loop_outer_iv_addrec_invariant_in_inner2(
219-
; CHECK: entry:
220-
; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr %a to i64
221-
; CHECK-NEXT: [[B:%.*]] = ptrtoint ptr %b to i64
230+
; CHECK: entry:
231+
; CHECK-NEXT: [[N_SHL_2:%.]] = shl i64 %n, 2
232+
; CHECK-NEXT: [[B_GEP_UPPER:%.*]] = getelementptr i8, ptr %b, i64 [[N_SHL_2]]
233+
; CHECK-NEXT: br label %outer
234+
235+
; CHECK: outer.header:
236+
; CHECK: [[OUTER_IV_SHL_2:%.]] = shl i64 %outer.iv, 2
237+
; CHECK-NEXT: [[A_GEP_UPPER:%.*]] = getelementptr i8, ptr %a, i64 [[OUTER_IV_SHL_2]]
238+
; CHECK-NEXT: [[OUTER_IV_4:%.]] = add i64 [[OUTER_IV_SHL_2]], 4
239+
; CHECK-NEXT: [[A_GEP_UPPER_4:%.*]] = getelementptr i8, ptr %a, i64 [[OUTER_IV_4]]
240+
; CHECK: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
241+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
242+
222243
; CHECK: vector.memcheck:
223-
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[B]], [[A]]
224-
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
225-
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
244+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr %b, [[A_GEP_UPPER_4]]
245+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A_GEP_UPPER]], [[B_GEP_UPPER]]
246+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
247+
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
226248
;
227249
entry:
228250
br label %outer.header

0 commit comments

Comments
 (0)