Skip to content

Commit 50a5679

Browse files
committed
[LAA] Don't require Stride == 1/-1 for inbounds pointer AddRecs nowrap.
I might be missing something, but I *think* the checks for Stride == 1/-1 may be more restrictive than necessary. If we have a pointer AddRec, the maximum increment is 2^(pointer-index-width - 1) - 1. This means that if incrementing the AddRec wraps, the distance between the previously accessed location and the wrapped location is > 2^(pointer-index-width - 1), i.e. if the GEP for the AddRec is inbounds, this would be poison due to the object being larger than half the pointer index type space. The poison would be immediate UB when the memory access gets executed. Similar reasoning can be applied for decrements, I think.
1 parent 173907b commit 50a5679

File tree

10 files changed

+181
-102
lines changed

10 files changed

+181
-102
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1521,7 +1521,7 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
15211521
// and any memory access dependent on it would be immediate UB
15221522
// when executed.
15231523
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1524-
GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1))
1524+
GEP && GEP->isInBounds())
15251525
return Stride;
15261526

15271527
// If the null pointer is undefined, then a access sequence which would

llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ define void @backdep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
3030
; CHECK-EMPTY:
3131
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
3232
; CHECK-NEXT: SCEV assumptions:
33-
; CHECK-NEXT: {(4 + (8 * %n) + %vec),+,8}<%loop> Added Flags: <nusw>
3433
; CHECK-EMPTY:
3534
; CHECK-NEXT: Expressions re-written:
3635
;
@@ -157,7 +156,6 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
157156
; CHECK-EMPTY:
158157
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
159158
; CHECK-NEXT: SCEV assumptions:
160-
; CHECK-NEXT: {((8 * %n) + %vec),+,8}<%loop> Added Flags: <nusw>
161159
; CHECK-EMPTY:
162160
; CHECK-NEXT: Expressions re-written:
163161
;

llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@ define i32 @check_no_dep_via_bounds_compare_symbolic_max_btc_neg_1(ptr %P, i32 %
105105
; CHECK-EMPTY:
106106
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
107107
; CHECK-NEXT: SCEV assumptions:
108-
; CHECK-NEXT: {(8 + (8 * %y) + %P),+,8}<%loop> Added Flags: <nusw>
109108
; CHECK-EMPTY:
110109
; CHECK-NEXT: Expressions re-written:
111110
;

llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ for.end: ; preds = %for.body
243243
; LAA: Memory dependences are safe{{$}}
244244
; LAA: SCEV assumptions:
245245
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
246-
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
246+
; LAA-EMPTY:
247247

248248
; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul:
249249
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)

llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,15 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
1111
; CHECK: for.body.lver.check:
1212
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
1313
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
14-
; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
15-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
16-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
17-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
18-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]]
19-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
20-
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
21-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
22-
; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
14+
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
2315
; CHECK: for.body.ph.lver.orig:
2416
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
2517
; CHECK: for.body.lver.orig:
2618
; CHECK-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
2719
; CHECK-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
2820
; CHECK-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
2921
; CHECK-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
30-
; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
22+
; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
3123
; CHECK-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXA_LVER_ORIG]], align 4
3224
; CHECK-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
3325
; CHECK-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXB_LVER_ORIG]], align 4
@@ -53,14 +45,14 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
5345
; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
5446
; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
5547
; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]]
56-
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0
48+
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope [[META0:![0-9]+]]
5749
; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]]
5850
; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4
5951
; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
6052
; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
6153
; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
6254
; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
63-
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !3
55+
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope [[META3:![0-9]+]]
6456
; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
6557
; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
6658
; CHECK: for.body.ph:
@@ -83,7 +75,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
8375
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
8476
; CHECK: for.end.loopexit:
8577
; CHECK-NEXT: br label [[FOR_END:%.*]]
86-
; CHECK: for.end.loopexit2:
78+
; CHECK: for.end.loopexit1:
8779
; CHECK-NEXT: br label [[FOR_END]]
8880
; CHECK: for.end:
8981
; CHECK-NEXT: ret void
@@ -144,15 +136,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
144136
; CHECK: for.body.lver.check:
145137
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
146138
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
147-
; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
148-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
149-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
150-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
151-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]]
152-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
153-
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
154-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
155-
; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
139+
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
156140
; CHECK: for.body.ph.lver.orig:
157141
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
158142
; CHECK: for.body.lver.orig:
@@ -186,14 +170,14 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
186170
; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
187171
; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
188172
; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]]
189-
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !5
173+
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope [[META5:![0-9]+]]
190174
; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]]
191175
; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4
192176
; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
193177
; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
194178
; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
195179
; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
196-
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !8
180+
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope [[META8:![0-9]+]]
197181
; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
198182
; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
199183
; CHECK: for.body.ph:
@@ -216,7 +200,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
216200
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
217201
; CHECK: for.end.loopexit:
218202
; CHECK-NEXT: br label [[FOR_END:%.*]]
219-
; CHECK: for.end.loopexit2:
203+
; CHECK: for.end.loopexit1:
220204
; CHECK-NEXT: br label [[FOR_END]]
221205
; CHECK: for.end:
222206
; CHECK-NEXT: ret void

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -286,55 +286,58 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
286286
; CHECK-NEXT: entry:
287287
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
288288
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
289-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
289+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[N:%.*]], [[TMP1]]
290290
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
291291
; CHECK: vector.ph:
292292
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
293-
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
293+
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 3
294+
; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i64 [[TMP7]], -1
294295
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
296+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_VEC]], 0
297+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP7]], i64 [[N_VEC]]
298+
; CHECK-NEXT: [[N_VEC1:%.*]] = sub i64 [[N]], [[TMP6]]
295299
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
296-
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP3]], 2
297300
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
298-
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
299-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
300-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
301301
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
302302
; CHECK: vector.body:
303-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
304-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
305-
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
306-
; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
307-
; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[STEP_ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
308-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], <vscale x 4 x i64> [[TMP8]]
309-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], <vscale x 4 x i64> [[TMP9]]
310-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
311-
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
303+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
304+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
305+
; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 3
306+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[DOTIDX1]]
307+
; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP9]], 5
308+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[DOTIDX3]]
309+
; CHECK-NEXT: [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 3
310+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[DOTIDX4]]
311+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
312+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
313+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
314+
; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4
315+
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC1]])
316+
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC2]], 0
312317
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
313318
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
314319
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4
315320
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX]]
316321
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
317322
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
318323
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
319-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[STEP_ADD]], [[DOTSPLAT]]
320-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
321-
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
324+
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC1]]
325+
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
322326
; CHECK: middle.block:
323-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
324-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
327+
; CHECK-NEXT: br label [[VECTOR_PH]]
325328
; CHECK: scalar.ph:
326-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
329+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
327330
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
328331
; CHECK: for.body:
329-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
332+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VECTOR_PH]] ]
330333
; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = shl i64 [[INDVARS_IV]], 3
331334
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[ARRAYIDX_IDX]]
332335
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
333336
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
334337
; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4
335338
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
336339
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
337-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
340+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
338341
; CHECK: for.cond.cleanup:
339342
; CHECK-NEXT: ret void
340343
;

llvm/test/Transforms/LoopVectorize/X86/pr54634.ll

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,8 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
1919
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 1
2020
; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[TBAA8]]
2121
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP2]], 1
22-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 60
23-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
24-
; CHECK: vector.scevcheck:
25-
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
26-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
27-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
28-
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT]]
29-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(13) [[TMP7]], i64 [[MUL_RESULT]]
30-
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr addrspace(13) [[TMP10]], [[TMP7]]
31-
; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]]
32-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(13) [[TMP7]], i64 8
33-
; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
34-
; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
35-
; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
36-
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT2]]
37-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(13) [[SCEVGEP]], i64 [[MUL_RESULT2]]
38-
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr addrspace(13) [[TMP14]], [[SCEVGEP]]
39-
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW3]]
40-
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
41-
; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
22+
; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i64 [[TMP8]], 16
23+
; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4224
; CHECK: vector.ph:
4325
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16
4426
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]]
@@ -77,7 +59,7 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
7759
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
7860
; CHECK-NEXT: br i1 [[CMP_N]], label [[L44:%.*]], label [[SCALAR_PH]]
7961
; CHECK: scalar.ph:
80-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
62+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP:%.*]] ]
8163
; CHECK-NEXT: br label [[L26:%.*]]
8264
; CHECK: L26:
8365
; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[L26]] ]

0 commit comments

Comments
 (0)