Skip to content

Commit 78094b4

Browse files
committed
LAA: refine condition for invariant stores
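Since SymbolicStrides is already available when analyzeLoop iterates over the stores, use that information to refine the loop-invariant check: the store pointer's SCEV is first rewritten with replaceSymbolicStrideSCEV, and the rewritten expression is then tested for loop invariance. This results in better vectorization of degenerate cases.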
1 parent 5187d0b commit 78094b4

File tree

4 files changed

+23
-40
lines changed

4 files changed

+23
-40
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2545,7 +2545,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
25452545
for (StoreInst *ST : Stores) {
25462546
Value *Ptr = ST->getPointerOperand();
25472547

2548-
if (isInvariant(Ptr)) {
2548+
const SCEV *PtrScev = replaceSymbolicStrideSCEV(*PSE, SymbolicStrides, Ptr);
2549+
if (PSE->getSE()->isLoopInvariant(PtrScev, TheLoop)) {
25492550
// Record store instructions to loop invariant addresses
25502551
StoresToInvariantAddresses.push_back(ST);
25512552
HasStoreStoreDependenceInvolvingLoopInvariantAddress |=

llvm/test/Analysis/LoopAccessAnalysis/invariant-dependence-before.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,8 +795,12 @@ define void @stores_to_invariant_address(i32 %offset, ptr noalias %dst.1, ptr %d
795795
; CHECK-EMPTY:
796796
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
797797
; CHECK-NEXT: SCEV assumptions:
798+
; CHECK-NEXT: Equal predicate: %offset == 1
798799
; CHECK-EMPTY:
799800
; CHECK-NEXT: Expressions re-written:
801+
; CHECK-NEXT: [PSE] %gep = getelementptr i32, ptr %dst.2, i32 %iv.2.mul:
802+
; CHECK-NEXT: ((4 * (sext i32 {0,+,%offset}<%loop> to i64))<nsw> + %dst.2)
803+
; CHECK-NEXT: --> {%dst.2,+,4}<nw><%loop>
800804
;
801805
entry:
802806
%add = add i32 %offset, 3

llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,27 @@ define void @test_variable_stride(ptr %dst, i32 %scale) {
55
; CHECK-LABEL: define void @test_variable_stride
66
; CHECK-SAME: (ptr [[DST:%.*]], i32 [[SCALE:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
8+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
9+
; CHECK: vector.scevcheck:
10+
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[SCALE]], 1
11+
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
912
; CHECK: vector.ph:
1013
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1114
; CHECK: vector.body:
1215
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1316
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
1417
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
15-
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0]], [[SCALE]]
16-
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[SCALE]]
17-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP2]]
18-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP3]]
19-
; CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 2
20-
; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 2
18+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP0]]
19+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP1]]
20+
; CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 2
21+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 2
2122
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
22-
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
23-
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
23+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
24+
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2425
; CHECK: middle.block:
2526
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
2627
; CHECK: scalar.ph:
27-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
28+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
2829
; CHECK-NEXT: br label [[LOOP:%.*]]
2930
; CHECK: loop:
3031
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -326,46 +326,23 @@ define void @test_versioned_with_non_ex_use(i32 %offset, ptr noalias %dst.1, ptr
326326
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[OFFSET]], 3
327327
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
328328
; CHECK: vector.scevcheck:
329-
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -3, [[OFFSET]]
330-
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ADD]], 0
331-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 [[ADD]]
332-
; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP2]], i32 200)
333-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
334-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
335-
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
336-
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[MUL_RESULT]], 0
337-
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 0
338-
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], i1 [[TMP5]], i1 [[TMP4]]
339-
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[MUL_OVERFLOW]]
340329
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[OFFSET]], 1
341-
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[IDENT_CHECK]]
342-
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
330+
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
343331
; CHECK: vector.ph:
344-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0
345-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
346332
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
347333
; CHECK: vector.body:
348334
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
349-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
350335
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
351-
; CHECK-NEXT: [[TMP10:%.*]] = mul <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
352-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0
353-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP11]]
354-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1
355-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP13]]
356-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP10]], i32 2
357-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP15]]
358-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3
336+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
337+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
338+
; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP1]], [[ADD]]
359339
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP17]]
360-
; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 8
361-
; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 8
362-
; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 8
363-
; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 8
340+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0
341+
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 8
364342
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST_2]], i64 [[TMP9]]
365343
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i32 0
366344
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP21]], align 8
367345
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
368-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
369346
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
370347
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
371348
; CHECK: middle.block:

0 commit comments

Comments
 (0)