Skip to content

Commit 2f7252a

Browse files
authored
[LV] Preserve GEP nusw when widening memory (#160885)
1 parent ec91d6b commit 2f7252a

File tree

2 files changed

+187
-4
lines changed

2 files changed

+187
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7487,12 +7487,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
74877487
VPSingleDefRecipe *VectorPtr;
74887488
if (Reverse) {
74897489
// When folding the tail, we may compute an address that we don't in the
7490-
// original scalar loop and it may not be inbounds. Drop Inbounds in that
7491-
// case.
7490+
// original scalar loop: drop the GEP no-wrap flags in this case.
7491+
// Otherwise preserve existing flags without no-unsigned-wrap, as we will
7492+
// emit negative indices.
74927493
GEPNoWrapFlags Flags =
7493-
(CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
7494+
CM.foldTailByMasking() || !GEP
74947495
? GEPNoWrapFlags::none()
7495-
: GEPNoWrapFlags::inBounds();
7496+
: GEP->getNoWrapFlags().withoutNoUnsignedWrap();
74967497
VectorPtr =
74977498
new VPVectorEndPointerRecipe(Ptr, &Plan.getVF(), getLoadStoreType(I),
74987499
/*Stride*/ -1, Flags, I->getDebugLoc());
Lines changed: 182 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,182 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
3+
4+
define i32 @preserve_inbounds(i64 %start, ptr %ptr) {
5+
; CHECK-LABEL: define i32 @preserve_inbounds(
6+
; CHECK-SAME: i64 [[START:%.*]], ptr [[PTR:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
9+
; CHECK: [[VECTOR_PH]]:
10+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
11+
; CHECK: [[VECTOR_BODY]]:
12+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
13+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
14+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
15+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
16+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP0]]
17+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
18+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3
19+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
20+
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
21+
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
22+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
23+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
24+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK: [[MIDDLE_BLOCK]]:
26+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
27+
; CHECK-NEXT: br label %[[END:.*]]
28+
; CHECK: [[SCALAR_PH:.*]]:
29+
; CHECK-NEXT: br label %[[LOOP:.*]]
30+
; CHECK: [[LOOP]]:
31+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
32+
; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ]
33+
; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ]
34+
; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1
35+
; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[REV_IND_NEXT]]
36+
; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4
37+
; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]]
38+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
39+
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
40+
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]]
41+
; CHECK: [[END]]:
42+
; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
43+
; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]]
44+
;
45+
entry:
46+
br label %loop
47+
48+
loop:
49+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
50+
%rev.ind = phi i64 [ %start, %entry ], [ %rev.ind.next, %loop ]
51+
%redux = phi i32 [ 0, %entry ], [ %redux.next, %loop ]
52+
%rev.ind.next = add i64 %rev.ind, -1
53+
%gep.ptr.ind = getelementptr inbounds i32, ptr %ptr, i64 %rev.ind.next
54+
%ld.ptr = load i32, ptr %gep.ptr.ind, align 4
55+
%redux.next = add i32 %ld.ptr, %redux
56+
%iv.next = add i32 %iv, 1
57+
%exit.cond = icmp ne i32 %iv.next, 1024
58+
br i1 %exit.cond, label %loop, label %end
59+
60+
end:
61+
ret i32 %redux.next
62+
}
63+
64+
define i32 @preserve_nusw(i64 %start, ptr %ptr) {
65+
; CHECK-LABEL: define i32 @preserve_nusw(
66+
; CHECK-SAME: i64 [[START:%.*]], ptr [[PTR:%.*]]) {
67+
; CHECK-NEXT: [[ENTRY:.*:]]
68+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
69+
; CHECK: [[VECTOR_PH]]:
70+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
71+
; CHECK: [[VECTOR_BODY]]:
72+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
73+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
74+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
75+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
76+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[TMP0]]
77+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i32 0
78+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i32 -3
79+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
80+
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
81+
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
82+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
83+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
84+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
85+
; CHECK: [[MIDDLE_BLOCK]]:
86+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
87+
; CHECK-NEXT: br label %[[END:.*]]
88+
; CHECK: [[SCALAR_PH:.*]]:
89+
; CHECK-NEXT: br label %[[LOOP:.*]]
90+
; CHECK: [[LOOP]]:
91+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
92+
; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ]
93+
; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ]
94+
; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1
95+
; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[REV_IND_NEXT]]
96+
; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4
97+
; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]]
98+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
99+
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
100+
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]]
101+
; CHECK: [[END]]:
102+
; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
103+
; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]]
104+
;
105+
entry:
106+
br label %loop
107+
108+
loop:
109+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
110+
%rev.ind = phi i64 [ %start, %entry ], [ %rev.ind.next, %loop ]
111+
%redux = phi i32 [ 0, %entry ], [ %redux.next, %loop ]
112+
%rev.ind.next = add i64 %rev.ind, -1
113+
%gep.ptr.ind = getelementptr nusw i32, ptr %ptr, i64 %rev.ind.next
114+
%ld.ptr = load i32, ptr %gep.ptr.ind, align 4
115+
%redux.next = add i32 %ld.ptr, %redux
116+
%iv.next = add i32 %iv, 1
117+
%exit.cond = icmp ne i32 %iv.next, 1024
118+
br i1 %exit.cond, label %loop, label %end
119+
120+
end:
121+
ret i32 %redux.next
122+
}
123+
124+
define i32 @drop_nuw(i64 %start, ptr %ptr) {
125+
; CHECK-LABEL: define i32 @drop_nuw(
126+
; CHECK-SAME: i64 [[START:%.*]], ptr [[PTR:%.*]]) {
127+
; CHECK-NEXT: [[ENTRY:.*:]]
128+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
129+
; CHECK: [[VECTOR_PH]]:
130+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
131+
; CHECK: [[VECTOR_BODY]]:
132+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
133+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
134+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
135+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
136+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[TMP0]]
137+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
138+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 -3
139+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
140+
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
141+
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
142+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
143+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
144+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
145+
; CHECK: [[MIDDLE_BLOCK]]:
146+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
147+
; CHECK-NEXT: br label %[[END:.*]]
148+
; CHECK: [[SCALAR_PH:.*]]:
149+
; CHECK-NEXT: br label %[[LOOP:.*]]
150+
; CHECK: [[LOOP]]:
151+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
152+
; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ]
153+
; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ]
154+
; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1
155+
; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[REV_IND_NEXT]]
156+
; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4
157+
; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]]
158+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
159+
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
160+
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]]
161+
; CHECK: [[END]]:
162+
; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
163+
; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]]
164+
;
165+
entry:
166+
br label %loop
167+
168+
loop:
169+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
170+
%rev.ind = phi i64 [ %start, %entry ], [ %rev.ind.next, %loop ]
171+
%redux = phi i32 [ 0, %entry ], [ %redux.next, %loop ]
172+
%rev.ind.next = add i64 %rev.ind, -1
173+
%gep.ptr.ind = getelementptr nuw i32, ptr %ptr, i64 %rev.ind.next
174+
%ld.ptr = load i32, ptr %gep.ptr.ind, align 4
175+
%redux.next = add i32 %ld.ptr, %redux
176+
%iv.next = add i32 %iv, 1
177+
%exit.cond = icmp ne i32 %iv.next, 1024
178+
br i1 %exit.cond, label %loop, label %end
179+
180+
end:
181+
ret i32 %redux.next
182+
}

0 commit comments

Comments
 (0)