Skip to content

Commit 95c525b

Browse files
authored
[VPlan] Preserve nusw on VectorEndPointer (#151558)
In createInterleaveGroups, read the nusw flag in addition to inbounds from the existing GEP and set both on the VPVectorEndPointerRecipe. The nuw flag is deliberately dropped (see the nuw_drop test).
1 parent 005401d commit 95c525b

File tree

2 files changed

+193
-7
lines changed

2 files changed

+193
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2581,10 +2581,10 @@ void VPlanTransforms::createInterleaveGroups(
25812581
auto *InsertPos =
25822582
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
25832583

2584-
bool InBounds = false;
2584+
GEPNoWrapFlags NW = GEPNoWrapFlags::none();
25852585
if (auto *Gep = dyn_cast<GetElementPtrInst>(
25862586
getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts()))
2587-
InBounds = Gep->isInBounds();
2587+
NW = Gep->getNoWrapFlags().withoutNoUnsignedWrap();
25882588

25892589
// Get or create the start address for the interleave group.
25902590
auto *Start =
@@ -2608,8 +2608,9 @@ void VPlanTransforms::createInterleaveGroups(
26082608
VPValue *OffsetVPV =
26092609
Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
26102610
VPBuilder B(InsertPos);
2611-
Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
2612-
: B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
2611+
Addr = NW.isInBounds()
2612+
? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
2613+
: B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
26132614
}
26142615
// If the group is reverse, adjust the index to refer to the last vector
26152616
// lane instead of the first. We adjust the index from the first vector
@@ -2618,9 +2619,7 @@ void VPlanTransforms::createInterleaveGroups(
26182619
if (IG->isReverse()) {
26192620
auto *ReversePtr = new VPVectorEndPointerRecipe(
26202621
Addr, &Plan.getVF(), getLoadStoreType(IRInsertPos),
2621-
-(int64_t)IG->getFactor(),
2622-
InBounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none(),
2623-
InsertPos->getDebugLoc());
2622+
-(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
26242623
ReversePtr->insertBefore(InsertPos);
26252624
Addr = ReversePtr;
26262625
}
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
2+
; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true %s | FileCheck %s
3+
4+
%struct.i32.pair = type { i32, i32 }
5+
6+
define void @nusw_preservation(ptr noalias %A, ptr %B) {
7+
; CHECK-LABEL: define void @nusw_preservation(
8+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
11+
; CHECK: [[VECTOR_PH]]:
12+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
13+
; CHECK: [[VECTOR_BODY]]:
14+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
15+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
16+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
17+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0
18+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[TMP0]], i64 0
19+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 -6
20+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
21+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
22+
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
23+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
24+
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
25+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]]
26+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]]
27+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0
28+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nusw i32, ptr [[TMP5]], i64 0
29+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr nusw i32, ptr [[TMP6]], i64 -6
30+
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
31+
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
32+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
33+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
34+
; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
35+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
36+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
37+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
38+
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
39+
; CHECK: [[MIDDLE_BLOCK]]:
40+
; CHECK-NEXT: br [[EXIT:label %.*]]
41+
; CHECK: [[SCALAR_PH]]:
42+
;
43+
entry:
44+
br label %loop
45+
46+
loop:
47+
%iv = phi i64 [ 1023, %entry ], [ %iv.next, %loop ]
48+
%x = getelementptr nusw %struct.i32.pair, ptr %A, i64 %iv, i32 0
49+
%load.x = load i32, ptr %x, align 4
50+
%trunc = trunc i64 %iv to i32
51+
%add = add nsw i32 %load.x, %trunc
52+
%y = getelementptr nusw %struct.i32.pair, ptr %A, i64 %iv, i32 1
53+
%load.y = load i32, ptr %y, align 4
54+
%sub = sub nsw i32 %load.y, %trunc
55+
%gep.B.iv.0 = getelementptr nusw %struct.i32.pair, ptr %B, i64 %iv, i32 0
56+
store i32 %add, ptr %gep.B.iv.0, align 4
57+
%gep.B.iv.1 = getelementptr nusw %struct.i32.pair, ptr %B, i64 %iv, i32 1
58+
store i32 %sub, ptr %gep.B.iv.1, align 4
59+
%iv.next = add nsw i64 %iv, -1
60+
%exit.cond = icmp sgt i64 %iv, 0
61+
br i1 %exit.cond, label %loop, label %exit
62+
63+
exit:
64+
ret void
65+
}
66+
67+
define void @inbounds_preservation(ptr noalias %A, ptr %B) {
68+
; CHECK-LABEL: define void @inbounds_preservation(
69+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]]) {
70+
; CHECK-NEXT: [[ENTRY:.*:]]
71+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
72+
; CHECK: [[VECTOR_PH]]:
73+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
74+
; CHECK: [[VECTOR_BODY]]:
75+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
76+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
77+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
78+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0
79+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
80+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -6
81+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
82+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
83+
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
84+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
85+
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
86+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]]
87+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]]
88+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0
89+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0
90+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -6
91+
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
92+
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
93+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
94+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
95+
; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
96+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
97+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
98+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
99+
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
100+
; CHECK: [[MIDDLE_BLOCK]]:
101+
; CHECK-NEXT: br [[EXIT:label %.*]]
102+
; CHECK: [[SCALAR_PH]]:
103+
;
104+
entry:
105+
br label %loop
106+
107+
loop:
108+
%iv = phi i64 [ 1023, %entry ], [ %iv.next, %loop ]
109+
%x = getelementptr inbounds %struct.i32.pair, ptr %A, i64 %iv, i32 0
110+
%load.x = load i32, ptr %x, align 4
111+
%trunc = trunc i64 %iv to i32
112+
%add = add nsw i32 %load.x, %trunc
113+
%y = getelementptr inbounds %struct.i32.pair, ptr %A, i64 %iv, i32 1
114+
%load.y = load i32, ptr %y, align 4
115+
%sub = sub nsw i32 %load.y, %trunc
116+
%gep.B.iv.0 = getelementptr inbounds %struct.i32.pair, ptr %B, i64 %iv, i32 0
117+
store i32 %add, ptr %gep.B.iv.0, align 4
118+
%gep.B.iv.1 = getelementptr inbounds %struct.i32.pair, ptr %B, i64 %iv, i32 1
119+
store i32 %sub, ptr %gep.B.iv.1, align 4
120+
%iv.next = add nsw i64 %iv, -1
121+
%exit.cond = icmp sgt i64 %iv, 0
122+
br i1 %exit.cond, label %loop, label %exit
123+
124+
exit:
125+
ret void
126+
}
127+
128+
define void @nuw_drop(ptr noalias %A, ptr %B) {
129+
; CHECK-LABEL: define void @nuw_drop(
130+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]]) {
131+
; CHECK-NEXT: [[ENTRY:.*:]]
132+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
133+
; CHECK: [[VECTOR_PH]]:
134+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
135+
; CHECK: [[VECTOR_BODY]]:
136+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
137+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
138+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
139+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0
140+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 0
141+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 -6
142+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
143+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
144+
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
145+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
146+
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
147+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]]
148+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]]
149+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0
150+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0
151+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 -6
152+
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
153+
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
154+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
155+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
156+
; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
157+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
158+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
159+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
160+
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
161+
; CHECK: [[MIDDLE_BLOCK]]:
162+
; CHECK-NEXT: br [[EXIT:label %.*]]
163+
; CHECK: [[SCALAR_PH]]:
164+
;
165+
entry:
166+
br label %loop
167+
168+
loop:
169+
%iv = phi i64 [ 1023, %entry ], [ %iv.next, %loop ]
170+
%x = getelementptr nuw %struct.i32.pair, ptr %A, i64 %iv, i32 0
171+
%load.x = load i32, ptr %x, align 4
172+
%trunc = trunc i64 %iv to i32
173+
%add = add nsw i32 %load.x, %trunc
174+
%y = getelementptr nuw %struct.i32.pair, ptr %A, i64 %iv, i32 1
175+
%load.y = load i32, ptr %y, align 4
176+
%sub = sub nsw i32 %load.y, %trunc
177+
%gep.B.iv.0 = getelementptr nuw %struct.i32.pair, ptr %B, i64 %iv, i32 0
178+
store i32 %add, ptr %gep.B.iv.0, align 4
179+
%gep.B.iv.1 = getelementptr nuw %struct.i32.pair, ptr %B, i64 %iv, i32 1
180+
store i32 %sub, ptr %gep.B.iv.1, align 4
181+
%iv.next = add nsw i64 %iv, -1
182+
%exit.cond = icmp sgt i64 %iv, 0
183+
br i1 %exit.cond, label %loop, label %exit
184+
185+
exit:
186+
ret void
187+
}

0 commit comments

Comments
 (0)