Skip to content

Commit 97f5542

Browse files
authored
[VPlan] Preserve nusw in createInBoundsPtrAdd (#151549)
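Rename createInBoundsPtrAdd to createNoWrapPtrAdd, which now takes the GEPNoWrapFlags to apply as an explicit parameter, and pass the flags through at the call site so that nusw is preserved as well as inbounds.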
1 parent 1b60236 commit 97f5542

File tree

4 files changed

+158
-10
lines changed

4 files changed

+158
-10
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -256,13 +256,15 @@ class VPBuilder {
256256
new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
257257
GEPNoWrapFlags::none(), DL, Name));
258258
}
259-
VPInstruction *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset,
260-
DebugLoc DL = DebugLoc::getUnknown(),
261-
const Twine &Name = "") {
262-
return tryInsertInstruction(
263-
new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
264-
GEPNoWrapFlags::inBounds(), DL, Name));
259+
260+
VPInstruction *createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset,
261+
GEPNoWrapFlags GEPFlags,
262+
DebugLoc DL = DebugLoc::getUnknown(),
263+
const Twine &Name = "") {
264+
return tryInsertInstruction(new VPInstruction(
265+
VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, DL, Name));
265266
}
267+
266268
VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
267269
DebugLoc DL = DebugLoc::getUnknown(),
268270
const Twine &Name = "") {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2615,9 +2615,7 @@ void VPlanTransforms::createInterleaveGroups(
26152615
VPValue *OffsetVPV =
26162616
Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
26172617
VPBuilder B(InsertPos);
2618-
Addr = NW.isInBounds()
2619-
? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
2620-
: B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
2618+
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
26212619
}
26222620
// If the group is reverse, adjust the index to refer to the last vector
26232621
// lane instead of the first. We adjust the index from the first vector

llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ define void @test_ig_insert_pos_at_end_of_vpbb(ptr noalias %dst, ptr noalias %sr
8686
; CHECK: [[VECTOR_BODY]]:
8787
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
8888
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr nusw { i16, i16, i16, i16 }, ptr [[SRC]], i64 [[TMP3]], i32 2
89-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 -4
89+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw i8, ptr [[TMP4]], i32 -4
9090
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2
9191
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
9292
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>

llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,151 @@ loop:
185185
exit:
186186
ret void
187187
}
188+
189+
define void @nusw_preservation_2(ptr %src, ptr noalias %dst) {
190+
; CHECK-LABEL: define void @nusw_preservation_2(
191+
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
192+
; CHECK-NEXT: [[ENTRY:.*:]]
193+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
194+
; CHECK: [[VECTOR_PH]]:
195+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
196+
; CHECK: [[VECTOR_BODY]]:
197+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
198+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
199+
; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
200+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[SRC]], i64 [[TMP0]]
201+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i32 -1
202+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
203+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
204+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
205+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC1]], [[STRIDED_VEC]]
206+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr nusw i8, ptr [[DST]], i64 [[INDEX]]
207+
; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP4]], align 1
208+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
209+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
210+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
211+
; CHECK: [[MIDDLE_BLOCK]]:
212+
; CHECK-NEXT: br [[EXIT:label %.*]]
213+
; CHECK: [[SCALAR_PH]]:
214+
;
215+
entry:
216+
br label %loop
217+
218+
loop: ; preds = %loop, %entry
219+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
220+
%iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ]
221+
%or.1 = or disjoint i64 %iv2, 1
222+
%gep.src.or.1 = getelementptr nusw i8, ptr %src, i64 %or.1
223+
%load.src.1 = load i8, ptr %gep.src.or.1, align 1
224+
%gep.src.iv2 = getelementptr nusw i8, ptr %src, i64 %iv2
225+
%load.src.2 = load i8, ptr %gep.src.iv2, align 1
226+
%add = add i8 %load.src.1, %load.src.2
227+
%gep.dst.iv = getelementptr nusw i8, ptr %dst, i64 %iv
228+
store i8 %add, ptr %gep.dst.iv, align 1
229+
%iv2.next = add i64 %iv2, 2
230+
%iv.next = add i64 %iv, 1
231+
%exit.cond = icmp eq i64 %iv.next, 100
232+
br i1 %exit.cond, label %exit, label %loop
233+
234+
exit:
235+
ret void
236+
}
237+
238+
define void @inbounds_preservation_2(ptr %src, ptr noalias %dst) {
239+
; CHECK-LABEL: define void @inbounds_preservation_2(
240+
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
241+
; CHECK-NEXT: [[ENTRY:.*:]]
242+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
243+
; CHECK: [[VECTOR_PH]]:
244+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
245+
; CHECK: [[VECTOR_BODY]]:
246+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
247+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
248+
; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
249+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP0]]
250+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -1
251+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
252+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
253+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
254+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC1]], [[STRIDED_VEC]]
255+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
256+
; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP4]], align 1
257+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
258+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
259+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
260+
; CHECK: [[MIDDLE_BLOCK]]:
261+
; CHECK-NEXT: br [[EXIT:label %.*]]
262+
; CHECK: [[SCALAR_PH]]:
263+
;
264+
entry:
265+
br label %loop
266+
267+
loop: ; preds = %loop, %entry
268+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
269+
%iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ]
270+
%or.1 = or disjoint i64 %iv2, 1
271+
%gep.src.or.1 = getelementptr inbounds i8, ptr %src, i64 %or.1
272+
%load.src.1 = load i8, ptr %gep.src.or.1, align 1
273+
%gep.src.iv2 = getelementptr inbounds i8, ptr %src, i64 %iv2
274+
%load.src.2 = load i8, ptr %gep.src.iv2, align 1
275+
%add = add i8 %load.src.1, %load.src.2
276+
%gep.dst.iv = getelementptr inbounds i8, ptr %dst, i64 %iv
277+
store i8 %add, ptr %gep.dst.iv, align 1
278+
%iv2.next = add i64 %iv2, 2
279+
%iv.next = add i64 %iv, 1
280+
%exit.cond = icmp eq i64 %iv.next, 100
281+
br i1 %exit.cond, label %exit, label %loop
282+
283+
exit:
284+
ret void
285+
}
286+
287+
define void @nuw_drop_2(ptr %src, ptr noalias %dst) {
288+
; CHECK-LABEL: define void @nuw_drop_2(
289+
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
290+
; CHECK-NEXT: [[ENTRY:.*:]]
291+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
292+
; CHECK: [[VECTOR_PH]]:
293+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
294+
; CHECK: [[VECTOR_BODY]]:
295+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
296+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
297+
; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
298+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[SRC]], i64 [[TMP0]]
299+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -1
300+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
301+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
302+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
303+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC1]], [[STRIDED_VEC]]
304+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr nuw i8, ptr [[DST]], i64 [[INDEX]]
305+
; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP4]], align 1
306+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
307+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
308+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
309+
; CHECK: [[MIDDLE_BLOCK]]:
310+
; CHECK-NEXT: br [[EXIT:label %.*]]
311+
; CHECK: [[SCALAR_PH]]:
312+
;
313+
entry:
314+
br label %loop
315+
316+
loop: ; preds = %loop, %entry
317+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
318+
%iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ]
319+
%or.1 = or disjoint i64 %iv2, 1
320+
%gep.src.or.1 = getelementptr nuw i8, ptr %src, i64 %or.1
321+
%load.src.1 = load i8, ptr %gep.src.or.1, align 1
322+
%gep.src.iv2 = getelementptr nuw i8, ptr %src, i64 %iv2
323+
%load.src.2 = load i8, ptr %gep.src.iv2, align 1
324+
%add = add i8 %load.src.1, %load.src.2
325+
%gep.dst.iv = getelementptr nuw i8, ptr %dst, i64 %iv
326+
store i8 %add, ptr %gep.dst.iv, align 1
327+
%iv2.next = add i64 %iv2, 2
328+
%iv.next = add i64 %iv, 1
329+
%exit.cond = icmp eq i64 %iv.next, 100
330+
br i1 %exit.cond, label %exit, label %loop
331+
332+
exit:
333+
ret void
334+
}
335+

0 commit comments

Comments
 (0)