Skip to content

Commit 8bbcb5d

Browse files
[LoopVersioningLICM] Only mark pointers with generated checks as noalias
Currently, when we version a loop, all loads and stores have the noalias metadata added to them. If there were some pointers that could not be analyzed, and thus we could not generate runtime aliasing checks for them, then we should not mark loads and stores using these pointers as noalias. Currently this does nothing, as LoopAccessAnalysis discards all results if it couldn't analyze every pointer, leading to no loop versioning happening, but an upcoming patch will change that, and we need this first; otherwise we would incorrectly mark some pointers as noalias even when they aren't.
1 parent e3350a6 commit 8bbcb5d

File tree

2 files changed

+327
-10
lines changed

2 files changed

+327
-10
lines changed

llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,13 @@ bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
344344
}
345345
LoadAndStoreCounter++;
346346
Value *Ptr = St->getPointerOperand();
347+
// Don't allow stores that we don't have runtime checks for, as we won't be
348+
// able to mark them noalias meaning they would prevent any code motion.
349+
auto &Pointers = LAI->getRuntimePointerChecking()->Pointers;
350+
if (!any_of(Pointers, [&](auto &P) { return P.PointerValue == Ptr; })) {
351+
LLVM_DEBUG(dbgs() << " Found a store without a runtime check.\n");
352+
return false;
353+
}
347354
// Check loop invariant.
348355
if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
349356
InvariantCounter++;
@@ -361,6 +368,13 @@ bool LoopVersioningLICM::legalLoopInstructions() {
361368
InvariantCounter = 0;
362369
IsReadOnlyLoop = true;
363370
using namespace ore;
371+
// Get LoopAccessInfo from current loop via the proxy.
372+
LAI = &LAIs.getInfo(*CurLoop);
373+
// Check LoopAccessInfo for need of runtime check.
374+
if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
375+
LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
376+
return false;
377+
}
364378
// Iterate over loop blocks and instructions of each block and check
365379
// instruction safety.
366380
for (auto *Block : CurLoop->getBlocks())
@@ -374,13 +388,6 @@ bool LoopVersioningLICM::legalLoopInstructions() {
374388
return false;
375389
}
376390
}
377-
// Get LoopAccessInfo from current loop via the proxy.
378-
LAI = &LAIs.getInfo(*CurLoop);
379-
// Check LoopAccessInfo for need of runtime check.
380-
if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
381-
LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
382-
return false;
383-
}
384391
// Number of runtime-checks should be less than RuntimeMemoryCheckThreshold
385392
if (LAI->getNumRuntimePointerChecks() >
386393
VectorizerParams::RuntimeMemoryCheckThreshold) {
@@ -515,13 +522,16 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
515522
StringRef Name = "LVAliasScope";
516523
MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
517524
SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope};
525+
auto &Pointers = LAI->getRuntimePointerChecking()->Pointers;
526+
518527
// Iterate over each instruction of loop.
519528
// set no-alias for all load & store instructions.
520529
for (auto *Block : CurLoop->getBlocks()) {
521530
for (auto &Inst : *Block) {
522-
// Only interested in instruction that may modify or read memory.
523-
if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory())
524-
continue;
531+
// We can only add noalias to pointers that we've inserted checks for
532+
Value *V = getLoadStorePointerOperand(&Inst);
533+
if (!V || !any_of(Pointers, [&](auto &P) { return P.PointerValue == V; }))
534+
continue;
525535
// Set no-alias for current instruction.
526536
Inst.setMetadata(
527537
LLVMContext::MD_noalias,
Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt < %s -S -passes='function(loop-versioning-licm,loop-mssa(licm))' | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
6+
; In these tests we have a loop where we can calculate the bounds of some memory
7+
; accesses but not others.
8+
9+
; Load from a gep whose bounds can't be calculated as the offset is loaded from memory
10+
; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval
11+
define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) {
12+
; CHECK-LABEL: define void @gep_loaded_offset(
13+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) {
14+
; CHECK-NEXT: [[ENTRY:.*]]:
15+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
16+
; CHECK: [[WHILE_BODY]]:
17+
; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ]
18+
; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ]
19+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1
20+
; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4
21+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]]
22+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
23+
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4
24+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4
25+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
26+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]]
27+
; CHECK: [[WHILE_END]]:
28+
; CHECK-NEXT: ret void
29+
;
30+
entry:
31+
br label %while.body
32+
33+
while.body:
34+
%n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ]
35+
%p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
36+
%dec = add nsw i32 %n.addr, -1
37+
%rval = load i64, ptr %r, align 4
38+
%arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval
39+
%val = load i32, ptr %arrayidx, align 4
40+
%incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4
41+
store i32 %val, ptr %p.addr, align 4
42+
%tobool.not = icmp eq i32 %dec, 0
43+
br i1 %tobool.not, label %while.end, label %while.body
44+
45+
while.end:
46+
ret void
47+
}
48+
49+
; As above but with a store to the loaded address. This should prevent the loop
50+
; from being versioned, as we wouldn't be able to do any code motion.
51+
define void @gep_loaded_offset_with_store(ptr %p, ptr %q, ptr %r, i32 %n) {
52+
; CHECK-LABEL: define void @gep_loaded_offset_with_store(
53+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) {
54+
; CHECK-NEXT: [[ENTRY:.*]]:
55+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
56+
; CHECK: [[WHILE_BODY]]:
57+
; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ]
58+
; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ]
59+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1
60+
; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4
61+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]]
62+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
63+
; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
64+
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4
65+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4
66+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
67+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]]
68+
; CHECK: [[WHILE_END]]:
69+
; CHECK-NEXT: ret void
70+
;
71+
entry:
72+
br label %while.body
73+
74+
while.body:
75+
%n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ]
76+
%p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
77+
%dec = add nsw i32 %n.addr, -1
78+
%rval = load i64, ptr %r, align 4
79+
%arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval
80+
%val = load i32, ptr %arrayidx, align 4
81+
store i32 0, ptr %arrayidx, align 4
82+
%incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4
83+
store i32 %val, ptr %p.addr, align 4
84+
%tobool.not = icmp eq i32 %dec, 0
85+
br i1 %tobool.not, label %while.end, label %while.body
86+
87+
while.end:
88+
ret void
89+
}
90+
91+
; Load from a gep whose bounds can't be calculated as the pointer is loaded from memory
92+
; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval
93+
define void @gep_loaded_base(ptr %p, ptr %q, ptr %r, i32 %n) {
94+
; CHECK-LABEL: define void @gep_loaded_base(
95+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) {
96+
; CHECK-NEXT: [[ENTRY:.*]]:
97+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
98+
; CHECK: [[WHILE_BODY]]:
99+
; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ]
100+
; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ]
101+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1
102+
; CHECK-NEXT: [[RVAL:%.*]] = load ptr, ptr [[R]], align 4
103+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[RVAL]], i64 0
104+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
105+
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4
106+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4
107+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
108+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]]
109+
; CHECK: [[WHILE_END]]:
110+
; CHECK-NEXT: ret void
111+
;
112+
entry:
113+
br label %while.body
114+
115+
while.body:
116+
%n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ]
117+
%p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
118+
%dec = add nsw i32 %n.addr, -1
119+
%rval = load ptr, ptr %r, align 4
120+
%arrayidx = getelementptr inbounds i32, ptr %rval, i64 0
121+
%val = load i32, ptr %arrayidx, align 4
122+
%incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4
123+
store i32 %val, ptr %p.addr, align 4
124+
%tobool.not = icmp eq i32 %dec, 0
125+
br i1 %tobool.not, label %while.end, label %while.body
126+
127+
while.end:
128+
ret void
129+
}
130+
131+
; Load from a gep with an offset that scalar evolution can't describe
132+
; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of qval
133+
define void @gep_strange_offset(ptr %p, ptr %q, ptr %r, i32 %n) {
134+
; CHECK-LABEL: define void @gep_strange_offset(
135+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) {
136+
; CHECK-NEXT: [[ENTRY:.*]]:
137+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
138+
; CHECK: [[WHILE_BODY]]:
139+
; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ]
140+
; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ]
141+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1
142+
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[Q]], align 4
143+
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[DEC]], 2
144+
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[REM]] to i64
145+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[IDXPROM]]
146+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
147+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VAL]], [[QVAL]]
148+
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4
149+
; CHECK-NEXT: store i32 [[ADD]], ptr [[P_ADDR]], align 4
150+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
151+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]]
152+
; CHECK: [[WHILE_END]]:
153+
; CHECK-NEXT: ret void
154+
;
155+
entry:
156+
br label %while.body
157+
158+
while.body:
159+
%n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ]
160+
%p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
161+
%dec = add nsw i32 %n.addr, -1
162+
%qval = load i32, ptr %q, align 4
163+
%rem = srem i32 %dec, 2
164+
%idxprom = sext i32 %rem to i64
165+
%arrayidx = getelementptr inbounds i32, ptr %r, i64 %idxprom
166+
%val = load i32, ptr %arrayidx, align 4
167+
%add = add nsw i32 %val, %qval
168+
%incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4
169+
store i32 %add, ptr %p.addr, align 4
170+
%tobool.not = icmp eq i32 %dec, 0
171+
br i1 %tobool.not, label %while.end, label %while.body
172+
173+
while.end:
174+
ret void
175+
}
176+
177+
; A memcpy-like loop where the source address is loaded from a pointer
178+
; FIXME: We should be able to hoist the load of the source address pointer
179+
define void @memcpy_load_src(ptr %dst, ptr %src, i32 %n) {
180+
; CHECK-LABEL: define void @memcpy_load_src(
181+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) {
182+
; CHECK-NEXT: [[ENTRY:.*]]:
183+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
184+
; CHECK: [[WHILE_BODY]]:
185+
; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ]
186+
; CHECK-NEXT: [[DST_VAL:%.*]] = phi ptr [ [[DST_VAL_NEXT:%.*]], %[[WHILE_BODY]] ], [ [[DST]], %[[ENTRY]] ]
187+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1
188+
; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8
189+
; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL]], i64 1
190+
; CHECK-NEXT: [[DST_VAL_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST_VAL]], i64 1
191+
; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8
192+
; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1
193+
; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1
194+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
195+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]]
196+
; CHECK: [[WHILE_END]]:
197+
; CHECK-NEXT: ret void
198+
;
199+
entry:
200+
br label %while.body
201+
202+
while.body:
203+
%n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ]
204+
%dst_val = phi ptr [ %dst_val.next, %while.body ], [ %dst, %entry ]
205+
%dec = add nsw i32 %n_val, -1
206+
%src_val = load ptr, ptr %src, align 8
207+
%src_val.next = getelementptr inbounds nuw i8, ptr %src_val, i64 1
208+
%dst_val.next = getelementptr inbounds nuw i8, ptr %dst_val, i64 1
209+
store ptr %src_val.next, ptr %src, align 8
210+
%val = load i8, ptr %src_val, align 1
211+
store i8 %val, ptr %dst_val, align 1
212+
%tobool.not = icmp eq i32 %dec, 0
213+
br i1 %tobool.not, label %while.end, label %while.body
214+
215+
while.end:
216+
ret void
217+
}
218+
219+
; A memcpy-like loop where the destination address is loaded from a pointer
220+
; FIXME: We could hoist the load of the destination address, but doing the
221+
; bounds check of the store through that pointer itself requires using the
222+
; hoisted load.
223+
define void @memcpy_load_dst(ptr %dst, ptr %src, i32 %n) {
224+
; CHECK-LABEL: define void @memcpy_load_dst(
225+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) {
226+
; CHECK-NEXT: [[ENTRY:.*]]:
227+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
228+
; CHECK: [[WHILE_BODY]]:
229+
; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ]
230+
; CHECK-NEXT: [[SRC_VAL:%.*]] = phi ptr [ [[SRC_VAL_NEXT:%.*]], %[[WHILE_BODY]] ], [ [[SRC]], %[[ENTRY]] ]
231+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1
232+
; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8
233+
; CHECK-NEXT: [[SRC_VAL_NEXT]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL]], i64 1
234+
; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[DST_VAL]], i64 1
235+
; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8
236+
; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1
237+
; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1
238+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
239+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]]
240+
; CHECK: [[WHILE_END]]:
241+
; CHECK-NEXT: ret void
242+
;
243+
entry:
244+
br label %while.body
245+
246+
while.body:
247+
%n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ]
248+
%src_val = phi ptr [ %src_val.next, %while.body ], [ %src, %entry ]
249+
%dec = add nsw i32 %n_val, -1
250+
%dst_val = load ptr, ptr %dst, align 8
251+
%src_val.next = getelementptr inbounds nuw i8, ptr %src_val, i64 1
252+
%dst_val.next = getelementptr inbounds nuw i8, ptr %dst_val, i64 1
253+
store ptr %dst_val.next, ptr %dst, align 8
254+
%val = load i8, ptr %src_val, align 1
255+
store i8 %val, ptr %dst_val, align 1
256+
%tobool.not = icmp eq i32 %dec, 0
257+
br i1 %tobool.not, label %while.end, label %while.body
258+
259+
while.end:
260+
ret void
261+
}
262+
263+
; A memcpy-like loop where both the source and destination pointers are loaded from pointers
264+
; FIXME: We could hoist the loads of both addresses, but doing the bounds check
265+
; of the store through the destination address itself requires using the hoisted
266+
; load.
267+
define void @memcpy_load_src_dst(ptr %dst, ptr %src, i32 %n) {
268+
; CHECK-LABEL: define void @memcpy_load_src_dst(
269+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) {
270+
; CHECK-NEXT: [[ENTRY:.*]]:
271+
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
272+
; CHECK: [[WHILE_BODY]]:
273+
; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ]
274+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1
275+
; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8
276+
; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8
277+
; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL]], i64 1
278+
; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[DST_VAL]], i64 1
279+
; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8
280+
; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8
281+
; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1
282+
; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1
283+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
284+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]]
285+
; CHECK: [[WHILE_END]]:
286+
; CHECK-NEXT: ret void
287+
;
288+
entry:
289+
br label %while.body
290+
291+
while.body:
292+
%n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ]
293+
%dec = add nsw i32 %n_val, -1
294+
%src_val = load ptr, ptr %src, align 8
295+
%dst_val = load ptr, ptr %dst, align 8
296+
%src_val.next = getelementptr inbounds nuw i8, ptr %src_val, i64 1
297+
%dst_val.next = getelementptr inbounds nuw i8, ptr %dst_val, i64 1
298+
store ptr %src_val.next, ptr %src, align 8
299+
store ptr %dst_val.next, ptr %dst, align 8
300+
%val = load i8, ptr %src_val, align 1
301+
store i8 %val, ptr %dst_val, align 1
302+
%tobool.not = icmp eq i32 %dec, 0
303+
br i1 %tobool.not, label %while.end, label %while.body
304+
305+
while.end:
306+
ret void
307+
}

0 commit comments

Comments
 (0)