Skip to content

Commit 929f7b8

Browse files
badermahesh-attarde
authored and committed
[LV] Add additional tests for replicating load/store costs.
Includes test for llvm#161404
1 parent 2d3c0f1 commit 929f7b8

File tree

2 files changed

+270
-0
lines changed

2 files changed

+270
-0
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -616,6 +616,45 @@ exit:
616616
ret double %red.next
617617
}
618618

619+
; Loop over a pointer induction variable (%iv starts at null and advances by
; 32 bytes per iteration). Each iteration loads a pointer from %iv, then loads
; an i8 through that loaded pointer, compares it against zero and or-reduces
; the zero-extended result into %red. The loop exits when %iv equals %end.
; The CHECK lines below match the same scalar loop, i.e. no vector loop is
; expected in the output.
define i32 @test_ptr_iv_load_used_by_other_load(ptr %start, ptr %end) {
620+
; CHECK-LABEL: define i32 @test_ptr_iv_load_used_by_other_load(
621+
; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) {
622+
; CHECK-NEXT: [[ENTRY:.*]]:
623+
; CHECK-NEXT: br label %[[LOOP:.*]]
624+
; CHECK: [[LOOP]]:
625+
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ null, %[[ENTRY]] ]
626+
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[RED_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
627+
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IV]], align 8
628+
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 8
629+
; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[TMP1]], 0
630+
; CHECK-NEXT: [[C_EXT:%.*]] = zext i1 [[C]] to i32
631+
; CHECK-NEXT: [[RED_NEXT]] = or i32 [[RED]], [[C_EXT]]
632+
; CHECK-NEXT: [[IV_NEXT]] = getelementptr nusw i8, ptr [[IV]], i64 32
633+
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV]], [[END]]
634+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
635+
; CHECK: [[EXIT]]:
636+
; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi i32 [ [[RED]], %[[LOOP]] ]
637+
; CHECK-NEXT: ret i32 [[RED_LCSSA]]
638+
;
639+
entry:
640+
br label %loop
641+
642+
loop: ; preds = %loop, %entry
643+
%iv = phi ptr [ %iv.next, %loop ], [ null, %entry ]
644+
%red = phi i32 [ %red.next, %loop ], [ 0, %entry ]
645+
%0 = load ptr, ptr %iv, align 8 ; loaded pointer becomes the address of the next load
646+
%1 = load i8, ptr %0, align 8
647+
%c = icmp ne i8 %1, 0
648+
%c.ext = zext i1 %c to i32
649+
%red.next = or i32 %red, %c.ext
650+
%iv.next = getelementptr nusw i8, ptr %iv, i64 32
651+
%ec = icmp eq ptr %iv, %end ; compares the current %iv (not %iv.next) with %end
652+
br i1 %ec, label %exit, label %loop
653+
654+
exit:
655+
ret i32 %red ; returns the phi value %red, not %red.next
656+
}
657+
619658
attributes #0 = { "target-cpu"="neoverse-512tvb" }
620659

621660
!0 = !{!1, !2, i64 0}
Lines changed: 231 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,231 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
2+
; RUN: opt -p loop-vectorize -mtriple=x86_64-linux-gnu -S %s | FileCheck --check-prefix=I64 %s
3+
; RUN: opt -p loop-vectorize -mtriple=i386-pc-linux-gnu -S %s | FileCheck --check-prefix=I32 %s
4+
5+
6+
; Loop that converts the i32 induction variable to double and stores it through
; a pointer loaded from a { ptr, ptr, ptr } element addressed from a null base
; by the induction variable. %src is not referenced by the loop body.
; I64 checks (x86_64 RUN line): the scalar loop is expected unchanged.
; I32 checks (i386 RUN line): a vector loop stepping by 8 is expected, built
; from four <2 x i32> induction parts, with the pointer loads and the double
; stores emitted as eight scalar instructions each.
define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
7+
; I64-LABEL: define void @test_store_initially_interleave(
8+
; I64-SAME: i32 [[N:%.*]], ptr noalias [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; I64-NEXT: [[ENTRY:.*]]:
10+
; I64-NEXT: br label %[[LOOP:.*]]
11+
; I64: [[LOOP]]:
12+
; I64-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[LOOP]] ]
13+
; I64-NEXT: [[CONV:%.*]] = uitofp i32 [[IV]] to double
14+
; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]]
15+
; I64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADD_PTR_I]], align 4
16+
; I64-NEXT: store double [[CONV]], ptr [[TMP0]], align 4
17+
; I64-NEXT: [[INC]] = add i32 [[IV]], 1
18+
; I64-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]]
19+
; I64-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
20+
; I64: [[EXIT]]:
21+
; I64-NEXT: ret void
22+
;
23+
; I32-LABEL: define void @test_store_initially_interleave(
24+
; I32-SAME: i32 [[N:%.*]], ptr noalias [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
25+
; I32-NEXT: [[ENTRY:.*:]]
26+
; I32-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
27+
; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 8
28+
; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
29+
; I32: [[VECTOR_PH]]:
30+
; I32-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8
31+
; I32-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
32+
; I32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 8, i32 [[N_MOD_VF]]
33+
; I32-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
34+
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
35+
; I32: [[VECTOR_BODY]]:
36+
; I32-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
37+
; I32-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
38+
; I32-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
39+
; I32-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
40+
; I32-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
41+
; I32-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
42+
; I32-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1
43+
; I32-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2
44+
; I32-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3
45+
; I32-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 4
46+
; I32-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 5
47+
; I32-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 6
48+
; I32-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 7
49+
; I32-NEXT: [[TMP11:%.*]] = uitofp <2 x i32> [[VEC_IND]] to <2 x double>
50+
; I32-NEXT: [[TMP12:%.*]] = uitofp <2 x i32> [[STEP_ADD]] to <2 x double>
51+
; I32-NEXT: [[TMP13:%.*]] = uitofp <2 x i32> [[STEP_ADD_2]] to <2 x double>
52+
; I32-NEXT: [[TMP14:%.*]] = uitofp <2 x i32> [[STEP_ADD_3]] to <2 x double>
53+
; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]]
54+
; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]]
55+
; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
56+
; I32-NEXT: [[TMP18:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
57+
; I32-NEXT: [[TMP19:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP7]]
58+
; I32-NEXT: [[TMP20:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP8]]
59+
; I32-NEXT: [[TMP21:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP9]]
60+
; I32-NEXT: [[TMP22:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP10]]
61+
; I32-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP15]], align 4
62+
; I32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP16]], align 4
63+
; I32-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP17]], align 4
64+
; I32-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP18]], align 4
65+
; I32-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP19]], align 4
66+
; I32-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP20]], align 4
67+
; I32-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP21]], align 4
68+
; I32-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP22]], align 4
69+
; I32-NEXT: [[TMP31:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
70+
; I32-NEXT: store double [[TMP31]], ptr [[TMP23]], align 4
71+
; I32-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
72+
; I32-NEXT: store double [[TMP32]], ptr [[TMP24]], align 4
73+
; I32-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
74+
; I32-NEXT: store double [[TMP33]], ptr [[TMP25]], align 4
75+
; I32-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
76+
; I32-NEXT: store double [[TMP34]], ptr [[TMP26]], align 4
77+
; I32-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP13]], i32 0
78+
; I32-NEXT: store double [[TMP35]], ptr [[TMP27]], align 4
79+
; I32-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[TMP13]], i32 1
80+
; I32-NEXT: store double [[TMP36]], ptr [[TMP28]], align 4
81+
; I32-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[TMP14]], i32 0
82+
; I32-NEXT: store double [[TMP37]], ptr [[TMP29]], align 4
83+
; I32-NEXT: [[TMP38:%.*]] = extractelement <2 x double> [[TMP14]], i32 1
84+
; I32-NEXT: store double [[TMP38]], ptr [[TMP30]], align 4
85+
; I32-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
86+
; I32-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD_3]], splat (i32 2)
87+
; I32-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
88+
; I32-NEXT: br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
89+
; I32: [[MIDDLE_BLOCK]]:
90+
; I32-NEXT: br label %[[SCALAR_PH]]
91+
; I32: [[SCALAR_PH]]:
92+
;
93+
entry:
94+
br label %loop
95+
96+
loop:
97+
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
98+
%conv = uitofp i32 %iv to double
99+
%add.ptr.i = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 %iv
100+
%0 = load ptr, ptr %add.ptr.i, align 4 ; store address is loaded from memory each iteration
101+
store double %conv, ptr %0, align 4
102+
%inc = add i32 %iv, 1
103+
%ec = icmp eq i32 %iv, %n ; exit test uses %iv (pre-increment), not %inc
104+
br i1 %ec, label %exit, label %loop
105+
106+
exit: ; preds = %loop
107+
ret void
108+
}
109+
110+
; Loop that loads a double from %src at byte offset %iv and stores it to %dst
; at byte offset 2 * %iv (both GEPs use i8 as the element type). The loop is
; skipped entirely when %n < 1, and its latch branch carries !llvm.loop !0.
; Both the I64 and I32 checks expect a vector-loop skeleton stepping by 4 in
; which the four loads and the four stores remain scalar instructions.
define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) #0 {
111+
; I64-LABEL: define void @test_store_loaded_value(
112+
; I64-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
113+
; I64-NEXT: [[BB:.*:]]
114+
; I64-NEXT: [[PRE:%.*]] = icmp slt i32 [[N]], 1
115+
; I64-NEXT: br i1 [[PRE]], [[EXIT:label %.*]], label %[[PH:.*]]
116+
; I64: [[PH]]:
117+
; I64-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
118+
; I64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N_EXT]], 4
119+
; I64-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
120+
; I64: [[VECTOR_PH]]:
121+
; I64-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_EXT]], 4
122+
; I64-NEXT: [[N_VEC:%.*]] = sub i64 [[N_EXT]], [[N_MOD_VF]]
123+
; I64-NEXT: br label %[[VECTOR_BODY:.*]]
124+
; I64: [[VECTOR_BODY]]:
125+
; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
126+
; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
127+
; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
128+
; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
129+
; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
130+
; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
131+
; I64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
132+
; I64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
133+
; I64-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
134+
; I64-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8
135+
; I64-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
136+
; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
137+
; I64-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
138+
; I64-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
139+
; I64-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
140+
; I64-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
141+
; I64-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
142+
; I64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
143+
; I64-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
144+
; I64-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
145+
; I64-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
146+
; I64-NEXT: store double [[TMP8]], ptr [[TMP16]], align 8
147+
; I64-NEXT: store double [[TMP9]], ptr [[TMP17]], align 8
148+
; I64-NEXT: store double [[TMP10]], ptr [[TMP18]], align 8
149+
; I64-NEXT: store double [[TMP11]], ptr [[TMP19]], align 8
150+
; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
151+
; I64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
152+
; I64-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
153+
; I64: [[MIDDLE_BLOCK]]:
154+
; I64-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_EXT]], [[N_VEC]]
155+
; I64-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
156+
; I64: [[SCALAR_PH]]:
157+
;
158+
; I32-LABEL: define void @test_store_loaded_value(
159+
; I32-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
160+
; I32-NEXT: [[BB:.*:]]
161+
; I32-NEXT: [[PRE:%.*]] = icmp slt i32 [[N]], 1
162+
; I32-NEXT: br i1 [[PRE]], [[EXIT:label %.*]], label %[[PH:.*]]
163+
; I32: [[PH]]:
164+
; I32-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
165+
; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N_EXT]], 4
166+
; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
167+
; I32: [[VECTOR_PH]]:
168+
; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_EXT]], 4
169+
; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[N_EXT]], [[N_MOD_VF]]
170+
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
171+
; I32: [[VECTOR_BODY]]:
172+
; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
173+
; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
174+
; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
175+
; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
176+
; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
177+
; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
178+
; I32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
179+
; I32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
180+
; I32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
181+
; I32-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8
182+
; I32-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
183+
; I32-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
184+
; I32-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
185+
; I32-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
186+
; I32-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
187+
; I32-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
188+
; I32-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
189+
; I32-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
190+
; I32-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
191+
; I32-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
192+
; I32-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
193+
; I32-NEXT: store double [[TMP8]], ptr [[TMP16]], align 8
194+
; I32-NEXT: store double [[TMP9]], ptr [[TMP17]], align 8
195+
; I32-NEXT: store double [[TMP10]], ptr [[TMP18]], align 8
196+
; I32-NEXT: store double [[TMP11]], ptr [[TMP19]], align 8
197+
; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
198+
; I32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
199+
; I32-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
200+
; I32: [[MIDDLE_BLOCK]]:
201+
; I32-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_EXT]], [[N_VEC]]
202+
; I32-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
203+
; I32: [[SCALAR_PH]]:
204+
;
205+
bb:
206+
%pre = icmp slt i32 %n, 1
207+
br i1 %pre, label %exit, label %ph
208+
209+
ph:
210+
%n.ext = zext i32 %n to i64
211+
br label %loop
212+
213+
loop:
214+
%iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
215+
%iv.next = add i64 %iv, 1
216+
%gep.src = getelementptr i8, ptr %src, i64 %iv
217+
%l = load double, ptr %gep.src, align 8
218+
%sext = shl i64 %iv, 1 ; despite the name, this is a left shift by 1 (2 * %iv)
219+
%gep.dst = getelementptr i8, ptr %dst, i64 %sext
220+
store double %l, ptr %gep.dst, align 8
221+
%ec = icmp eq i64 %iv.next, %n.ext
222+
br i1 %ec, label %exit, label %loop, !llvm.loop !0
223+
224+
exit:
225+
ret void
226+
}
227+
228+
; Attribute group #0, referenced by both function definitions in this file:
; sets the "target-cpu" function attribute to znver2.
attributes #0 = { "target-cpu"="znver2" }
229+
230+
; Loop metadata attached to the latch branch of @test_store_loaded_value:
; !0 is the distinct, self-referential loop ID and !1 sets
; "llvm.loop.vectorize.enable" to true.
!0 = distinct !{!0, !1}
231+
!1 = !{!"llvm.loop.vectorize.enable", i1 true}

0 commit comments

Comments
 (0)