@@ -98,6 +98,23 @@ entry:
ret void
}

+ define void @vld2_v4i32_align1(<8 x i32> *%src, <4 x i32> *%dst) {
+ ; CHECK-LABEL: vld2_v4i32_align1:
+ ; CHECK: @ %bb.0: @ %entry
+ ; CHECK-NEXT: vld20.32 {q0, q1}, [r0]
+ ; CHECK-NEXT: vld21.32 {q0, q1}, [r0]
+ ; CHECK-NEXT: vadd.i32 q0, q0, q1
+ ; CHECK-NEXT: vstrw.32 q0, [r1]
+ ; CHECK-NEXT: bx lr
+ entry:
+ %l1 = load <8 x i32>, <8 x i32>* %src, align 1
+ %s1 = shufflevector <8 x i32> %l1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %s2 = shufflevector <8 x i32> %l1, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %a = add <4 x i32> %s1, %s2
+ store <4 x i32> %a, <4 x i32> *%dst
+ ret void
+ }
+

; i16

define void @vld2_v2i16(<4 x i16> *%src, <2 x i16> *%dst) {
@@ -115,7 +132,7 @@ define void @vld2_v2i16(<4 x i16> *%src, <2 x i16> *%dst) {
; CHECK-NEXT: strh r0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <4 x i16>, <4 x i16>* %src, align 4
+ %l1 = load <4 x i16>, <4 x i16>* %src, align 2
%s1 = shufflevector <4 x i16> %l1, <4 x i16> undef, <2 x i32> <i32 0, i32 2>
%s2 = shufflevector <4 x i16> %l1, <4 x i16> undef, <2 x i32> <i32 1, i32 3>
%a = add <2 x i16> %s1, %s2
@@ -126,13 +143,13 @@ entry:
define void @vld2_v4i16(<8 x i16> *%src, <4 x i16> *%dst) {
; CHECK-LABEL: vld2_v4i16:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: vldrw.u32 q0, [r0]
+ ; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vrev32.16 q1, q0
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <8 x i16>, <8 x i16>* %src, align 4
+ %l1 = load <8 x i16>, <8 x i16>* %src, align 2
%s1 = shufflevector <8 x i16> %l1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%s2 = shufflevector <8 x i16> %l1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%a = add <4 x i16> %s1, %s2
@@ -149,7 +166,7 @@ define void @vld2_v8i16(<16 x i16> *%src, <8 x i16> *%dst) {
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <16 x i16>, <16 x i16>* %src, align 4
+ %l1 = load <16 x i16>, <16 x i16>* %src, align 2
%s1 = shufflevector <16 x i16> %l1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%s2 = shufflevector <16 x i16> %l1, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%a = add <8 x i16> %s1, %s2
@@ -170,14 +187,31 @@ define void @vld2_v16i16(<32 x i16> *%src, <16 x i16> *%dst) {
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <32 x i16>, <32 x i16>* %src, align 4
+ %l1 = load <32 x i16>, <32 x i16>* %src, align 2
%s1 = shufflevector <32 x i16> %l1, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
%s2 = shufflevector <32 x i16> %l1, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
%a = add <16 x i16> %s1, %s2
store <16 x i16> %a, <16 x i16> *%dst
ret void
}

+ define void @vld2_v8i16_align1(<16 x i16> *%src, <8 x i16> *%dst) {
+ ; CHECK-LABEL: vld2_v8i16_align1:
+ ; CHECK: @ %bb.0: @ %entry
+ ; CHECK-NEXT: vld20.16 {q0, q1}, [r0]
+ ; CHECK-NEXT: vld21.16 {q0, q1}, [r0]
+ ; CHECK-NEXT: vadd.i16 q0, q0, q1
+ ; CHECK-NEXT: vstrw.32 q0, [r1]
+ ; CHECK-NEXT: bx lr
+ entry:
+ %l1 = load <16 x i16>, <16 x i16>* %src, align 1
+ %s1 = shufflevector <16 x i16> %l1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %s2 = shufflevector <16 x i16> %l1, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %a = add <8 x i16> %s1, %s2
+ store <8 x i16> %a, <8 x i16> *%dst
+ ret void
+ }
+

; i8

define void @vld2_v2i8(<4 x i8> *%src, <2 x i8> *%dst) {
@@ -195,7 +229,7 @@ define void @vld2_v2i8(<4 x i8> *%src, <2 x i8> *%dst) {
; CHECK-NEXT: strb r0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <4 x i8>, <4 x i8>* %src, align 4
+ %l1 = load <4 x i8>, <4 x i8>* %src, align 1
%s1 = shufflevector <4 x i8> %l1, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
%s2 = shufflevector <4 x i8> %l1, <4 x i8> undef, <2 x i32> <i32 1, i32 3>
%a = add <2 x i8> %s1, %s2
@@ -212,7 +246,7 @@ define void @vld2_v4i8(<8 x i8> *%src, <4 x i8> *%dst) {
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <8 x i8>, <8 x i8>* %src, align 4
+ %l1 = load <8 x i8>, <8 x i8>* %src, align 1
%s1 = shufflevector <8 x i8> %l1, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%s2 = shufflevector <8 x i8> %l1, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%a = add <4 x i8> %s1, %s2
@@ -223,13 +257,13 @@ entry:
define void @vld2_v8i8(<16 x i8> *%src, <8 x i8> *%dst) {
; CHECK-LABEL: vld2_v8i8:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: vldrw.u32 q0, [r0]
+ ; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: vrev16.8 q1, q0
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <16 x i8>, <16 x i8>* %src, align 4
+ %l1 = load <16 x i8>, <16 x i8>* %src, align 1
%s1 = shufflevector <16 x i8> %l1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%s2 = shufflevector <16 x i8> %l1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%a = add <8 x i8> %s1, %s2
@@ -246,7 +280,7 @@ define void @vld2_v16i8(<32 x i8> *%src, <16 x i8> *%dst) {
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <32 x i8>, <32 x i8>* %src, align 4
+ %l1 = load <32 x i8>, <32 x i8>* %src, align 1
%s1 = shufflevector <32 x i8> %l1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
%s2 = shufflevector <32 x i8> %l1, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
%a = add <16 x i8> %s1, %s2
@@ -286,7 +320,7 @@ define void @vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r4, pc}
entry:
- %l1 = load <4 x i64>, <4 x i64>* %src, align 4
+ %l1 = load <4 x i64>, <4 x i64>* %src, align 8
%s1 = shufflevector <4 x i64> %l1, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
%s2 = shufflevector <4 x i64> %l1, <4 x i64> undef, <2 x i32> <i32 1, i32 3>
%a = add <2 x i64> %s1, %s2
@@ -350,7 +384,7 @@ define void @vld2_v4i64(<8 x i64> *%src, <4 x i64> *%dst) {
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
- %l1 = load <8 x i64>, <8 x i64>* %src, align 4
+ %l1 = load <8 x i64>, <8 x i64>* %src, align 8
%s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%s2 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%a = add <4 x i64> %s1, %s2
@@ -452,12 +486,30 @@ entry:
ret void
}

+ define void @vld2_v4f32_align1(<8 x float> *%src, <4 x float> *%dst) {
+ ; CHECK-LABEL: vld2_v4f32_align1:
+ ; CHECK: @ %bb.0: @ %entry
+ ; CHECK-NEXT: vld20.32 {q0, q1}, [r0]
+ ; CHECK-NEXT: vld21.32 {q0, q1}, [r0]
+ ; CHECK-NEXT: vadd.f32 q0, q0, q1
+ ; CHECK-NEXT: vstrw.32 q0, [r1]
+ ; CHECK-NEXT: bx lr
+ entry:
+ %l1 = load <8 x float>, <8 x float>* %src, align 1
+ %s1 = shufflevector <8 x float> %l1, <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %s2 = shufflevector <8 x float> %l1, <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %a = fadd <4 x float> %s1, %s2
+ store <4 x float> %a, <4 x float> *%dst
+ ret void
+ }
+

; f16

define void @vld2_v2f16(<4 x half> *%src, <2 x half> *%dst) {
; CHECK-LABEL: vld2_v2f16:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: ldrd r2, r0, [r0]
+ ; CHECK-NEXT: ldr r2, [r0]
+ ; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmovx.f16 s4, s1
@@ -475,7 +527,7 @@ define void @vld2_v2f16(<4 x half> *%src, <2 x half> *%dst) {
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <4 x half>, <4 x half>* %src, align 4
+ %l1 = load <4 x half>, <4 x half>* %src, align 2
%s1 = shufflevector <4 x half> %l1, <4 x half> undef, <2 x i32> <i32 0, i32 2>
%s2 = shufflevector <4 x half> %l1, <4 x half> undef, <2 x i32> <i32 1, i32 3>
%a = fadd <2 x half> %s1, %s2
@@ -486,7 +538,7 @@ entry:
define void @vld2_v4f16(<8 x half> *%src, <4 x half> *%dst) {
; CHECK-LABEL: vld2_v4f16:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: vldrw.u32 q0, [r0]
+ ; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmovx.f16 s8, s0
; CHECK-NEXT: vmov r0, s1
@@ -513,7 +565,7 @@ define void @vld2_v4f16(<8 x half> *%src, <4 x half> *%dst) {
; CHECK-NEXT: strd r0, r2, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <8 x half>, <8 x half>* %src, align 4
+ %l1 = load <8 x half>, <8 x half>* %src, align 2
%s1 = shufflevector <8 x half> %l1, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%s2 = shufflevector <8 x half> %l1, <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%a = fadd <4 x half> %s1, %s2
@@ -530,7 +582,7 @@ define void @vld2_v8f16(<16 x half> *%src, <8 x half> *%dst) {
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <16 x half>, <16 x half>* %src, align 4
+ %l1 = load <16 x half>, <16 x half>* %src, align 2
%s1 = shufflevector <16 x half> %l1, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%s2 = shufflevector <16 x half> %l1, <16 x half> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%a = fadd <8 x half> %s1, %s2
@@ -551,14 +603,31 @@ define void @vld2_v16f16(<32 x half> *%src, <16 x half> *%dst) {
; CHECK-NEXT: vstrw.32 q2, [r1, #16]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <32 x half>, <32 x half>* %src, align 4
+ %l1 = load <32 x half>, <32 x half>* %src, align 2
%s1 = shufflevector <32 x half> %l1, <32 x half> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
%s2 = shufflevector <32 x half> %l1, <32 x half> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
%a = fadd <16 x half> %s1, %s2
store <16 x half> %a, <16 x half> *%dst
ret void
}

+ define void @vld2_v8f16_align1(<16 x half> *%src, <8 x half> *%dst) {
+ ; CHECK-LABEL: vld2_v8f16_align1:
+ ; CHECK: @ %bb.0: @ %entry
+ ; CHECK-NEXT: vld20.16 {q0, q1}, [r0]
+ ; CHECK-NEXT: vld21.16 {q0, q1}, [r0]
+ ; CHECK-NEXT: vadd.f16 q0, q0, q1
+ ; CHECK-NEXT: vstrw.32 q0, [r1]
+ ; CHECK-NEXT: bx lr
+ entry:
+ %l1 = load <16 x half>, <16 x half>* %src, align 1
+ %s1 = shufflevector <16 x half> %l1, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %s2 = shufflevector <16 x half> %l1, <16 x half> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %a = fadd <8 x half> %s1, %s2
+ store <8 x half> %a, <8 x half> *%dst
+ ret void
+ }
+

; f64

define void @vld2_v2f64(<4 x double> *%src, <2 x double> *%dst) {
@@ -571,7 +640,7 @@ define void @vld2_v2f64(<4 x double> *%src, <2 x double> *%dst) {
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <4 x double>, <4 x double>* %src, align 4
+ %l1 = load <4 x double>, <4 x double>* %src, align 8
%s1 = shufflevector <4 x double> %l1, <4 x double> undef, <2 x i32> <i32 0, i32 2>
%s2 = shufflevector <4 x double> %l1, <4 x double> undef, <2 x i32> <i32 1, i32 3>
%a = fadd <2 x double> %s1, %s2
@@ -594,7 +663,7 @@ define void @vld2_v4f64(<8 x double> *%src, <4 x double> *%dst) {
; CHECK-NEXT: vstrw.32 q1, [r1]
; CHECK-NEXT: bx lr
entry:
- %l1 = load <8 x double>, <8 x double>* %src, align 4
+ %l1 = load <8 x double>, <8 x double>* %src, align 8
%s1 = shufflevector <8 x double> %l1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%s2 = shufflevector <8 x double> %l1, <8 x double> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%a = fadd <4 x double> %s1, %s2