; Deinterleaving-load tests: @llvm.vector.deinterleave{2,3,4} on scalable
; vectors, checked against AArch64 SVE ld2w/ld3w/ld4w lowering (SVE checks)
; and against the interleaved-access IR transform (CHECK lines below).
; Factor-2 deinterleaving load: the load + deinterleave2 pair should lower
; to a single SVE ld2w, with each deinterleaved half stored out separately.
define void @load_factor2(ptr %ptr, ptr %s1, ptr %s2) {
; SVE-LABEL: load_factor2:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0]
; SVE-NEXT:    str z0, [x1]
; SVE-NEXT:    str z1, [x2]
; SVE-NEXT:    ret
  %wide.vec = load <vscale x 8 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.vec)
  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
  %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
  store <vscale x 4 x i32> %3, ptr %s1
  store <vscale x 4 x i32> %4, ptr %s2
  ret void
}
; Factor-3 deinterleaving load: the load + deinterleave3 triple should lower
; to a single SVE ld3w, with each deinterleaved part stored out separately.
define void @load_factor3(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
; SVE-LABEL: load_factor3:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x0]
; SVE-NEXT:    str z0, [x1]
; SVE-NEXT:    str z1, [x2]
; SVE-NEXT:    str z2, [x3]
; SVE-NEXT:    ret
  %wide.vec = load <vscale x 12 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %wide.vec)
  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
  %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
  %5 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
  store <vscale x 4 x i32> %3, ptr %s1
  store <vscale x 4 x i32> %4, ptr %s2
  store <vscale x 4 x i32> %5, ptr %s3
  ret void
}
; Factor-4 deinterleaving load: the load + deinterleave4 quad should lower
; to a single SVE ld4w, with each deinterleaved part stored out separately.
define void @load_factor4(ptr %ptr, ptr %s1, ptr %s2, ptr %s3, ptr %s4) {
; SVE-LABEL: load_factor4:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x0]
; SVE-NEXT:    str z0, [x1]
; SVE-NEXT:    str z1, [x2]
; SVE-NEXT:    str z2, [x3]
; SVE-NEXT:    str z3, [x4]
; SVE-NEXT:    ret
  %wide.vec = load <vscale x 16 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %wide.vec)
  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
  %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
  %5 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
  %6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 3
  store <vscale x 4 x i32> %3, ptr %s1
  store <vscale x 4 x i32> %4, ptr %s2
  store <vscale x 4 x i32> %5, ptr %s3
  store <vscale x 4 x i32> %6, ptr %s4
  ret void
}
; Declarations for the factor-2/3/4 deinterleave intrinsics used by the
; load_factor* tests above (nxv4i32 result parts).
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32>)
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 3 x i64>, ptr [[PTR]], align 8
296
+
; CHECK-NEXT: [[LDN:%.*]] = tail call { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave3.nxv3i64(<vscale x 3 x i64> [[WIDE_VEC]])
297
+
; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } [[LDN]], 0
298
+
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } [[LDN]], 1
299
+
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } [[LDN]], 2
300
+
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP1]], ptr [[S1]], align 8
301
+
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP2]], ptr [[S2]], align 8
302
+
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP3]], ptr [[S3]], align 8
303
+
; CHECK-NEXT: ret void
304
+
;
305
+
%wide.vec = load <vscale x 3 x i64>, ptr%ptr, align8
306
+
%ldN = tailcall { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave3.nxv3i64(<vscale x 3 x i64> %wide.vec)
307
+
308
+
%3 = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } %ldN, 0
309
+
%4 = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } %ldN, 1
310
+
%5 = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } %ldN, 2
311
+
312
+
store <vscale x 1 x i64> %3, <vscale x 1 x i64>* %s1
313
+
store <vscale x 1 x i64> %4, <vscale x 1 x i64>* %s2
314
+
store <vscale x 1 x i64> %5, <vscale x 1 x i64>* %s3
315
+
retvoid
316
+
}
317
+
318
+
; Factor-3 deinterleave of an nxv6i32 load into three nxv2i32 parts.
; NOTE(review): the CHECK-LABEL/CHECK-SAME lines were reconstructed — the
; original scrape dropped the line defining the [[PTR]]/[[S1]]... captures;
; confirm against the regenerated update_test_checks output.
define void @deinterleave_nxi32_factor3(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
; CHECK-LABEL: define void @deinterleave_nxi32_factor3(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[S1:%.*]], ptr [[S2:%.*]], ptr [[S3:%.*]]) {
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 6 x i32>, ptr [[PTR]], align 8
; CHECK-NEXT:    [[LDN:%.*]] = tail call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv6i32(<vscale x 6 x i32> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[LDN]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[LDN]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[LDN]], 2
; CHECK-NEXT:    store <vscale x 2 x i32> [[TMP1]], ptr [[S1]], align 8
; CHECK-NEXT:    store <vscale x 2 x i32> [[TMP2]], ptr [[S2]], align 8
; CHECK-NEXT:    store <vscale x 2 x i32> [[TMP3]], ptr [[S3]], align 8
; CHECK-NEXT:    ret void
;
  %wide.vec = load <vscale x 6 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv3i32(<vscale x 6 x i32> %wide.vec)
  %3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %ldN, 0
  %4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %ldN, 1
  %5 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %ldN, 2
  store <vscale x 2 x i32> %3, ptr %s1
  store <vscale x 2 x i32> %4, ptr %s2
  store <vscale x 2 x i32> %5, ptr %s3
  ret void
}
; Factor-3 deinterleave of an nxv12i16 load into three nxv4i16 parts.
; NOTE(review): the CHECK-LABEL/CHECK-SAME lines were reconstructed — the
; original scrape dropped the line defining the [[PTR]]/[[S1]]... captures;
; confirm against the regenerated update_test_checks output.
define void @deinterleave_nxi16_factor3(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
; CHECK-LABEL: define void @deinterleave_nxi16_factor3(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[S1:%.*]], ptr [[S2:%.*]], ptr [[S3:%.*]]) {
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 12 x i16>, ptr [[PTR]], align 8
; CHECK-NEXT:    [[LDN:%.*]] = tail call { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.vector.deinterleave3.nxv12i16(<vscale x 12 x i16> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } [[LDN]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } [[LDN]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } [[LDN]], 2
; CHECK-NEXT:    store <vscale x 4 x i16> [[TMP1]], ptr [[S1]], align 8
; CHECK-NEXT:    store <vscale x 4 x i16> [[TMP2]], ptr [[S2]], align 8
; CHECK-NEXT:    store <vscale x 4 x i16> [[TMP3]], ptr [[S3]], align 8
; CHECK-NEXT:    ret void
;
  %wide.vec = load <vscale x 12 x i16>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.vector.deinterleave3.nxv3i16(<vscale x 12 x i16> %wide.vec)
  %3 = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } %ldN, 0
  %4 = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } %ldN, 1
  %5 = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } %ldN, 2
  store <vscale x 4 x i16> %3, ptr %s1
  store <vscale x 4 x i16> %4, ptr %s2
  store <vscale x 4 x i16> %5, ptr %s3
  ret void
}
; Factor-3 deinterleave of an nxv24i8 load into three nxv8i8 parts.
; NOTE(review): the CHECK-LABEL/CHECK-SAME lines were reconstructed — the
; original scrape dropped the line defining the [[PTR]]/[[S1]]... captures;
; confirm against the regenerated update_test_checks output.
define void @deinterleave_nxi8_factor3(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
; CHECK-LABEL: define void @deinterleave_nxi8_factor3(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[S1:%.*]], ptr [[S2:%.*]], ptr [[S3:%.*]]) {
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 24 x i8>, ptr [[PTR]], align 8
; CHECK-NEXT:    [[LDN:%.*]] = tail call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave3.nxv24i8(<vscale x 24 x i8> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } [[LDN]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } [[LDN]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } [[LDN]], 2
; CHECK-NEXT:    store <vscale x 8 x i8> [[TMP1]], ptr [[S1]], align 8
; CHECK-NEXT:    store <vscale x 8 x i8> [[TMP2]], ptr [[S2]], align 8
; CHECK-NEXT:    store <vscale x 8 x i8> [[TMP3]], ptr [[S3]], align 8
; CHECK-NEXT:    ret void
;
  %wide.vec = load <vscale x 24 x i8>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave3.nxv3i8(<vscale x 24 x i8> %wide.vec)
  %3 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %ldN, 0
  %4 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %ldN, 1
  %5 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %ldN, 2
  store <vscale x 8 x i8> %3, ptr %s1
  store <vscale x 8 x i8> %4, ptr %s2
  store <vscale x 8 x i8> %5, ptr %s3
  ret void
}
; Intrinsic declarations for the remaining tests. The deinterleave3
; declarations keep the suffix spelling used by the call sites above; LLVM
; canonicalizes intrinsic name mangling when the IR is parsed.
declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare <vscale x 4 x ptr> @llvm.vector.interleave2.nxv4p0(<vscale x 2 x ptr>, <vscale x 2 x ptr>)

; Larger interleaves to test 'legalization'
declare <vscale x 8 x double> @llvm.vector.interleave2.nxv8f64(<vscale x 4 x double>, <vscale x 4 x double>)

; Interleaves with Factor=3
declare { <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.vector.deinterleave3.nxv3i64(<vscale x 3 x i64>)
declare { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv3i32(<vscale x 6 x i32>)
declare { <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.vector.deinterleave3.nxv3i16(<vscale x 12 x i16>)
declare { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave3.nxv3i8(<vscale x 24 x i8>)