; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
33
; Test function @lower_lshr.
;
; Takes eight <4 x i32> arguments (%a..%h), add-reduces each one to a scalar,
; packs the eight scalars into a single <8 x i32>, and then, per lane, adds the
; low 16 bits (and with 65535) to the high 16 bits (lshr by 16), shifts that sum
; right by 1, and finally add-reduces the <8 x i32> result to the returned i32.
;
; The autogenerated assertions below expect, for -mtriple=aarch64: one addv per
; input vector, lane-insert movs to assemble two 4-lane vectors, xtn / shrn #16
; pairs followed by uhadd, and a final uaddlv + fmov for the return value
; (presumably xtn/shrn #16 implement the mask/shift and uhadd implements the
; add-then-shift-by-1 -- confirm against the backend).
;
; NOTE(review): the leading "N+" token on every line of this block looks like
; line-number / diff-marker residue from extraction and is not valid LLVM IR;
; confirm and strip it before feeding this file to llc.
4+ define i32 @lower_lshr (<4 x i32 > %a , <4 x i32 > %b , <4 x i32 > %c , <4 x i32 > %d , <4 x i32 > %e , <4 x i32 > %f , <4 x i32 > %g , <4 x i32 > %h ) {
5+ ; CHECK-LABEL: lower_lshr:
6+ ; CHECK: // %bb.0:
7+ ; CHECK-NEXT: addv s0, v0.4s
8+ ; CHECK-NEXT: addv s1, v1.4s
9+ ; CHECK-NEXT: addv s4, v4.4s
10+ ; CHECK-NEXT: addv s5, v5.4s
11+ ; CHECK-NEXT: addv s2, v2.4s
12+ ; CHECK-NEXT: addv s6, v6.4s
13+ ; CHECK-NEXT: mov v0.s[1], v1.s[0]
14+ ; CHECK-NEXT: addv s1, v3.4s
15+ ; CHECK-NEXT: addv s3, v7.4s
16+ ; CHECK-NEXT: mov v4.s[1], v5.s[0]
17+ ; CHECK-NEXT: mov v0.s[2], v2.s[0]
18+ ; CHECK-NEXT: mov v4.s[2], v6.s[0]
19+ ; CHECK-NEXT: mov v0.s[3], v1.s[0]
20+ ; CHECK-NEXT: mov v4.s[3], v3.s[0]
21+ ; CHECK-NEXT: xtn v1.4h, v0.4s
22+ ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
23+ ; CHECK-NEXT: xtn v2.4h, v4.4s
24+ ; CHECK-NEXT: shrn v3.4h, v4.4s, #16
25+ ; CHECK-NEXT: uhadd v0.4h, v1.4h, v0.4h
26+ ; CHECK-NEXT: uhadd v1.4h, v2.4h, v3.4h
27+ ; CHECK-NEXT: mov v0.d[1], v1.d[0]
28+ ; CHECK-NEXT: uaddlv s0, v0.8h
29+ ; CHECK-NEXT: fmov w0, s0
30+ ; CHECK-NEXT: ret
; Step 1: add-reduce each of the eight <4 x i32> inputs to a single i32.
31+ %l87 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %a )
32+ %l174 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %b )
33+ %l257 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %c )
34+ %l340 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %d )
35+ %l427 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %e )
36+ %l514 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %f )
37+ %l597 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %g )
38+ %l680 = tail call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %h )
; Step 2: pack the eight scalar sums into lanes 0..7 of one <8 x i32>,
; starting from poison so every lane is explicitly defined by an insert.
39+ %l681 = insertelement <8 x i32 > poison, i32 %l87 , i32 0
40+ %l682 = insertelement <8 x i32 > %l681 , i32 %l174 , i32 1
41+ %l683 = insertelement <8 x i32 > %l682 , i32 %l257 , i32 2
42+ %l684 = insertelement <8 x i32 > %l683 , i32 %l340 , i32 3
43+ %l685 = insertelement <8 x i32 > %l684 , i32 %l427 , i32 4
44+ %l686 = insertelement <8 x i32 > %l685 , i32 %l514 , i32 5
45+ %l687 = insertelement <8 x i32 > %l686 , i32 %l597 , i32 6
46+ %l688 = insertelement <8 x i32 > %l687 , i32 %l680 , i32 7
; Step 3: per lane, split into low half (and 65535) and high half (lshr 16),
; add the two halves (flagged nuw nsw), then shift the sum right by 1.
47+ %l689 = and <8 x i32 > %l688 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 , i32 65535 , i32 65535 , i32 65535 , i32 65535 >
48+ %l690 = lshr <8 x i32 > %l688 , <i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 >
49+ %l691 = add nuw nsw <8 x i32 > %l689 , %l690
50+ %l692 = lshr <8 x i32 > %l691 , <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >
; Step 4: add-reduce the eight lanes and return the scalar result.
51+ %l693 = call i32 @llvm.vector.reduce.add.v8i32 (<8 x i32 > %l692 )
52+ ret i32 %l693
53+ }
; Declarations of the vector add-reduction intrinsics called by @lower_lshr:
; the v4i32 form reduces each input vector, the v8i32 form reduces the final
; combined vector.
54+ declare i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 >)
55+ declare i32 @llvm.vector.reduce.add.v8i32 (<8 x i32 >)
56+
457define <16 x i8 > @lower_trunc_16xi8 (i16 %a , i16 %b , i16 %c , i16 %d , i16 %e , i16 %f , i16 %g , i16 %h , i16 %i , i16 %j , i16 %k , i16 %l , i16 %m , i16 %n , i16 %o , i16 %p ) {
558; CHECK-LABEL: lower_trunc_16xi8:
659; CHECK: // %bb.0:
760; CHECK-NEXT: fmov s0, w0
8- ; CHECK-NEXT: ldr h1, [sp]
9- ; CHECK-NEXT: add x8, sp, #8
10- ; CHECK-NEXT: ld1 { v1.h }[1], [x8]
11- ; CHECK-NEXT: add x8, sp, #16
61+ ; CHECK-NEXT: add x8, sp, #56
62+ ; CHECK-NEXT: ld1r { v1.8h }, [x8]
1263; CHECK-NEXT: mov v0.h[1], w1
13- ; CHECK-NEXT: ld1 { v1.h }[2], [x8]
14- ; CHECK-NEXT: add x8, sp, #24
64+ ; CHECK-NEXT: add v3.8h, v1.8h, v1.8h
1565; CHECK-NEXT: mov v0.h[2], w2
16- ; CHECK-NEXT: ld1 { v1.h }[3], [x8]
17- ; CHECK-NEXT: add x8, sp, #32
1866; CHECK-NEXT: mov v0.h[3], w3
19- ; CHECK-NEXT: ld1 { v1.h }[4], [x8]
20- ; CHECK-NEXT: add x8, sp, #40
21- ; CHECK-NEXT: ld1 { v1.h }[5], [x8]
22- ; CHECK-NEXT: add x8, sp, #48
2367; CHECK-NEXT: mov v0.h[4], w4
24- ; CHECK-NEXT: ld1 { v1.h }[6], [x8]
25- ; CHECK-NEXT: add x8, sp, #56
2668; CHECK-NEXT: mov v0.h[5], w5
27- ; CHECK-NEXT: ld1 { v1.h }[7], [x8]
2869; CHECK-NEXT: mov v0.h[6], w6
29- ; CHECK-NEXT: add v2.8h, v1.8h, v1.8h
30- ; CHECK-NEXT: mov v0.h[7], w7
31- ; CHECK-NEXT: add v3.8h, v0.8h, v0.8h
70+ ; CHECK-NEXT: add v2.8h, v0.8h, v0.8h
3271; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
33- ; CHECK-NEXT: uzp1 v1.16b, v3 .16b, v2 .16b
72+ ; CHECK-NEXT: uzp1 v1.16b, v2 .16b, v3 .16b
3473; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
3574; CHECK-NEXT: ret
3675 %a1 = insertelement <16 x i16 > poison, i16 %a , i16 0
@@ -41,14 +80,14 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16
4180 %f1 = insertelement <16 x i16 > %e1 , i16 %f , i16 5
4281 %g1 = insertelement <16 x i16 > %f1 , i16 %g , i16 6
4382 %h1 = insertelement <16 x i16 > %g1 , i16 %h , i16 7
44- %i1 = insertelement <16 x i16 > %h1 , i16 %i , i16 8
45- %j1 = insertelement <16 x i16 > %i1 , i16 %j , i16 9
46- %k1 = insertelement <16 x i16 > %j1 , i16 %k , i16 10
47- %l1 = insertelement <16 x i16 > %k1 , i16 %l , i16 11
48- %m1 = insertelement <16 x i16 > %l1 , i16 %m , i16 12
49- %n1 = insertelement <16 x i16 > %m1 , i16 %n , i16 13
50- %o1 = insertelement <16 x i16 > %n1 , i16 %o , i16 14
51- %p1 = insertelement <16 x i16 > %o1 , i16 %p , i16 15
83+ %i1 = insertelement <16 x i16 > %f1 , i16 %i , i16 8
84+ %j1 = insertelement <16 x i16 > %g1 , i16 %j , i16 9
85+ %k1 = insertelement <16 x i16 > %f1 , i16 %k , i16 10
86+ %l1 = insertelement <16 x i16 > %g1 , i16 %l , i16 11
87+ %m1 = insertelement <16 x i16 > %f1 , i16 %m , i16 12
88+ %n1 = insertelement <16 x i16 > %g1 , i16 %n , i16 13
89+ %o1 = insertelement <16 x i16 > %f1 , i16 %o , i16 14
90+ %p1 = insertelement <16 x i16 > %g1 , i16 %p , i16 15
5291 %t = trunc <16 x i16 > %p1 to <16 x i8 >
5392 %s = add <16 x i16 > %p1 , %p1
5493 %t2 = trunc <16 x i16 > %s to <16 x i8 >
0 commit comments