@@ -10,11 +10,14 @@ target triple = "aarch64-unknown-linux-gnu"
1010define half @fadda_v4f16 (half %start , <4 x half > %a ) {
1111; CHECK-LABEL: fadda_v4f16:
1212; CHECK: // %bb.0:
13- ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
14- ; CHECK-NEXT: ptrue p0.h, vl4
1513; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
16- ; CHECK-NEXT: fadda h0, p0, h0, z1.h
17- ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
14+ ; CHECK-NEXT: fadd h0, h0, h1
15+ ; CHECK-NEXT: mov z2.h, z1.h[1]
16+ ; CHECK-NEXT: fadd h0, h0, h2
17+ ; CHECK-NEXT: mov z2.h, z1.h[2]
18+ ; CHECK-NEXT: mov z1.h, z1.h[3]
19+ ; CHECK-NEXT: fadd h0, h0, h2
20+ ; CHECK-NEXT: fadd h0, h0, h1
1821; CHECK-NEXT: ret
1922 %res = call half @llvm.vector.reduce.fadd.v4f16 (half %start , <4 x half > %a )
2023 ret half %res
@@ -23,11 +26,22 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
2326define half @fadda_v8f16 (half %start , <8 x half > %a ) {
2427; CHECK-LABEL: fadda_v8f16:
2528; CHECK: // %bb.0:
26- ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
27- ; CHECK-NEXT: ptrue p0.h, vl8
2829; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
29- ; CHECK-NEXT: fadda h0, p0, h0, z1.h
30- ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
30+ ; CHECK-NEXT: fadd h0, h0, h1
31+ ; CHECK-NEXT: mov z2.h, z1.h[1]
32+ ; CHECK-NEXT: fadd h0, h0, h2
33+ ; CHECK-NEXT: mov z2.h, z1.h[2]
34+ ; CHECK-NEXT: fadd h0, h0, h2
35+ ; CHECK-NEXT: mov z2.h, z1.h[3]
36+ ; CHECK-NEXT: fadd h0, h0, h2
37+ ; CHECK-NEXT: mov z2.h, z1.h[4]
38+ ; CHECK-NEXT: fadd h0, h0, h2
39+ ; CHECK-NEXT: mov z2.h, z1.h[5]
40+ ; CHECK-NEXT: fadd h0, h0, h2
41+ ; CHECK-NEXT: mov z2.h, z1.h[6]
42+ ; CHECK-NEXT: mov z1.h, z1.h[7]
43+ ; CHECK-NEXT: fadd h0, h0, h2
44+ ; CHECK-NEXT: fadd h0, h0, h1
3145; CHECK-NEXT: ret
3246 %res = call half @llvm.vector.reduce.fadd.v8f16 (half %start , <8 x half > %a )
3347 ret half %res
@@ -36,12 +50,38 @@ define half @fadda_v8f16(half %start, <8 x half> %a) {
3650define half @fadda_v16f16 (half %start , ptr %a ) {
3751; CHECK-LABEL: fadda_v16f16:
3852; CHECK: // %bb.0:
39- ; CHECK-NEXT: ldp q1, q2, [x0]
40- ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
41- ; CHECK-NEXT: ptrue p0.h, vl8
42- ; CHECK-NEXT: fadda h0, p0, h0, z1.h
43- ; CHECK-NEXT: fadda h0, p0, h0, z2.h
44- ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
53+ ; CHECK-NEXT: ldr q1, [x0]
54+ ; CHECK-NEXT: fadd h0, h0, h1
55+ ; CHECK-NEXT: mov z2.h, z1.h[1]
56+ ; CHECK-NEXT: fadd h0, h0, h2
57+ ; CHECK-NEXT: mov z2.h, z1.h[2]
58+ ; CHECK-NEXT: fadd h0, h0, h2
59+ ; CHECK-NEXT: mov z2.h, z1.h[3]
60+ ; CHECK-NEXT: fadd h0, h0, h2
61+ ; CHECK-NEXT: mov z2.h, z1.h[4]
62+ ; CHECK-NEXT: fadd h0, h0, h2
63+ ; CHECK-NEXT: mov z2.h, z1.h[5]
64+ ; CHECK-NEXT: fadd h0, h0, h2
65+ ; CHECK-NEXT: mov z2.h, z1.h[6]
66+ ; CHECK-NEXT: mov z1.h, z1.h[7]
67+ ; CHECK-NEXT: fadd h0, h0, h2
68+ ; CHECK-NEXT: fadd h0, h0, h1
69+ ; CHECK-NEXT: ldr q1, [x0, #16]
70+ ; CHECK-NEXT: mov z2.h, z1.h[1]
71+ ; CHECK-NEXT: fadd h0, h0, h1
72+ ; CHECK-NEXT: fadd h0, h0, h2
73+ ; CHECK-NEXT: mov z2.h, z1.h[2]
74+ ; CHECK-NEXT: fadd h0, h0, h2
75+ ; CHECK-NEXT: mov z2.h, z1.h[3]
76+ ; CHECK-NEXT: fadd h0, h0, h2
77+ ; CHECK-NEXT: mov z2.h, z1.h[4]
78+ ; CHECK-NEXT: fadd h0, h0, h2
79+ ; CHECK-NEXT: mov z2.h, z1.h[5]
80+ ; CHECK-NEXT: fadd h0, h0, h2
81+ ; CHECK-NEXT: mov z2.h, z1.h[6]
82+ ; CHECK-NEXT: mov z1.h, z1.h[7]
83+ ; CHECK-NEXT: fadd h0, h0, h2
84+ ; CHECK-NEXT: fadd h0, h0, h1
4585; CHECK-NEXT: ret
4686 %op = load <16 x half >, ptr %a
4787 %res = call half @llvm.vector.reduce.fadd.v16f16 (half %start , <16 x half > %op )
@@ -51,11 +91,10 @@ define half @fadda_v16f16(half %start, ptr %a) {
5191define float @fadda_v2f32 (float %start , <2 x float > %a ) {
5292; CHECK-LABEL: fadda_v2f32:
5393; CHECK: // %bb.0:
54- ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
55- ; CHECK-NEXT: ptrue p0.s, vl2
5694; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
57- ; CHECK-NEXT: fadda s0, p0, s0, z1.s
58- ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
95+ ; CHECK-NEXT: fadd s0, s0, s1
96+ ; CHECK-NEXT: mov z1.s, z1.s[1]
97+ ; CHECK-NEXT: fadd s0, s0, s1
5998; CHECK-NEXT: ret
6099 %res = call float @llvm.vector.reduce.fadd.v2f32 (float %start , <2 x float > %a )
61100 ret float %res
@@ -64,11 +103,14 @@ define float @fadda_v2f32(float %start, <2 x float> %a) {
64103define float @fadda_v4f32 (float %start , <4 x float > %a ) {
65104; CHECK-LABEL: fadda_v4f32:
66105; CHECK: // %bb.0:
67- ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
68- ; CHECK-NEXT: ptrue p0.s, vl4
69106; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
70- ; CHECK-NEXT: fadda s0, p0, s0, z1.s
71- ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
107+ ; CHECK-NEXT: fadd s0, s0, s1
108+ ; CHECK-NEXT: mov z2.s, z1.s[1]
109+ ; CHECK-NEXT: fadd s0, s0, s2
110+ ; CHECK-NEXT: mov z2.s, z1.s[2]
111+ ; CHECK-NEXT: mov z1.s, z1.s[3]
112+ ; CHECK-NEXT: fadd s0, s0, s2
113+ ; CHECK-NEXT: fadd s0, s0, s1
72114; CHECK-NEXT: ret
73115 %res = call float @llvm.vector.reduce.fadd.v4f32 (float %start , <4 x float > %a )
74116 ret float %res
@@ -77,12 +119,22 @@ define float @fadda_v4f32(float %start, <4 x float> %a) {
77119define float @fadda_v8f32 (float %start , ptr %a ) {
78120; CHECK-LABEL: fadda_v8f32:
79121; CHECK: // %bb.0:
80- ; CHECK-NEXT: ldp q1, q2, [x0]
81- ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
82- ; CHECK-NEXT: ptrue p0.s, vl4
83- ; CHECK-NEXT: fadda s0, p0, s0, z1.s
84- ; CHECK-NEXT: fadda s0, p0, s0, z2.s
85- ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
122+ ; CHECK-NEXT: ldr q1, [x0]
123+ ; CHECK-NEXT: fadd s0, s0, s1
124+ ; CHECK-NEXT: mov z2.s, z1.s[1]
125+ ; CHECK-NEXT: fadd s0, s0, s2
126+ ; CHECK-NEXT: mov z2.s, z1.s[2]
127+ ; CHECK-NEXT: mov z1.s, z1.s[3]
128+ ; CHECK-NEXT: fadd s0, s0, s2
129+ ; CHECK-NEXT: fadd s0, s0, s1
130+ ; CHECK-NEXT: ldr q1, [x0, #16]
131+ ; CHECK-NEXT: mov z2.s, z1.s[1]
132+ ; CHECK-NEXT: fadd s0, s0, s1
133+ ; CHECK-NEXT: fadd s0, s0, s2
134+ ; CHECK-NEXT: mov z2.s, z1.s[2]
135+ ; CHECK-NEXT: mov z1.s, z1.s[3]
136+ ; CHECK-NEXT: fadd s0, s0, s2
137+ ; CHECK-NEXT: fadd s0, s0, s1
86138; CHECK-NEXT: ret
87139 %op = load <8 x float >, ptr %a
88140 %res = call float @llvm.vector.reduce.fadd.v8f32 (float %start , <8 x float > %op )
@@ -102,11 +154,10 @@ define double @fadda_v1f64(double %start, <1 x double> %a) {
102154define double @fadda_v2f64 (double %start , <2 x double > %a ) {
103155; CHECK-LABEL: fadda_v2f64:
104156; CHECK: // %bb.0:
105- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
106- ; CHECK-NEXT: ptrue p0.d, vl2
107157; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
108- ; CHECK-NEXT: fadda d0, p0, d0, z1.d
109- ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
158+ ; CHECK-NEXT: fadd d0, d0, d1
159+ ; CHECK-NEXT: mov z1.d, z1.d[1]
160+ ; CHECK-NEXT: fadd d0, d0, d1
110161; CHECK-NEXT: ret
111162 %res = call double @llvm.vector.reduce.fadd.v2f64 (double %start , <2 x double > %a )
112163 ret double %res
@@ -115,12 +166,14 @@ define double @fadda_v2f64(double %start, <2 x double> %a) {
115166define double @fadda_v4f64 (double %start , ptr %a ) {
116167; CHECK-LABEL: fadda_v4f64:
117168; CHECK: // %bb.0:
118- ; CHECK-NEXT: ldp q1, q2, [x0]
119- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
120- ; CHECK-NEXT: ptrue p0.d, vl2
121- ; CHECK-NEXT: fadda d0, p0, d0, z1.d
122- ; CHECK-NEXT: fadda d0, p0, d0, z2.d
123- ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
169+ ; CHECK-NEXT: ldr q1, [x0]
170+ ; CHECK-NEXT: fadd d0, d0, d1
171+ ; CHECK-NEXT: mov z1.d, z1.d[1]
172+ ; CHECK-NEXT: fadd d0, d0, d1
173+ ; CHECK-NEXT: ldr q1, [x0, #16]
174+ ; CHECK-NEXT: fadd d0, d0, d1
175+ ; CHECK-NEXT: mov z1.d, z1.d[1]
176+ ; CHECK-NEXT: fadd d0, d0, d1
124177; CHECK-NEXT: ret
125178 %op = load <4 x double >, ptr %a
126179 %res = call double @llvm.vector.reduce.fadd.v4f64 (double %start , <4 x double > %op )
0 commit comments