1212; =>
1313; recip = 1.0 / D; a * recip; b * recip; c * recip;
1414define void @three_fdiv_float (float %D , float %a , float %b , float %c ) {
15- ; CHECK-SD-LABEL: three_fdiv_float:
16- ; CHECK-SD: // %bb.0:
17- ; CHECK-SD-NEXT: fmov s4, #1.00000000
18- ; CHECK-SD-NEXT: fdiv s4, s4, s0
19- ; CHECK-SD-NEXT: fmul s0, s1, s4
20- ; CHECK-SD-NEXT: fmul s1, s2, s4
21- ; CHECK-SD-NEXT: fmul s2, s3, s4
22- ; CHECK-SD-NEXT: b foo_3f
23- ;
24- ; CHECK-GI-LABEL: three_fdiv_float:
25- ; CHECK-GI: // %bb.0:
26- ; CHECK-GI-NEXT: fdiv s4, s1, s0
27- ; CHECK-GI-NEXT: fdiv s1, s2, s0
28- ; CHECK-GI-NEXT: fdiv s2, s3, s0
29- ; CHECK-GI-NEXT: fmov s0, s4
30- ; CHECK-GI-NEXT: b foo_3f
15+ ; CHECK-LABEL: three_fdiv_float:
16+ ; CHECK: // %bb.0:
17+ ; CHECK-NEXT: fmov s4, #1.00000000
18+ ; CHECK-NEXT: fdiv s4, s4, s0
19+ ; CHECK-NEXT: fmul s0, s1, s4
20+ ; CHECK-NEXT: fmul s1, s2, s4
21+ ; CHECK-NEXT: fmul s2, s3, s4
22+ ; CHECK-NEXT: b foo_3f
3123 %div = fdiv arcp float %a , %D
3224 %div1 = fdiv arcp float %b , %D
3325 %div2 = fdiv arcp float %c , %D
@@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
3628}
3729
3830define void @three_fdiv_double (double %D , double %a , double %b , double %c ) {
39- ; CHECK-SD-LABEL: three_fdiv_double:
40- ; CHECK-SD: // %bb.0:
41- ; CHECK-SD-NEXT: fmov d4, #1.00000000
42- ; CHECK-SD-NEXT: fdiv d4, d4, d0
43- ; CHECK-SD-NEXT: fmul d0, d1, d4
44- ; CHECK-SD-NEXT: fmul d1, d2, d4
45- ; CHECK-SD-NEXT: fmul d2, d3, d4
46- ; CHECK-SD-NEXT: b foo_3d
47- ;
48- ; CHECK-GI-LABEL: three_fdiv_double:
49- ; CHECK-GI: // %bb.0:
50- ; CHECK-GI-NEXT: fdiv d4, d1, d0
51- ; CHECK-GI-NEXT: fdiv d1, d2, d0
52- ; CHECK-GI-NEXT: fdiv d2, d3, d0
53- ; CHECK-GI-NEXT: fmov d0, d4
54- ; CHECK-GI-NEXT: b foo_3d
31+ ; CHECK-LABEL: three_fdiv_double:
32+ ; CHECK: // %bb.0:
33+ ; CHECK-NEXT: fmov d4, #1.00000000
34+ ; CHECK-NEXT: fdiv d4, d4, d0
35+ ; CHECK-NEXT: fmul d0, d1, d4
36+ ; CHECK-NEXT: fmul d1, d2, d4
37+ ; CHECK-NEXT: fmul d2, d3, d4
38+ ; CHECK-NEXT: b foo_3d
5539 %div = fdiv arcp double %a , %D
5640 %div1 = fdiv arcp double %b , %D
5741 %div2 = fdiv arcp double %c , %D
@@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
6044}
6145
6246define void @three_fdiv_4xfloat (<4 x float > %D , <4 x float > %a , <4 x float > %b , <4 x float > %c ) {
63- ; CHECK-SD-LABEL: three_fdiv_4xfloat:
64- ; CHECK-SD: // %bb.0:
65- ; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
66- ; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
67- ; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
68- ; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
69- ; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
70- ; CHECK-SD-NEXT: b foo_3_4xf
71- ;
72- ; CHECK-GI-LABEL: three_fdiv_4xfloat:
73- ; CHECK-GI: // %bb.0:
74- ; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s
75- ; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s
76- ; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
77- ; CHECK-GI-NEXT: mov v0.16b, v4.16b
78- ; CHECK-GI-NEXT: b foo_3_4xf
47+ ; CHECK-LABEL: three_fdiv_4xfloat:
48+ ; CHECK: // %bb.0:
49+ ; CHECK-NEXT: fmov v4.4s, #1.00000000
50+ ; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
51+ ; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
52+ ; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
53+ ; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
54+ ; CHECK-NEXT: b foo_3_4xf
7955 %div = fdiv arcp <4 x float > %a , %D
8056 %div1 = fdiv arcp <4 x float > %b , %D
8157 %div2 = fdiv arcp <4 x float > %c , %D
@@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
8460}
8561
8662define void @three_fdiv_2xdouble (<2 x double > %D , <2 x double > %a , <2 x double > %b , <2 x double > %c ) {
87- ; CHECK-SD-LABEL: three_fdiv_2xdouble:
88- ; CHECK-SD: // %bb.0:
89- ; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
90- ; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d
91- ; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d
92- ; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d
93- ; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d
94- ; CHECK-SD-NEXT: b foo_3_2xd
95- ;
96- ; CHECK-GI-LABEL: three_fdiv_2xdouble:
97- ; CHECK-GI: // %bb.0:
98- ; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d
99- ; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d
100- ; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
101- ; CHECK-GI-NEXT: mov v0.16b, v4.16b
102- ; CHECK-GI-NEXT: b foo_3_2xd
63+ ; CHECK-LABEL: three_fdiv_2xdouble:
64+ ; CHECK: // %bb.0:
65+ ; CHECK-NEXT: fmov v4.2d, #1.00000000
66+ ; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
67+ ; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
68+ ; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
69+ ; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
70+ ; CHECK-NEXT: b foo_3_2xd
10371 %div = fdiv arcp <2 x double > %a , %D
10472 %div1 = fdiv arcp <2 x double > %b , %D
10573 %div2 = fdiv arcp <2 x double > %c , %D
@@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) {
135103 ret void
136104}
137105
138- define void @splat_three_fdiv_4xfloat (float %D , < 4 x float > %a , < 4 x float > %b , < 4 x float > %c ) {
139- ; CHECK-SD-LABEL: splat_three_fdiv_4xfloat :
; four_fdiv_multi_float: four arcp divisions that share the divisor %D, where
; the fourth division also uses %D as its dividend (%D / %D). All four
; quotients are handed to @foo_4f in a tail call.
; Expected code according to the check lines of this function:
;   SD-prefixed checks: one fdiv forms 1.0 / D, and all four quotients,
;     including D / D, are produced by fmul with that reciprocal.
;   GI-prefixed checks: the same reciprocal feeds three fmul, while D / D
;     stays a separate fdiv (s0 / s0).
; NOTE(review): the attribute group referenced on the define line is declared
; outside this excerpt. SD / GI presumably correspond to the SelectionDAG and
; GlobalISel run configurations - confirm against the RUN lines of the file.
106+ define void @four_fdiv_multi_float (float %D , float %a , float %b , float %c ) # 0 {
107+ ; CHECK-SD-LABEL: four_fdiv_multi_float :
140108; CHECK-SD: // %bb.0:
141- ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
142- ; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
143- ; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
144- ; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
145- ; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
146- ; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
147- ; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
148- ; CHECK-SD-NEXT: b foo_3_4xf
109+ ; CHECK-SD-NEXT: fmov s4, #1.00000000
110+ ; CHECK-SD-NEXT: fdiv s5, s4, s0
111+ ; CHECK-SD-NEXT: fmul s4, s1, s5
112+ ; CHECK-SD-NEXT: fmul s1, s2, s5
113+ ; CHECK-SD-NEXT: fmul s2, s3, s5
114+ ; CHECK-SD-NEXT: fmul s3, s0, s5
115+ ; CHECK-SD-NEXT: fmov s0, s4
116+ ; CHECK-SD-NEXT: b foo_4f
149117;
150- ; CHECK-GI-LABEL: splat_three_fdiv_4xfloat :
118+ ; CHECK-GI-LABEL: four_fdiv_multi_float :
151119; CHECK-GI: // %bb.0:
152- ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
153- ; CHECK-GI-NEXT: dup v4.4s, v0.s[0]
154- ; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s
155- ; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s
156- ; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s
157- ; CHECK-GI-NEXT: b foo_3_4xf
120+ ; CHECK-GI-NEXT: fmov s4, #1.00000000
121+ ; CHECK-GI-NEXT: fdiv s5, s4, s0
122+ ; CHECK-GI-NEXT: fdiv s4, s0, s0
123+ ; CHECK-GI-NEXT: fmul s0, s1, s5
124+ ; CHECK-GI-NEXT: fmul s1, s2, s5
125+ ; CHECK-GI-NEXT: fmul s2, s3, s5
126+ ; CHECK-GI-NEXT: fmov s3, s4
127+ ; CHECK-GI-NEXT: b foo_4f
128+ %div = fdiv arcp float %a , %D
129+ %div1 = fdiv arcp float %b , %D
130+ %div2 = fdiv arcp float %c , %D
; The divisor is also the dividend here; this is the case where the two
; sets of check lines above differ.
131+ %div3 = fdiv arcp float %D , %D
132+ tail call void @foo_4f (float %div , float %div1 , float %div2 , float %div3 )
133+ ret void
134+ }
135+
136+ define void @splat_three_fdiv_4xfloat (float %D , <4 x float > %a , <4 x float > %b , <4 x float > %c ) {
137+ ; CHECK-LABEL: splat_three_fdiv_4xfloat:
138+ ; CHECK: // %bb.0:
139+ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
140+ ; CHECK-NEXT: fmov v4.4s, #1.00000000
141+ ; CHECK-NEXT: dup v0.4s, v0.s[0]
142+ ; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
143+ ; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
144+ ; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
145+ ; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
146+ ; CHECK-NEXT: b foo_3_4xf
158147 %D.ins = insertelement <4 x float > poison, float %D , i64 0
159148 %splat = shufflevector <4 x float > %D.ins , <4 x float > poison, <4 x i32 > zeroinitializer
160149 %div = fdiv arcp <4 x float > %a , %splat
@@ -256,6 +245,7 @@ entry:
256245}
257246
; Callees that are declared here without bodies. The test functions visible
; in this file finish by branching / tail-calling into them with the computed
; quotients as arguments (e.g. @foo_4f receives the four results of
; four_fdiv_multi_float).
258247declare void @foo_3f (float , float , float )
248+ declare void @foo_4f (float , float , float , float )
259249declare void @foo_3d (double , double , double )
260250declare void @foo_3_4xf (<4 x float >, <4 x float >, <4 x float >)
261251declare void @foo_3_2xd (<2 x double >, <2 x double >, <2 x double >)
0 commit comments