; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X64

; Generic FMA intrinsic declarations shared by all tests below
; (these replaced the legacy target-specific @llvm.x86.fma.* intrinsics).
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare float @llvm.fma.f32(float, float, float)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

; This test checks combinations of FNEG and FMA intrinsics
; fma(a, b, -c) --> fmsub(a, b, c)
define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test1:
; X32:       # %bb.0:
; X32-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0:
; X64-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; X64-NEXT:    retq
  %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %r = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
  ret <8 x float> %r
}

; -fma(a, b, c) --> fnmsub(a, b, c)
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; X32-LABEL: test2:
; X32:       # %bb.0:
; X32-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0:
; X64-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq
  %t0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %t0
  ret <4 x float> %sub.i
}

; Scalar fnmadd in lane 0; the outer whole-vector negate cannot be folded into
; the scalar FMA (the upper lanes come from %a unnegated), so a vxorps remains.
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; X32-LABEL: test3:
; X32:       # %bb.0:
; X32-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; X32-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X32-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # %bb.0:
; X64-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %a0 = extractelement <4 x float> %a, i64 0
  %b0 = extractelement <4 x float> %b, i64 0
  %c0 = extractelement <4 x float> %c, i64 0
  %negb0 = fneg float %b0
  %t0 = tail call float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
  %i = insertelement <4 x float> %a, float %t0, i64 0
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %i
  ret <4 x float> %sub.i
}

; -fma(a, b, -c) = -fmsub(a, b, c) --> fnmadd(a, b, c)
define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test4:
; X32:       # %bb.0:
; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # %bb.0:
; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %negc = fneg <8 x float> %c
  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
  %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t0
  ret <8 x float> %sub.i
}

; The two negations of %c cancel: fma(a, b, -(-c)) --> fmadd(a, b, c)
define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test5:
; X32:       # %bb.0:
; X32-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; X32-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %sub.c = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %negsubc = fneg <8 x float> %sub.c
  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
  ret <8 x float> %t0
}

; -fma(a, b, c) --> fnmsub(a, b, c), double-precision variant
define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; X32-LABEL: test6:
; X32:       # %bb.0:
; X32-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X32-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # %bb.0:
; X64-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq
  %t0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %t0
  ret <2 x double> %sub.i
}

; fneg of a scalar that is then splatted: the negate folds into the FMA
; (fnmadd) after the broadcast.
define <8 x float> @test7(float %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test7:
; X32:       # %bb.0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm1
; X32-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %t0 = insertelement <8 x float> undef, float %a, i32 0
  %t1 = fsub <8 x float> <float -0.0, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, %t0
  %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
  ret <8 x float> %t3
}

; Same as test7, but the scalar fneg happens before the insert/splat;
; codegen should be identical (fnmadd after broadcast).
define <8 x float> @test8(float %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test8:
; X32:       # %bb.0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm1
; X32-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %t0 = fsub float -0.0, %a
  %t1 = insertelement <8 x float> undef, float %t0, i32 0
  %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
  ret <8 x float> %t3
}