11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFULLFP16
3- ; RUN: llc < %s -mtriple=aarch64 --enable-no-nans-fp-math | FileCheck %s --check-prefixes=CHECK,CHECK-NONANS
43; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FULLFP16
54
65define <1 x float > @dup_v1i32_oeq (float %a , float %b ) {
@@ -69,27 +68,13 @@ entry:
6968}
7069
7170define <1 x float > @dup_v1i32_one (float %a , float %b ) {
72- ; CHECK-NOFULLFP16-LABEL: dup_v1i32_one:
73- ; CHECK-NOFULLFP16: // %bb.0: // %entry
74- ; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1
75- ; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
76- ; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
77- ; CHECK-NOFULLFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
78- ; CHECK-NOFULLFP16-NEXT: ret
79- ;
80- ; CHECK-NONANS-LABEL: dup_v1i32_one:
81- ; CHECK-NONANS: // %bb.0: // %entry
82- ; CHECK-NONANS-NEXT: fcmeq s0, s0, s1
83- ; CHECK-NONANS-NEXT: mvn v0.8b, v0.8b
84- ; CHECK-NONANS-NEXT: ret
85- ;
86- ; CHECK-FULLFP16-LABEL: dup_v1i32_one:
87- ; CHECK-FULLFP16: // %bb.0: // %entry
88- ; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1
89- ; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
90- ; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
91- ; CHECK-FULLFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
92- ; CHECK-FULLFP16-NEXT: ret
71+ ; CHECK-LABEL: dup_v1i32_one:
72+ ; CHECK: // %bb.0: // %entry
73+ ; CHECK-NEXT: fcmgt s2, s0, s1
74+ ; CHECK-NEXT: fcmgt s0, s1, s0
75+ ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
76+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
77+ ; CHECK-NEXT: ret
9378entry:
9479 %0 = fcmp one float %a , %b
9580 %vcmpd.i = sext i1 %0 to i32
@@ -98,6 +83,20 @@ entry:
9883 ret <1 x float > %1
9984}
10085
; Scalar 'fcmp nnan one' on two floats: the i1 result is sign-extended to i32,
; inserted into lane 0 of a <1 x i32> and bitcast to <1 x float>.
; The expected assembly below is an equality compare (fcmeq) followed by a
; bitwise NOT (mvn) of the result.
86+ define <1 x float > @dup_v1i32_one_nnan (float %a , float %b ) {
87+ ; CHECK-LABEL: dup_v1i32_one_nnan:
88+ ; CHECK: // %bb.0: // %entry
89+ ; CHECK-NEXT: fcmeq s0, s0, s1
90+ ; CHECK-NEXT: mvn v0.8b, v0.8b
91+ ; CHECK-NEXT: ret
92+ entry:
93+ %0 = fcmp nnan one float %a , %b
94+ %vcmpd.i = sext i1 %0 to i32
95+ %vecinit.i = insertelement <1 x i32 > poison, i32 %vcmpd.i , i64 0
96+ %1 = bitcast <1 x i32 > %vecinit.i to <1 x float >
97+ ret <1 x float > %1
98+ }
99+
101100define <1 x float > @dup_v1i32_ord (float %a , float %b ) {
102101; CHECK-LABEL: dup_v1i32_ord:
103102; CHECK: // %bb.0: // %entry
@@ -115,26 +114,13 @@ entry:
115114}
116115
117116define <1 x float > @dup_v1i32_ueq (float %a , float %b ) {
118- ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq:
119- ; CHECK-NOFULLFP16: // %bb.0: // %entry
120- ; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1
121- ; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
122- ; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
123- ; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
124- ; CHECK-NOFULLFP16-NEXT: ret
125- ;
126- ; CHECK-NONANS-LABEL: dup_v1i32_ueq:
127- ; CHECK-NONANS: // %bb.0: // %entry
128- ; CHECK-NONANS-NEXT: fcmeq s0, s0, s1
129- ; CHECK-NONANS-NEXT: ret
130- ;
131- ; CHECK-FULLFP16-LABEL: dup_v1i32_ueq:
132- ; CHECK-FULLFP16: // %bb.0: // %entry
133- ; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1
134- ; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
135- ; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
136- ; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
137- ; CHECK-FULLFP16-NEXT: ret
117+ ; CHECK-LABEL: dup_v1i32_ueq:
118+ ; CHECK: // %bb.0: // %entry
119+ ; CHECK-NEXT: fcmgt s2, s0, s1
120+ ; CHECK-NEXT: fcmgt s0, s1, s0
121+ ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
122+ ; CHECK-NEXT: mvn v0.8b, v0.8b
123+ ; CHECK-NEXT: ret
138124entry:
139125 %0 = fcmp ueq float %a , %b
140126 %vcmpd.i = sext i1 %0 to i32
@@ -143,23 +129,25 @@ entry:
143129 ret <1 x float > %1
144130}
145131
; Scalar 'fcmp nnan ueq' on two floats: the i1 result is sign-extended to i32,
; inserted into lane 0 of a <1 x i32> and bitcast to <1 x float>.
; The expected assembly below is a single fcmeq with no extra mask fix-up.
132+ define <1 x float > @dup_v1i32_ueq_nnan (float %a , float %b ) {
133+ ; CHECK-LABEL: dup_v1i32_ueq_nnan:
134+ ; CHECK: // %bb.0: // %entry
135+ ; CHECK-NEXT: fcmeq s0, s0, s1
136+ ; CHECK-NEXT: ret
137+ entry:
138+ %0 = fcmp nnan ueq float %a , %b
139+ %vcmpd.i = sext i1 %0 to i32
140+ %vecinit.i = insertelement <1 x i32 > poison, i32 %vcmpd.i , i64 0
141+ %1 = bitcast <1 x i32 > %vecinit.i to <1 x float >
142+ ret <1 x float > %1
143+ }
144+
146145define <1 x float > @dup_v1i32_ugt (float %a , float %b ) {
147- ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ugt:
148- ; CHECK-NOFULLFP16: // %bb.0: // %entry
149- ; CHECK-NOFULLFP16-NEXT: fcmge s0, s1, s0
150- ; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
151- ; CHECK-NOFULLFP16-NEXT: ret
152- ;
153- ; CHECK-NONANS-LABEL: dup_v1i32_ugt:
154- ; CHECK-NONANS: // %bb.0: // %entry
155- ; CHECK-NONANS-NEXT: fcmgt s0, s0, s1
156- ; CHECK-NONANS-NEXT: ret
157- ;
158- ; CHECK-FULLFP16-LABEL: dup_v1i32_ugt:
159- ; CHECK-FULLFP16: // %bb.0: // %entry
160- ; CHECK-FULLFP16-NEXT: fcmge s0, s1, s0
161- ; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
162- ; CHECK-FULLFP16-NEXT: ret
146+ ; CHECK-LABEL: dup_v1i32_ugt:
147+ ; CHECK: // %bb.0: // %entry
148+ ; CHECK-NEXT: fcmge s0, s1, s0
149+ ; CHECK-NEXT: mvn v0.8b, v0.8b
150+ ; CHECK-NEXT: ret
163151entry:
164152 %0 = fcmp ugt float %a , %b
165153 %vcmpd.i = sext i1 %0 to i32
@@ -168,23 +156,25 @@ entry:
168156 ret <1 x float > %1
169157}
170158
; Scalar 'fcmp nnan ugt' on two floats: the i1 result is sign-extended to i32,
; inserted into lane 0 of a <1 x i32> and bitcast to <1 x float>.
; The expected assembly below is a single fcmgt with the operands in source
; order (s0, s1).
159+ define <1 x float > @dup_v1i32_ugt_nnan (float %a , float %b ) {
160+ ; CHECK-LABEL: dup_v1i32_ugt_nnan:
161+ ; CHECK: // %bb.0: // %entry
162+ ; CHECK-NEXT: fcmgt s0, s0, s1
163+ ; CHECK-NEXT: ret
164+ entry:
165+ %0 = fcmp nnan ugt float %a , %b
166+ %vcmpd.i = sext i1 %0 to i32
167+ %vecinit.i = insertelement <1 x i32 > poison, i32 %vcmpd.i , i64 0
168+ %1 = bitcast <1 x i32 > %vecinit.i to <1 x float >
169+ ret <1 x float > %1
170+ }
171+
171172define <1 x float > @dup_v1i32_uge (float %a , float %b ) {
172- ; CHECK-NOFULLFP16-LABEL: dup_v1i32_uge:
173- ; CHECK-NOFULLFP16: // %bb.0: // %entry
174- ; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
175- ; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
176- ; CHECK-NOFULLFP16-NEXT: ret
177- ;
178- ; CHECK-NONANS-LABEL: dup_v1i32_uge:
179- ; CHECK-NONANS: // %bb.0: // %entry
180- ; CHECK-NONANS-NEXT: fcmge s0, s0, s1
181- ; CHECK-NONANS-NEXT: ret
182- ;
183- ; CHECK-FULLFP16-LABEL: dup_v1i32_uge:
184- ; CHECK-FULLFP16: // %bb.0: // %entry
185- ; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
186- ; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
187- ; CHECK-FULLFP16-NEXT: ret
173+ ; CHECK-LABEL: dup_v1i32_uge:
174+ ; CHECK: // %bb.0: // %entry
175+ ; CHECK-NEXT: fcmgt s0, s1, s0
176+ ; CHECK-NEXT: mvn v0.8b, v0.8b
177+ ; CHECK-NEXT: ret
188178entry:
189179 %0 = fcmp uge float %a , %b
190180 %vcmpd.i = sext i1 %0 to i32
@@ -193,23 +183,26 @@ entry:
193183 ret <1 x float > %1
194184}
195185
; Scalar 'fcmp nnan uge' on two floats: the i1 result is sign-extended to i32,
; inserted into lane 0 of a <1 x i32> and bitcast to <1 x float>.
; The expected assembly below is a single fcmge with the operands in source
; order (s0, s1).
186+ define <1 x float > @dup_v1i32_uge_nnan (float %a , float %b ) {
187+ ; CHECK-LABEL: dup_v1i32_uge_nnan:
188+ ; CHECK: // %bb.0: // %entry
189+ ; CHECK-NEXT: fcmge s0, s0, s1
190+ ; CHECK-NEXT: ret
191+ entry:
192+ %0 = fcmp nnan uge float %a , %b
193+ %vcmpd.i = sext i1 %0 to i32
194+ %vecinit.i = insertelement <1 x i32 > poison, i32 %vcmpd.i , i64 0
195+ %1 = bitcast <1 x i32 > %vecinit.i to <1 x float >
196+ ret <1 x float > %1
197+ }
198+
199+
196200define <1 x float > @dup_v1i32_ult (float %a , float %b ) {
197- ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ult:
198- ; CHECK-NOFULLFP16: // %bb.0: // %entry
199- ; CHECK-NOFULLFP16-NEXT: fcmge s0, s0, s1
200- ; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
201- ; CHECK-NOFULLFP16-NEXT: ret
202- ;
203- ; CHECK-NONANS-LABEL: dup_v1i32_ult:
204- ; CHECK-NONANS: // %bb.0: // %entry
205- ; CHECK-NONANS-NEXT: fcmgt s0, s1, s0
206- ; CHECK-NONANS-NEXT: ret
207- ;
208- ; CHECK-FULLFP16-LABEL: dup_v1i32_ult:
209- ; CHECK-FULLFP16: // %bb.0: // %entry
210- ; CHECK-FULLFP16-NEXT: fcmge s0, s0, s1
211- ; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
212- ; CHECK-FULLFP16-NEXT: ret
201+ ; CHECK-LABEL: dup_v1i32_ult:
202+ ; CHECK: // %bb.0: // %entry
203+ ; CHECK-NEXT: fcmge s0, s0, s1
204+ ; CHECK-NEXT: mvn v0.8b, v0.8b
205+ ; CHECK-NEXT: ret
213206entry:
214207 %0 = fcmp ult float %a , %b
215208 %vcmpd.i = sext i1 %0 to i32
@@ -218,23 +211,25 @@ entry:
218211 ret <1 x float > %1
219212}
220213
; Scalar 'fcmp nnan ult' on two floats: the i1 result is sign-extended to i32,
; inserted into lane 0 of a <1 x i32> and bitcast to <1 x float>.
; The expected assembly below is a single fcmgt with the operands swapped
; (s1, s0).
214+ define <1 x float > @dup_v1i32_ult_nnan (float %a , float %b ) {
215+ ; CHECK-LABEL: dup_v1i32_ult_nnan:
216+ ; CHECK: // %bb.0: // %entry
217+ ; CHECK-NEXT: fcmgt s0, s1, s0
218+ ; CHECK-NEXT: ret
219+ entry:
220+ %0 = fcmp nnan ult float %a , %b
221+ %vcmpd.i = sext i1 %0 to i32
222+ %vecinit.i = insertelement <1 x i32 > poison, i32 %vcmpd.i , i64 0
223+ %1 = bitcast <1 x i32 > %vecinit.i to <1 x float >
224+ ret <1 x float > %1
225+ }
226+
221227define <1 x float > @dup_v1i32_ule (float %a , float %b ) {
222- ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ule:
223- ; CHECK-NOFULLFP16: // %bb.0: // %entry
224- ; CHECK-NOFULLFP16-NEXT: fcmgt s0, s0, s1
225- ; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
226- ; CHECK-NOFULLFP16-NEXT: ret
227- ;
228- ; CHECK-NONANS-LABEL: dup_v1i32_ule:
229- ; CHECK-NONANS: // %bb.0: // %entry
230- ; CHECK-NONANS-NEXT: fcmge s0, s1, s0
231- ; CHECK-NONANS-NEXT: ret
232- ;
233- ; CHECK-FULLFP16-LABEL: dup_v1i32_ule:
234- ; CHECK-FULLFP16: // %bb.0: // %entry
235- ; CHECK-FULLFP16-NEXT: fcmgt s0, s0, s1
236- ; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
237- ; CHECK-FULLFP16-NEXT: ret
228+ ; CHECK-LABEL: dup_v1i32_ule:
229+ ; CHECK: // %bb.0: // %entry
230+ ; CHECK-NEXT: fcmgt s0, s0, s1
231+ ; CHECK-NEXT: mvn v0.8b, v0.8b
232+ ; CHECK-NEXT: ret
238233entry:
239234 %0 = fcmp ule float %a , %b
240235 %vcmpd.i = sext i1 %0 to i32
@@ -243,6 +238,19 @@ entry:
243238 ret <1 x float > %1
244239}
245240
; Scalar 'fcmp nnan ule' on two floats: the i1 result is sign-extended to i32,
; inserted into lane 0 of a <1 x i32> and bitcast to <1 x float>.
; The expected assembly below is a single fcmge with the operands swapped
; (s1, s0).
241+ define <1 x float > @dup_v1i32_ule_nnan (float %a , float %b ) {
242+ ; CHECK-LABEL: dup_v1i32_ule_nnan:
243+ ; CHECK: // %bb.0: // %entry
244+ ; CHECK-NEXT: fcmge s0, s1, s0
245+ ; CHECK-NEXT: ret
246+ entry:
247+ %0 = fcmp nnan ule float %a , %b
248+ %vcmpd.i = sext i1 %0 to i32
249+ %vecinit.i = insertelement <1 x i32 > poison, i32 %vcmpd.i , i64 0
250+ %1 = bitcast <1 x i32 > %vecinit.i to <1 x float >
251+ ret <1 x float > %1
252+ }
253+
246254define <1 x float > @dup_v1i32_une (float %a , float %b ) {
247255; CHECK-LABEL: dup_v1i32_une:
248256; CHECK: // %bb.0: // %entry
@@ -326,13 +334,6 @@ define <8 x half> @dup_v8i16(half %a, half %b) {
326334; CHECK-NOFULLFP16-NEXT: fcmeq s0, s0, s1
327335; CHECK-NOFULLFP16-NEXT: ret
328336;
329- ; CHECK-NONANS-LABEL: dup_v8i16:
330- ; CHECK-NONANS: // %bb.0: // %entry
331- ; CHECK-NONANS-NEXT: fcvt s1, h1
332- ; CHECK-NONANS-NEXT: fcvt s0, h0
333- ; CHECK-NONANS-NEXT: fcmeq s0, s0, s1
334- ; CHECK-NONANS-NEXT: ret
335- ;
336337; CHECK-FULLFP16-LABEL: dup_v8i16:
337338; CHECK-FULLFP16: // %bb.0: // %entry
338339; CHECK-FULLFP16-NEXT: fcmp h0, h1
@@ -350,6 +351,30 @@ define <8 x half> @dup_v8i16(half %a, half %b) {
350351 ret <8 x half > %1
351352}
352353
; Scalar 'fcmp nnan oeq' on two halves: the i1 result is sign-extended to i16,
; inserted into lane 0 of an <8 x i16> and bitcast to <8 x half>.
; Two expected outputs are recorded below. In the run without +fullfp16 both
; operands are converted to single precision (fcvt) and compared with fcmeq.
; In the run with -mattr=+fullfp16 the compare is done with fcmp on the h
; registers and the mask is materialised with csetm + fmov.
354+ define <8 x half > @dup_v8i16_nnan (half %a , half %b ) {
355+ ; FIXME: Could be replaced with fcmeq + dup but the type of the former is
356+ ; promoted to i32 during selection and then the optimization does not apply.
357+ ; CHECK-NOFULLFP16-LABEL: dup_v8i16_nnan:
358+ ; CHECK-NOFULLFP16: // %bb.0: // %entry
359+ ; CHECK-NOFULLFP16-NEXT: fcvt s1, h1
360+ ; CHECK-NOFULLFP16-NEXT: fcvt s0, h0
361+ ; CHECK-NOFULLFP16-NEXT: fcmeq s0, s0, s1
362+ ; CHECK-NOFULLFP16-NEXT: ret
363+ ;
364+ ; CHECK-FULLFP16-LABEL: dup_v8i16_nnan:
365+ ; CHECK-FULLFP16: // %bb.0: // %entry
366+ ; CHECK-FULLFP16-NEXT: fcmp h0, h1
367+ ; CHECK-FULLFP16-NEXT: csetm w8, eq
368+ ; CHECK-FULLFP16-NEXT: fmov s0, w8
369+ ; CHECK-FULLFP16-NEXT: ret
370+ entry:
371+ %0 = fcmp nnan oeq half %a , %b
372+ %vcmpd.i = sext i1 %0 to i16
373+ %vecinit.i = insertelement <8 x i16 > poison, i16 %vcmpd.i , i64 0
374+ %1 = bitcast <8 x i16 > %vecinit.i to <8 x half >
375+ ret <8 x half > %1
376+ }
377+
353378; Check that a mask is not generated for non-vectorized users.
354379define i32 @mask_i32 (float %a , float %b ) {
355380; CHECK-LABEL: mask_i32:
0 commit comments