1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve2 -o - < %s | FileCheck %s
2
+ ; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve2p1 -o - < %s | FileCheck %s
3
3
4
4
define <vscale x 8 x i1 > @not_icmp_sle_nxv8i16 (<vscale x 8 x i16 > %a , <vscale x 8 x i16 > %b ) {
5
5
; CHECK-LABEL: not_icmp_sle_nxv8i16:
@@ -220,6 +220,117 @@ define i1 @lane_mask_first(i64 %next, i64 %end) {
220
220
ret i1 %bit
221
221
}
222
222
223
+ ; Extracts lane 0 of the first predicate returned by the whilege.x2 intrinsic.
+ ; The expected code is the WHILEGE predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilege_x2_first (i64 %next , i64 %end ) {
224
+ ; CHECK-LABEL: whilege_x2_first:
225
+ ; CHECK: // %bb.0:
226
+ ; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
227
+ ; CHECK-NEXT: cset w0, mi
228
+ ; CHECK-NEXT: ret
229
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilege.x2.nxv4i1 (i64 %next , i64 %end )
230
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
231
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
232
+ ret i1 %bit
233
+ }
234
+
235
+ ; Extracts lane 0 of the first predicate returned by the whilegt.x2 intrinsic.
+ ; The expected code is the WHILEGT predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilegt_x2_first (i64 %next , i64 %end ) {
236
+ ; CHECK-LABEL: whilegt_x2_first:
237
+ ; CHECK: // %bb.0:
238
+ ; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1
239
+ ; CHECK-NEXT: cset w0, mi
240
+ ; CHECK-NEXT: ret
241
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilegt.x2.nxv4i1 (i64 %next , i64 %end )
242
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
243
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
244
+ ret i1 %bit
245
+ }
246
+
247
+ ; Extracts lane 0 of the first predicate returned by the whilehi.x2 intrinsic.
+ ; The expected code is the WHILEHI predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilehi_x2_first (i64 %next , i64 %end ) {
248
+ ; CHECK-LABEL: whilehi_x2_first:
249
+ ; CHECK: // %bb.0:
250
+ ; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1
251
+ ; CHECK-NEXT: cset w0, mi
252
+ ; CHECK-NEXT: ret
253
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilehi.x2.nxv4i1 (i64 %next , i64 %end )
254
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
255
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
256
+ ret i1 %bit
257
+ }
258
+
259
+ ; Extracts lane 0 of the first predicate returned by the whilehs.x2 intrinsic.
+ ; The expected code is the WHILEHS predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilehs_x2_first (i64 %next , i64 %end ) {
260
+ ; CHECK-LABEL: whilehs_x2_first:
261
+ ; CHECK: // %bb.0:
262
+ ; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1
263
+ ; CHECK-NEXT: cset w0, mi
264
+ ; CHECK-NEXT: ret
265
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilehs.x2.nxv4i1 (i64 %next , i64 %end )
266
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
267
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
268
+ ret i1 %bit
269
+ }
270
+
271
+ ; Extracts lane 0 of the first predicate returned by the whilele.x2 intrinsic.
+ ; The expected code is the WHILELE predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilele_x2_first (i64 %next , i64 %end ) {
272
+ ; CHECK-LABEL: whilele_x2_first:
273
+ ; CHECK: // %bb.0:
274
+ ; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1
275
+ ; CHECK-NEXT: cset w0, mi
276
+ ; CHECK-NEXT: ret
277
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilele.x2.nxv4i1 (i64 %next , i64 %end )
278
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
279
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
280
+ ret i1 %bit
281
+ }
282
+
283
+ ; Extracts lane 0 of the first predicate returned by the whilelo.x2 intrinsic.
+ ; The expected code is the WHILELO predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilelo_x2_first (i64 %next , i64 %end ) {
284
+ ; CHECK-LABEL: whilelo_x2_first:
285
+ ; CHECK: // %bb.0:
286
+ ; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1
287
+ ; CHECK-NEXT: cset w0, mi
288
+ ; CHECK-NEXT: ret
289
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilelo.x2.nxv4i1 (i64 %next , i64 %end )
290
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
291
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
292
+ ret i1 %bit
293
+ }
294
+
295
+ ; Extracts lane 0 of the first predicate returned by the whilels.x2 intrinsic.
+ ; The expected code is the WHILELS predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilels_x2_first (i64 %next , i64 %end ) {
296
+ ; CHECK-LABEL: whilels_x2_first:
297
+ ; CHECK: // %bb.0:
298
+ ; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1
299
+ ; CHECK-NEXT: cset w0, mi
300
+ ; CHECK-NEXT: ret
301
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilels.x2.nxv4i1 (i64 %next , i64 %end )
302
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
303
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
304
+ ret i1 %bit
305
+ }
306
+
307
+ ; Extracts lane 0 of the first predicate returned by the whilelt.x2 intrinsic.
+ ; The expected code is the WHILELT predicate-pair instruction followed directly
+ ; by a CSET on the MI condition. The callee name is spelled exactly as in this
+ ; file's matching declare line.
+ define i1 @whilelt_x2_first (i64 %next , i64 %end ) {
308
+ ; CHECK-LABEL: whilelt_x2_first:
309
+ ; CHECK: // %bb.0:
310
+ ; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1
311
+ ; CHECK-NEXT: cset w0, mi
312
+ ; CHECK-NEXT: ret
313
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilelt.x2.nxv4i1 (i64 %next , i64 %end )
314
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 0
315
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
316
+ ret i1 %bit
317
+ }
318
+
319
+ ; Do not combine to ptest when the extract is not from the first vector result
320
+ ; Extracts lane 0 of the second predicate (struct index 1) returned by the
+ ; whilege.x2 intrinsic. The expected code reads the lane out of p1 with a
+ ; MOV/FMOV/AND sequence instead of a CSET on the flags. The callee name is
+ ; spelled exactly as in this file's matching declare line.
+ define i1 @whilege_x2_second_result (i64 %next , i64 %end ) {
321
+ ; CHECK-LABEL: whilege_x2_second_result:
322
+ ; CHECK: // %bb.0:
323
+ ; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
324
+ ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
325
+ ; CHECK-NEXT: fmov w8, s0
326
+ ; CHECK-NEXT: and w0, w8, #0x1
327
+ ; CHECK-NEXT: ret
328
+ %predpair = call { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilege.x2.nxv4i1 (i64 %next , i64 %end )
329
+ %predicate = extractvalue { <vscale x 4 x i1 >, <vscale x 4 x i1 > } %predpair , 1
330
+ %bit = extractelement <vscale x 4 x i1 > %predicate , i64 0
331
+ ret i1 %bit
332
+ }
333
+
223
334
declare i64 @llvm.vscale.i64 ()
224
335
declare <vscale x 4 x i1 > @llvm.aarch64.sve.whilege.nxv4i1.i64 (i64 , i64 )
225
336
declare <vscale x 4 x i1 > @llvm.aarch64.sve.whilegt.nxv4i1.i64 (i64 , i64 )
@@ -230,3 +341,12 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
230
341
declare <vscale x 4 x i1 > @llvm.aarch64.sve.whilels.nxv4i1.i64 (i64 , i64 )
231
342
declare <vscale x 4 x i1 > @llvm.aarch64.sve.whilelt.nxv4i1.i64 (i64 , i64 )
232
343
declare <vscale x 4 x i1 > @llvm.get.active.lane.mask.nxv4i1.i64 (i64 , i64 )
344
+
345
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilege.x2.nxv4i1 (i64 , i64 )
346
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilegt.x2.nxv4i1 (i64 , i64 )
347
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilehi.x2.nxv4i1 (i64 , i64 )
348
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilehs.x2.nxv4i1 (i64 , i64 )
349
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilele.x2.nxv4i1 (i64 , i64 )
350
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilelo.x2.nxv4i1 (i64 , i64 )
351
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilels.x2.nxv4i1 (i64 , i64 )
352
+ declare { <vscale x 4 x i1 >, <vscale x 4 x i1 > } @llvm.aarch64.sve.whilelt.x2.nxv4i1 (i64 , i64 )
0 commit comments