Skip to content

Commit 63fae17

Browse files
committed
Initial half/bfloat tests
1 parent cf5b283 commit 63fae17

File tree

1 file changed

+140
-6
lines changed

1 file changed

+140
-6
lines changed

llvm/test/CodeGen/AArch64/vector-extract-last-active.ll

Lines changed: 140 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED
3-
; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED
2+
; RUN: llc -mtriple=aarch64 -mattr=+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED
3+
; RUN: llc -mtriple=aarch64 -mattr=+sve,+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED
44

55
define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) {
66
; NEON-FIXED-LABEL: extract_last_i8:
@@ -194,15 +194,115 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
194194
ret i64 %res
195195
}
196196

197+
; Fixed-width half test: calls the v8f16 overload of
; llvm.experimental.vector.extract.last.active on an <8 x half> data vector.
; The <8 x i1> mask is derived from an <8 x i16> argument, a lane being active
; when its mask element is non-zero (icmp ne against zeroinitializer).
; Expected output is pinned separately for the two RUN configurations:
;  * without SVE, the lane-index constant is loaded from a constant pool and
;    the final select is an s-register fcsel (hence the kill annotations that
;    widen $h2 and narrow $s0);
;  * with +sve, the lane indices come from an `index` instruction and the
;    final select is an h-register fcsel.
; In both sequences the data vector is spilled to the stack, the element is
; reloaded with `ldr h0, [x9]`, and fcsel picks h2/s2 (the third FP argument,
; i.e. %passthru) when the umaxv-reduced mask tests as zero.
define half @extract_last_half(<8 x half> %data, <8 x i16> %mask, half %passthru) {
198+
; NEON-FIXED-LABEL: extract_last_half:
199+
; NEON-FIXED: // %bb.0:
200+
; NEON-FIXED-NEXT: sub sp, sp, #16
201+
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
202+
; NEON-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
203+
; NEON-FIXED-NEXT: adrp x8, .LCPI4_0
204+
; NEON-FIXED-NEXT: mov x9, sp
205+
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI4_0]
206+
; NEON-FIXED-NEXT: str q0, [sp]
207+
; NEON-FIXED-NEXT: // kill: def $h2 killed $h2 def $s2
208+
; NEON-FIXED-NEXT: xtn v3.8b, v1.8h
209+
; NEON-FIXED-NEXT: umaxv h1, v1.8h
210+
; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
211+
; NEON-FIXED-NEXT: umaxv b3, v3.8b
212+
; NEON-FIXED-NEXT: fmov w8, s3
213+
; NEON-FIXED-NEXT: bfi x9, x8, #1, #3
214+
; NEON-FIXED-NEXT: fmov w8, s1
215+
; NEON-FIXED-NEXT: ldr h0, [x9]
216+
; NEON-FIXED-NEXT: tst w8, #0x1
217+
; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne
218+
; NEON-FIXED-NEXT: // kill: def $h0 killed $h0 killed $s0
219+
; NEON-FIXED-NEXT: add sp, sp, #16
220+
; NEON-FIXED-NEXT: ret
221+
;
222+
; SVE-FIXED-LABEL: extract_last_half:
223+
; SVE-FIXED: // %bb.0:
224+
; SVE-FIXED-NEXT: sub sp, sp, #16
225+
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
226+
; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
227+
; SVE-FIXED-NEXT: index z4.b, #0, #1
228+
; SVE-FIXED-NEXT: mov x9, sp
229+
; SVE-FIXED-NEXT: str q0, [sp]
230+
; SVE-FIXED-NEXT: xtn v3.8b, v1.8h
231+
; SVE-FIXED-NEXT: umaxv h1, v1.8h
232+
; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
233+
; SVE-FIXED-NEXT: umaxv b3, v3.8b
234+
; SVE-FIXED-NEXT: fmov w8, s3
235+
; SVE-FIXED-NEXT: bfi x9, x8, #1, #3
236+
; SVE-FIXED-NEXT: fmov w8, s1
237+
; SVE-FIXED-NEXT: ldr h0, [x9]
238+
; SVE-FIXED-NEXT: tst w8, #0x1
239+
; SVE-FIXED-NEXT: fcsel h0, h0, h2, ne
240+
; SVE-FIXED-NEXT: add sp, sp, #16
241+
; SVE-FIXED-NEXT: ret
242+
%notzero = icmp ne <8 x i16> %mask, zeroinitializer
243+
%res = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> %data, <8 x i1> %notzero, half %passthru)
244+
ret half %res
245+
}
246+
247+
; Fixed-width bfloat test: calls the v8bf16 overload of
; llvm.experimental.vector.extract.last.active on an <8 x bfloat> data vector.
; As in the half test, the <8 x i1> mask is `icmp ne` of an <8 x i16> argument
; against zeroinitializer. The expected instruction sequences are the same as
; for half apart from the constant-pool label (.LCPI5_0) in the non-SVE run:
; the element is reloaded from a stack copy of the vector with `ldr h0`, and
; fcsel falls back to the third FP argument (%passthru) when the reduced mask
; tests as zero.
; Both RUN lines pass +bf16 (see the top of the file).
define bfloat @extract_last_bfloat(<8 x bfloat> %data, <8 x i16> %mask, bfloat %passthru) {
248+
; NEON-FIXED-LABEL: extract_last_bfloat:
249+
; NEON-FIXED: // %bb.0:
250+
; NEON-FIXED-NEXT: sub sp, sp, #16
251+
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
252+
; NEON-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
253+
; NEON-FIXED-NEXT: adrp x8, .LCPI5_0
254+
; NEON-FIXED-NEXT: mov x9, sp
255+
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI5_0]
256+
; NEON-FIXED-NEXT: str q0, [sp]
257+
; NEON-FIXED-NEXT: // kill: def $h2 killed $h2 def $s2
258+
; NEON-FIXED-NEXT: xtn v3.8b, v1.8h
259+
; NEON-FIXED-NEXT: umaxv h1, v1.8h
260+
; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
261+
; NEON-FIXED-NEXT: umaxv b3, v3.8b
262+
; NEON-FIXED-NEXT: fmov w8, s3
263+
; NEON-FIXED-NEXT: bfi x9, x8, #1, #3
264+
; NEON-FIXED-NEXT: fmov w8, s1
265+
; NEON-FIXED-NEXT: ldr h0, [x9]
266+
; NEON-FIXED-NEXT: tst w8, #0x1
267+
; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne
268+
; NEON-FIXED-NEXT: // kill: def $h0 killed $h0 killed $s0
269+
; NEON-FIXED-NEXT: add sp, sp, #16
270+
; NEON-FIXED-NEXT: ret
271+
;
272+
; SVE-FIXED-LABEL: extract_last_bfloat:
273+
; SVE-FIXED: // %bb.0:
274+
; SVE-FIXED-NEXT: sub sp, sp, #16
275+
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
276+
; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
277+
; SVE-FIXED-NEXT: index z4.b, #0, #1
278+
; SVE-FIXED-NEXT: mov x9, sp
279+
; SVE-FIXED-NEXT: str q0, [sp]
280+
; SVE-FIXED-NEXT: xtn v3.8b, v1.8h
281+
; SVE-FIXED-NEXT: umaxv h1, v1.8h
282+
; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
283+
; SVE-FIXED-NEXT: umaxv b3, v3.8b
284+
; SVE-FIXED-NEXT: fmov w8, s3
285+
; SVE-FIXED-NEXT: bfi x9, x8, #1, #3
286+
; SVE-FIXED-NEXT: fmov w8, s1
287+
; SVE-FIXED-NEXT: ldr h0, [x9]
288+
; SVE-FIXED-NEXT: tst w8, #0x1
289+
; SVE-FIXED-NEXT: fcsel h0, h0, h2, ne
290+
; SVE-FIXED-NEXT: add sp, sp, #16
291+
; SVE-FIXED-NEXT: ret
292+
%notzero = icmp ne <8 x i16> %mask, zeroinitializer
293+
%res = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> %data, <8 x i1> %notzero, bfloat %passthru)
294+
ret bfloat %res
295+
}
296+
197297
define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %passthru) {
198298
; NEON-FIXED-LABEL: extract_last_float:
199299
; NEON-FIXED: // %bb.0:
200300
; NEON-FIXED-NEXT: sub sp, sp, #16
201301
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
202302
; NEON-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s
203-
; NEON-FIXED-NEXT: adrp x8, .LCPI4_0
303+
; NEON-FIXED-NEXT: adrp x8, .LCPI6_0
204304
; NEON-FIXED-NEXT: mov x9, sp
205-
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI4_0]
305+
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI6_0]
206306
; NEON-FIXED-NEXT: str q0, [sp]
207307
; NEON-FIXED-NEXT: xtn v3.4h, v1.4s
208308
; NEON-FIXED-NEXT: umaxv s1, v1.4s
@@ -248,9 +348,9 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %
248348
; NEON-FIXED-NEXT: sub sp, sp, #16
249349
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
250350
; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
251-
; NEON-FIXED-NEXT: adrp x8, .LCPI5_0
351+
; NEON-FIXED-NEXT: adrp x8, .LCPI7_0
252352
; NEON-FIXED-NEXT: mov x9, sp
253-
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI5_0]
353+
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI7_0]
254354
; NEON-FIXED-NEXT: str q0, [sp]
255355
; NEON-FIXED-NEXT: xtn v3.2s, v1.2d
256356
; NEON-FIXED-NEXT: umaxv s1, v1.4s
@@ -326,6 +426,36 @@ define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1
326426
ret i64 %res
327427
}
328428

429+
; Scalable half test: calls the nxv8f16 overload of
; llvm.experimental.vector.extract.last.active with the <vscale x 8 x i1>
; predicate argument used directly as the mask (no compare is needed).
; The assertions use the shared prefix, so both RUN lines must produce the
; same code for this function.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; enables SVE for the function -- confirm against the end of the file.
; Expected sequence: `index` builds a lane-number vector, the first `lastb`
; reads the number of the last active lane under p0, `whilels` turns that into
; a predicate, and the second `lastb` extracts the data element; the
; ptrue/ptest/fcsel tail then chooses between that element and h1 (%passthru).
define half @extract_last_half_scalable(<vscale x 8 x half> %data, <vscale x 8 x i1> %mask, half %passthru) #0 {
430+
; CHECK-LABEL: extract_last_half_scalable:
431+
; CHECK: // %bb.0:
432+
; CHECK-NEXT: index z2.h, #0, #1
433+
; CHECK-NEXT: lastb w8, p0, z2.h
434+
; CHECK-NEXT: whilels p1.h, xzr, x8
435+
; CHECK-NEXT: lastb h0, p1, z0.h
436+
; CHECK-NEXT: ptrue p1.h
437+
; CHECK-NEXT: ptest p1, p0.b
438+
; CHECK-NEXT: fcsel h0, h0, h1, ne
439+
; CHECK-NEXT: ret
440+
%res = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %mask, half %passthru)
441+
ret half %res
442+
}
443+
444+
; Scalable bfloat test: calls the nxv8bf16 overload of
; llvm.experimental.vector.extract.last.active with the <vscale x 8 x i1>
; predicate argument used directly as the mask.
; The expected instruction sequence is identical to the scalable half test:
; index / lastb / whilels / lastb to fetch the element, then
; ptrue / ptest / fcsel to choose between it and h1 (%passthru).
; The assertions use the shared prefix, so both RUN lines must agree.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; enables SVE for the function -- confirm against the end of the file.
define bfloat @extract_last_bfloat_scalable(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, bfloat %passthru) #0 {
445+
; CHECK-LABEL: extract_last_bfloat_scalable:
446+
; CHECK: // %bb.0:
447+
; CHECK-NEXT: index z2.h, #0, #1
448+
; CHECK-NEXT: lastb w8, p0, z2.h
449+
; CHECK-NEXT: whilels p1.h, xzr, x8
450+
; CHECK-NEXT: lastb h0, p1, z0.h
451+
; CHECK-NEXT: ptrue p1.h
452+
; CHECK-NEXT: ptest p1, p0.b
453+
; CHECK-NEXT: fcsel h0, h0, h1, ne
454+
; CHECK-NEXT: ret
455+
%res = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, bfloat %passthru)
456+
ret bfloat %res
457+
}
458+
329459
define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) #0 {
330460
; CHECK-LABEL: extract_last_float_scalable:
331461
; CHECK: // %bb.0:
@@ -374,12 +504,16 @@ declare i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8>, <16 x
374504
declare i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16>, <8 x i1>, i16)
375505
declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32>, <4 x i1>, i32)
376506
declare i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64>, <2 x i1>, i64)
507+
; Fixed-width 8-lane half and bfloat overloads used by extract_last_half and
; extract_last_bfloat; operands are (data vector, <8 x i1> mask, scalar of the
; element type, passed as %passthru by the callers).
declare half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half>, <8 x i1>, half)
508+
declare bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat>, <8 x i1>, bfloat)
377509
declare float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float>, <4 x i1>, float)
378510
declare double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double>, <2 x i1>, double)
379511
declare i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
380512
declare i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
381513
declare i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
382514
declare i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
515+
; Scalable half and bfloat overloads used by extract_last_half_scalable and
; extract_last_bfloat_scalable; operands are (data vector,
; <vscale x 8 x i1> mask, scalar of the element type, passed as %passthru by
; the callers).
declare half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
516+
declare bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)
383517
declare float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
384518
declare double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
385519
declare i1 @llvm.experimental.vector.extract.last.active.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i1)

0 commit comments

Comments
 (0)