|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
2 | | -; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED |
3 | | -; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED |
| 2 | +; RUN: llc -mtriple=aarch64 -mattr=+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED |
| 3 | +; RUN: llc -mtriple=aarch64 -mattr=+sve,+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED |
4 | 4 |
|
5 | 5 | define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) { |
6 | 6 | ; NEON-FIXED-LABEL: extract_last_i8: |
@@ -194,15 +194,115 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { |
194 | 194 | ret i64 %res |
195 | 195 | } |
196 | 196 |
|
| | +; Fixed-width <8 x half> case. The <8 x i16> %mask is turned into an <8 x i1> |
| | +; predicate via `icmp ne ..., zeroinitializer` and passed, together with %data |
| | +; and %passthru, to llvm.experimental.vector.extract.last.active.v8f16. |
| | +; The NEON-FIXED checks expect the final select on s-registers (fcsel s0, s0, s2), |
| | +; the SVE-FIXED checks on h-registers (fcsel h0, h0, h2). |
| | +; CHECK lines are autogenerated (see NOTE at top of file): regenerate with |
| | +; utils/update_llc_test_checks.py instead of editing them by hand. |
| 197 | +define half @extract_last_half(<8 x half> %data, <8 x i16> %mask, half %passthru) { |
| 198 | +; NEON-FIXED-LABEL: extract_last_half: |
| 199 | +; NEON-FIXED: // %bb.0: |
| 200 | +; NEON-FIXED-NEXT: sub sp, sp, #16 |
| 201 | +; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16 |
| 202 | +; NEON-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h |
| 203 | +; NEON-FIXED-NEXT: adrp x8, .LCPI4_0 |
| 204 | +; NEON-FIXED-NEXT: mov x9, sp |
| 205 | +; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI4_0] |
| 206 | +; NEON-FIXED-NEXT: str q0, [sp] |
| 207 | +; NEON-FIXED-NEXT: // kill: def $h2 killed $h2 def $s2 |
| 208 | +; NEON-FIXED-NEXT: xtn v3.8b, v1.8h |
| 209 | +; NEON-FIXED-NEXT: umaxv h1, v1.8h |
| 210 | +; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b |
| 211 | +; NEON-FIXED-NEXT: umaxv b3, v3.8b |
| 212 | +; NEON-FIXED-NEXT: fmov w8, s3 |
| 213 | +; NEON-FIXED-NEXT: bfi x9, x8, #1, #3 |
| 214 | +; NEON-FIXED-NEXT: fmov w8, s1 |
| 215 | +; NEON-FIXED-NEXT: ldr h0, [x9] |
| 216 | +; NEON-FIXED-NEXT: tst w8, #0x1 |
| 217 | +; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne |
| 218 | +; NEON-FIXED-NEXT: // kill: def $h0 killed $h0 killed $s0 |
| 219 | +; NEON-FIXED-NEXT: add sp, sp, #16 |
| 220 | +; NEON-FIXED-NEXT: ret |
| 221 | +; |
| 222 | +; SVE-FIXED-LABEL: extract_last_half: |
| 223 | +; SVE-FIXED: // %bb.0: |
| 224 | +; SVE-FIXED-NEXT: sub sp, sp, #16 |
| 225 | +; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16 |
| 226 | +; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h |
| 227 | +; SVE-FIXED-NEXT: index z4.b, #0, #1 |
| 228 | +; SVE-FIXED-NEXT: mov x9, sp |
| 229 | +; SVE-FIXED-NEXT: str q0, [sp] |
| 230 | +; SVE-FIXED-NEXT: xtn v3.8b, v1.8h |
| 231 | +; SVE-FIXED-NEXT: umaxv h1, v1.8h |
| 232 | +; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b |
| 233 | +; SVE-FIXED-NEXT: umaxv b3, v3.8b |
| 234 | +; SVE-FIXED-NEXT: fmov w8, s3 |
| 235 | +; SVE-FIXED-NEXT: bfi x9, x8, #1, #3 |
| 236 | +; SVE-FIXED-NEXT: fmov w8, s1 |
| 237 | +; SVE-FIXED-NEXT: ldr h0, [x9] |
| 238 | +; SVE-FIXED-NEXT: tst w8, #0x1 |
| 239 | +; SVE-FIXED-NEXT: fcsel h0, h0, h2, ne |
| 240 | +; SVE-FIXED-NEXT: add sp, sp, #16 |
| 241 | +; SVE-FIXED-NEXT: ret |
| 242 | + %notzero = icmp ne <8 x i16> %mask, zeroinitializer |
| 243 | + %res = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> %data, <8 x i1> %notzero, half %passthru) |
| 244 | + ret half %res |
| 245 | +} |
| 246 | + |
| | +; Fixed-width <8 x bfloat> case. The <8 x i16> %mask is turned into an <8 x i1> |
| | +; predicate via `icmp ne ..., zeroinitializer` and passed, together with %data |
| | +; and %passthru, to llvm.experimental.vector.extract.last.active.v8bf16. |
| | +; Both RUN lines enable +bf16. The NEON-FIXED checks expect the final select on |
| | +; s-registers (fcsel s0, s0, s2), the SVE-FIXED checks on h-registers |
| | +; (fcsel h0, h0, h2). |
| | +; CHECK lines are autogenerated (see NOTE at top of file): regenerate with |
| | +; utils/update_llc_test_checks.py instead of editing them by hand. |
| 247 | +define bfloat @extract_last_bfloat(<8 x bfloat> %data, <8 x i16> %mask, bfloat %passthru) { |
| 248 | +; NEON-FIXED-LABEL: extract_last_bfloat: |
| 249 | +; NEON-FIXED: // %bb.0: |
| 250 | +; NEON-FIXED-NEXT: sub sp, sp, #16 |
| 251 | +; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16 |
| 252 | +; NEON-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h |
| 253 | +; NEON-FIXED-NEXT: adrp x8, .LCPI5_0 |
| 254 | +; NEON-FIXED-NEXT: mov x9, sp |
| 255 | +; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI5_0] |
| 256 | +; NEON-FIXED-NEXT: str q0, [sp] |
| 257 | +; NEON-FIXED-NEXT: // kill: def $h2 killed $h2 def $s2 |
| 258 | +; NEON-FIXED-NEXT: xtn v3.8b, v1.8h |
| 259 | +; NEON-FIXED-NEXT: umaxv h1, v1.8h |
| 260 | +; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b |
| 261 | +; NEON-FIXED-NEXT: umaxv b3, v3.8b |
| 262 | +; NEON-FIXED-NEXT: fmov w8, s3 |
| 263 | +; NEON-FIXED-NEXT: bfi x9, x8, #1, #3 |
| 264 | +; NEON-FIXED-NEXT: fmov w8, s1 |
| 265 | +; NEON-FIXED-NEXT: ldr h0, [x9] |
| 266 | +; NEON-FIXED-NEXT: tst w8, #0x1 |
| 267 | +; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne |
| 268 | +; NEON-FIXED-NEXT: // kill: def $h0 killed $h0 killed $s0 |
| 269 | +; NEON-FIXED-NEXT: add sp, sp, #16 |
| 270 | +; NEON-FIXED-NEXT: ret |
| 271 | +; |
| 272 | +; SVE-FIXED-LABEL: extract_last_bfloat: |
| 273 | +; SVE-FIXED: // %bb.0: |
| 274 | +; SVE-FIXED-NEXT: sub sp, sp, #16 |
| 275 | +; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16 |
| 276 | +; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h |
| 277 | +; SVE-FIXED-NEXT: index z4.b, #0, #1 |
| 278 | +; SVE-FIXED-NEXT: mov x9, sp |
| 279 | +; SVE-FIXED-NEXT: str q0, [sp] |
| 280 | +; SVE-FIXED-NEXT: xtn v3.8b, v1.8h |
| 281 | +; SVE-FIXED-NEXT: umaxv h1, v1.8h |
| 282 | +; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b |
| 283 | +; SVE-FIXED-NEXT: umaxv b3, v3.8b |
| 284 | +; SVE-FIXED-NEXT: fmov w8, s3 |
| 285 | +; SVE-FIXED-NEXT: bfi x9, x8, #1, #3 |
| 286 | +; SVE-FIXED-NEXT: fmov w8, s1 |
| 287 | +; SVE-FIXED-NEXT: ldr h0, [x9] |
| 288 | +; SVE-FIXED-NEXT: tst w8, #0x1 |
| 289 | +; SVE-FIXED-NEXT: fcsel h0, h0, h2, ne |
| 290 | +; SVE-FIXED-NEXT: add sp, sp, #16 |
| 291 | +; SVE-FIXED-NEXT: ret |
| 292 | + %notzero = icmp ne <8 x i16> %mask, zeroinitializer |
| 293 | + %res = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> %data, <8 x i1> %notzero, bfloat %passthru) |
| 294 | + ret bfloat %res |
| 295 | +} |
| 296 | + |
197 | 297 | define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %passthru) { |
198 | 298 | ; NEON-FIXED-LABEL: extract_last_float: |
199 | 299 | ; NEON-FIXED: // %bb.0: |
200 | 300 | ; NEON-FIXED-NEXT: sub sp, sp, #16 |
201 | 301 | ; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16 |
202 | 302 | ; NEON-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s |
203 | | -; NEON-FIXED-NEXT: adrp x8, .LCPI4_0 |
| 303 | +; NEON-FIXED-NEXT: adrp x8, .LCPI6_0 |
204 | 304 | ; NEON-FIXED-NEXT: mov x9, sp |
205 | | -; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI4_0] |
| 305 | +; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI6_0] |
206 | 306 | ; NEON-FIXED-NEXT: str q0, [sp] |
207 | 307 | ; NEON-FIXED-NEXT: xtn v3.4h, v1.4s |
208 | 308 | ; NEON-FIXED-NEXT: umaxv s1, v1.4s |
@@ -248,9 +348,9 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % |
248 | 348 | ; NEON-FIXED-NEXT: sub sp, sp, #16 |
249 | 349 | ; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16 |
250 | 350 | ; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d |
251 | | -; NEON-FIXED-NEXT: adrp x8, .LCPI5_0 |
| 351 | +; NEON-FIXED-NEXT: adrp x8, .LCPI7_0 |
252 | 352 | ; NEON-FIXED-NEXT: mov x9, sp |
253 | | -; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI5_0] |
| 353 | +; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI7_0] |
254 | 354 | ; NEON-FIXED-NEXT: str q0, [sp] |
255 | 355 | ; NEON-FIXED-NEXT: xtn v3.2s, v1.2d |
256 | 356 | ; NEON-FIXED-NEXT: umaxv s1, v1.4s |
@@ -326,6 +426,36 @@ define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1 |
326 | 426 | ret i64 %res |
327 | 427 | } |
328 | 428 |
|
| | +; Scalable <vscale x 8 x half> case. %mask is already a <vscale x 8 x i1> |
| | +; predicate and is passed unchanged to |
| | +; llvm.experimental.vector.extract.last.active.nxv8f16 with %data and %passthru. |
| | +; Uses the shared CHECK prefix, so both RUN lines must produce this output. |
| | +; NOTE(review): attribute group #0 is defined outside this hunk; presumably it |
| | +; enables SVE for the function -- confirm against the full file. |
| 429 | +define half @extract_last_half_scalable(<vscale x 8 x half> %data, <vscale x 8 x i1> %mask, half %passthru) #0 { |
| 430 | +; CHECK-LABEL: extract_last_half_scalable: |
| 431 | +; CHECK: // %bb.0: |
| 432 | +; CHECK-NEXT: index z2.h, #0, #1 |
| 433 | +; CHECK-NEXT: lastb w8, p0, z2.h |
| 434 | +; CHECK-NEXT: whilels p1.h, xzr, x8 |
| 435 | +; CHECK-NEXT: lastb h0, p1, z0.h |
| 436 | +; CHECK-NEXT: ptrue p1.h |
| 437 | +; CHECK-NEXT: ptest p1, p0.b |
| 438 | +; CHECK-NEXT: fcsel h0, h0, h1, ne |
| 439 | +; CHECK-NEXT: ret |
| 440 | + %res = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %mask, half %passthru) |
| 441 | + ret half %res |
| 442 | +} |
| 443 | + |
| | +; Scalable <vscale x 8 x bfloat> case. %mask is already a <vscale x 8 x i1> |
| | +; predicate and is passed unchanged to |
| | +; llvm.experimental.vector.extract.last.active.nxv8bf16 with %data and %passthru. |
| | +; Uses the shared CHECK prefix, so both RUN lines must produce this output. |
| | +; NOTE(review): attribute group #0 is defined outside this hunk; presumably it |
| | +; enables SVE for the function -- confirm against the full file. |
| 444 | +define bfloat @extract_last_bfloat_scalable(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, bfloat %passthru) #0 { |
| 445 | +; CHECK-LABEL: extract_last_bfloat_scalable: |
| 446 | +; CHECK: // %bb.0: |
| 447 | +; CHECK-NEXT: index z2.h, #0, #1 |
| 448 | +; CHECK-NEXT: lastb w8, p0, z2.h |
| 449 | +; CHECK-NEXT: whilels p1.h, xzr, x8 |
| 450 | +; CHECK-NEXT: lastb h0, p1, z0.h |
| 451 | +; CHECK-NEXT: ptrue p1.h |
| 452 | +; CHECK-NEXT: ptest p1, p0.b |
| 453 | +; CHECK-NEXT: fcsel h0, h0, h1, ne |
| 454 | +; CHECK-NEXT: ret |
| 455 | + %res = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, bfloat %passthru) |
| 456 | + ret bfloat %res |
| 457 | +} |
| 458 | + |
329 | 459 | define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) #0 { |
330 | 460 | ; CHECK-LABEL: extract_last_float_scalable: |
331 | 461 | ; CHECK: // %bb.0: |
@@ -374,12 +504,16 @@ declare i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8>, <16 x |
374 | 504 | declare i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16>, <8 x i1>, i16) |
375 | 505 | declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32>, <4 x i1>, i32) |
376 | 506 | declare i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64>, <2 x i1>, i64) |
| 507 | +declare half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half>, <8 x i1>, half) |
| 508 | +declare bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat>, <8 x i1>, bfloat) |
377 | 509 | declare float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float>, <4 x i1>, float) |
378 | 510 | declare double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double>, <2 x i1>, double) |
379 | 511 | declare i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8) |
380 | 512 | declare i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16) |
381 | 513 | declare i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) |
382 | 514 | declare i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64) |
| 515 | +declare half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half) |
| 516 | +declare bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat) |
383 | 517 | declare float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float) |
384 | 518 | declare double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double) |
385 | 519 | declare i1 @llvm.experimental.vector.extract.last.active.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i1) |
|
0 commit comments