|
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -o - %s | FileCheck %s
 
 define <4 x i32> @smull(<4 x i16> %x, ptr %y) {
 ; CHECK-LABEL: smull:
|
   ret <4 x i32> %r
 }
 
+; We shouldn't sink the splat operand for scalable vectors.
+define <vscale x 4 x float> @fmul_scalable(ptr %x, ptr %y) {
+; CHECK-LABEL: fmul_scalable:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    ldr s1, [x0]
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    sxtw x8, w8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    mov z1.s, s1
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:  .LBB13_1: // %l1
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    add x1, x1, x8
+; CHECK-NEXT:    fmul z2.s, z2.s, z1.s
+; CHECK-NEXT:    fadd z0.s, z2.s, z0.s
+; CHECK-NEXT:    b.eq .LBB13_1
+; CHECK-NEXT:  // %bb.2: // %l2
+; CHECK-NEXT:    ret
+entry:
+  %x.val = load float, ptr %x
+  %x.ins = insertelement <vscale x 4 x float> poison, float %x.val, i64 0
+  %a = shufflevector <vscale x 4 x float> %x.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %vscale = tail call i32 @llvm.vscale.i32()
+  %stride = shl nuw nsw i32 %vscale, 4
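+  ; The splat %a is defined once in the entry block; its only use is the fmul
+  ; inside %l1, the kind of use that operand sinking would otherwise move into
+  ; the loop.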
+  br label %l1
+
+l1:
+  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
+  %q = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %c, %l1 ]
+  %idx.y = mul nuw nsw i32 %p, %stride
+  %ptr.y = getelementptr float, ptr %y, i32 %idx.y
+  %l = load <vscale x 4 x float>, ptr %ptr.y
+  %b = fmul <vscale x 4 x float> %l, %a
+  %c = fadd <vscale x 4 x float> %b, %q
+  %pa = add i32 %p, 1
+  %c1 = icmp eq i32 %p, 0
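+  ; %p is 0 only on the first iteration, so the backedge is taken once and the
+  ; loop body executes exactly twice before branching to %l2.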
+  br i1 %c1, label %l1, label %l2
+
+l2:
+  ret <vscale x 4 x float> %c
+}
+
 
 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
|