Skip to content

Commit 6b9c2e2

Browse files
committed
Add scalable test
1 parent 3686f68 commit 6b9c2e2

File tree

1 file changed

+47
-1
lines changed

1 file changed

+47
-1
lines changed

llvm/test/CodeGen/AArch64/sinksplat.ll

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -o - %s | FileCheck %s
33

44
define <4 x i32> @smull(<4 x i16> %x, ptr %y) {
55
; CHECK-LABEL: smull:
@@ -422,6 +422,52 @@ l2:
422422
ret <4 x i32> %r
423423
}
424424

425+
; We shouldn't sink the splat operand for scalable vectors: the splat of the
; scalar loaded from %x is built once in the entry block, and the assertions
; below expect it to stay ahead of the loop instead of being rebuilt in %l1.
426+
define <vscale x 4 x float> @fmul_scalable(ptr %x, ptr %y) {
427+
; CHECK-LABEL: fmul_scalable:
428+
; CHECK: // %bb.0: // %entry
429+
; CHECK-NEXT: rdvl x8, #1
430+
; CHECK-NEXT: ldr s1, [x0]
431+
; CHECK-NEXT: mov z0.s, #0 // =0x0
432+
; CHECK-NEXT: sxtw x8, w8
433+
; CHECK-NEXT: ptrue p0.s
434+
; CHECK-NEXT: mov w9, #1 // =0x1
435+
; CHECK-NEXT: mov z1.s, s1
436+
; CHECK-NEXT: lsl x8, x8, #2
437+
; CHECK-NEXT: .LBB13_1: // %l1
438+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
439+
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
440+
; CHECK-NEXT: subs w9, w9, #1
441+
; CHECK-NEXT: add x1, x1, x8
442+
; CHECK-NEXT: fmul z2.s, z2.s, z1.s
443+
; CHECK-NEXT: fadd z0.s, z2.s, z0.s
444+
; CHECK-NEXT: b.eq .LBB13_1
445+
; CHECK-NEXT: // %bb.2: // %l2
446+
; CHECK-NEXT: ret
447+
entry:
448+
; Broadcast the scalar loaded from %x into every lane of %a. The all-zero
; shuffle mask only reads lane 0 of %x.ins, so the second shuffle operand is
; never observed and is spelled poison, matching the insertelement base.
%x.val = load float, ptr %x
449+
%x.ins = insertelement <vscale x 4 x float> poison, float %x.val, i64 0
450+
%a = shufflevector <vscale x 4 x float> %x.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
451+
; %stride = vscale * 16: the number of float elements %ptr.y advances per
; loop iteration.
%vscale = tail call i32 @llvm.vscale.i32()
452+
%stride = shl nuw nsw i32 %vscale, 4
453+
br label %l1
454+
455+
l1:
456+
; %p is the iteration counter and %q the running sum. The back edge is taken
; only while %p == 0, so the loop body executes exactly twice.
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
457+
%q = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %c, %l1 ]
458+
%idx.y = mul nuw nsw i32 %p, %stride
459+
%ptr.y = getelementptr float, ptr %y, i32 %idx.y
460+
%l = load <vscale x 4 x float>, ptr %ptr.y
461+
; The multiply by the loop-invariant splat %a is the operation under test.
%b = fmul <vscale x 4 x float> %l, %a
462+
%c = fadd <vscale x 4 x float> %b, %q
463+
%pa = add i32 %p, 1
464+
%c1 = icmp eq i32 %p, 0
465+
br i1 %c1, label %l1, label %l2
466+
467+
l2:
468+
ret <vscale x 4 x float> %c
469+
}
470+
425471

426472
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
427473
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

0 commit comments

Comments
 (0)