11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve - o - %s | FileCheck %s
2+ ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
33
44define <4 x i32 > @smull (<4 x i16 > %x , ptr %y ) {
55; CHECK-LABEL: smull:
422422 ret <4 x i32 > %r
423423}
424424
425+ ; We shouldn't sink the splat operand without fullfp16.
; A scalar half loaded from %x is splatted to <4 x half> and multiplied
; against <4 x half> vectors loaded from %y inside a loop; the products are
; accumulated and the final sum is returned. This function carries no
; "target-features" attribute. In the expected assembly below the splat is
; built with ld1r and widened with fcvtl ahead of the loop, and each loop
; iteration widens its operands with fcvtl, does the arithmetic on .4s lanes,
; and narrows the results back with fcvtn.
426+ define <4 x half > @fmul_half (ptr %x , ptr %y ) {
427+ ; CHECK-LABEL: fmul_half:
428+ ; CHECK: // %bb.0: // %entry
429+ ; CHECK-NEXT: ld1r { v1.4h }, [x0]
430+ ; CHECK-NEXT: movi d0, #0000000000000000
431+ ; CHECK-NEXT: mov x8, xzr
432+ ; CHECK-NEXT: fcvtl v1.4s, v1.4h
433+ ; CHECK-NEXT: .LBB13_1: // %l1
434+ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
435+ ; CHECK-NEXT: ldr d2, [x1, x8]
436+ ; CHECK-NEXT: fcvtl v0.4s, v0.4h
437+ ; CHECK-NEXT: add x8, x8, #8
438+ ; CHECK-NEXT: cmp w8, #8
439+ ; CHECK-NEXT: fcvtl v2.4s, v2.4h
440+ ; CHECK-NEXT: fmul v2.4s, v2.4s, v1.4s
441+ ; CHECK-NEXT: fcvtn v2.4h, v2.4s
442+ ; CHECK-NEXT: fcvtl v2.4s, v2.4h
443+ ; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
444+ ; CHECK-NEXT: fcvtn v0.4h, v0.4s
445+ ; CHECK-NEXT: b.eq .LBB13_1
446+ ; CHECK-NEXT: // %bb.2: // %l2
447+ ; CHECK-NEXT: ret
448+ entry:
449+ %x.val = load half , ptr %x
450+ %x.ins = insertelement <4 x half > poison, half %x.val , i64 0
; The all-zero shuffle mask broadcasts lane 0 of %x.ins to all four lanes.
451+ %a = shufflevector <4 x half > %x.ins , <4 x half > undef , <4 x i32 > zeroinitializer
452+ br label %l1
453+
454+ l1:
; %p is the iteration counter and %q the running sum; both are loop-carried.
455+ %p = phi i32 [ 0 , %entry ], [ %pa , %l1 ]
456+ %q = phi <4 x half > [ zeroinitializer , %entry ], [ %c , %l1 ]
; Each iteration reads the next group of four halves from %y (index %p * 4).
457+ %idx.y = mul nuw nsw i32 %p , 4
458+ %ptr.y = getelementptr half , ptr %y , i32 %idx.y
459+ %l = load <4 x half >, ptr %ptr.y
; The fmul uses the splat %a, which is defined outside this loop block.
460+ %b = fmul <4 x half > %l , %a
461+ %c = fadd <4 x half > %b , %q
462+ %pa = add i32 %p , 1
; The back-edge is taken only while %p == 0.
463+ %c1 = icmp eq i32 %p , 0
464+ br i1 %c1 , label %l1 , label %l2
465+
466+ l2:
467+ ret <4 x half > %c
468+ }
469+
; Same splat-multiply-accumulate loop over <4 x half> as a plain fmul/fadd
; body, but this function is compiled with the "+fullfp16" target feature.
; In the expected assembly below the scalar is loaded once with `ldr h1`
; ahead of the loop, and the loop multiplies directly on .4h lanes using the
; by-element form `fmul v2.4h, v2.4h, v1.h[0]`, with no fcvtl/fcvtn
; conversions. NOTE(review): the by-element fmul is presumably the result of
; sinking the splat into the loop block - confirm against the code under test.
470+ define <4 x half > @fmul_half_fullfp16 (ptr %x , ptr %y ) "target-features" ="+fullfp16" {
471+ ; CHECK-LABEL: fmul_half_fullfp16:
472+ ; CHECK: // %bb.0: // %entry
473+ ; CHECK-NEXT: movi d0, #0000000000000000
474+ ; CHECK-NEXT: ldr h1, [x0]
475+ ; CHECK-NEXT: mov x8, xzr
476+ ; CHECK-NEXT: .LBB14_1: // %l1
477+ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
478+ ; CHECK-NEXT: ldr d2, [x1, x8]
479+ ; CHECK-NEXT: add x8, x8, #8
480+ ; CHECK-NEXT: cmp w8, #8
481+ ; CHECK-NEXT: fmul v2.4h, v2.4h, v1.h[0]
482+ ; CHECK-NEXT: fadd v0.4h, v2.4h, v0.4h
483+ ; CHECK-NEXT: b.eq .LBB14_1
484+ ; CHECK-NEXT: // %bb.2: // %l2
485+ ; CHECK-NEXT: ret
486+ entry:
487+ %x.val = load half , ptr %x
488+ %x.ins = insertelement <4 x half > poison, half %x.val , i64 0
; The all-zero shuffle mask broadcasts lane 0 of %x.ins to all four lanes.
489+ %a = shufflevector <4 x half > %x.ins , <4 x half > undef , <4 x i32 > zeroinitializer
490+ br label %l1
491+
492+ l1:
; %p is the iteration counter and %q the running sum; both are loop-carried.
493+ %p = phi i32 [ 0 , %entry ], [ %pa , %l1 ]
494+ %q = phi <4 x half > [ zeroinitializer , %entry ], [ %c , %l1 ]
; Each iteration reads the next group of four halves from %y (index %p * 4).
495+ %idx.y = mul nuw nsw i32 %p , 4
496+ %ptr.y = getelementptr half , ptr %y , i32 %idx.y
497+ %l = load <4 x half >, ptr %ptr.y
; The fmul uses the splat %a, which is defined outside this loop block.
498+ %b = fmul <4 x half > %l , %a
499+ %c = fadd <4 x half > %b , %q
500+ %pa = add i32 %p , 1
; The back-edge is taken only while %p == 0.
501+ %c1 = icmp eq i32 %p , 0
502+ br i1 %c1 , label %l1 , label %l2
503+
504+ l2:
505+ ret <4 x half > %c
506+ }
507+
425508; We shouldn't sink the splat operand for scalable vectors.
426- define <vscale x 4 x float > @fmul_scalable (ptr %x , ptr %y ) {
509+ define <vscale x 4 x float > @fmul_scalable (ptr %x , ptr %y ) "target-features" = "+sve" {
427510; CHECK-LABEL: fmul_scalable:
428511; CHECK: // %bb.0: // %entry
429512; CHECK-NEXT: ptrue p0.s
@@ -433,14 +516,14 @@ define <vscale x 4 x float> @fmul_scalable(ptr %x, ptr %y) {
433516; CHECK-NEXT: mov w9, #1 // =0x1
434517; CHECK-NEXT: ld1rw { z1.s }, p0/z, [x0]
435518; CHECK-NEXT: lsl x8, x8, #2
436- ; CHECK-NEXT: .LBB13_1 : // %l1
519+ ; CHECK-NEXT: .LBB15_1 : // %l1
437520; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
438521; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
439522; CHECK-NEXT: subs w9, w9, #1
440523; CHECK-NEXT: add x1, x1, x8
441524; CHECK-NEXT: fmul z2.s, z2.s, z1.s
442525; CHECK-NEXT: fadd z0.s, z2.s, z0.s
443- ; CHECK-NEXT: b.eq .LBB13_1
526+ ; CHECK-NEXT: b.eq .LBB15_1
444527; CHECK-NEXT: // %bb.2: // %l2
445528; CHECK-NEXT: ret
446529entry:
0 commit comments