@@ -424,21 +424,74 @@ entry:
424424 ret <2 x double > %0
425425}
426426
427- declare <4 x float > @llvm.ceil.v4f32 (<4 x float >)
428- declare <4 x float > @llvm.trunc.v4f32 (<4 x float >)
429- declare <4 x float > @llvm.rint.v4f32 (<4 x float >)
430- declare <4 x float > @llvm.nearbyint.v4f32 (<4 x float >)
431- declare <4 x float > @llvm.floor.v4f32 (<4 x float >)
432- declare <4 x float > @llvm.round.v4f32 (<4 x float >)
433- declare <8 x half > @llvm.ceil.v8f16 (<8 x half >)
434- declare <8 x half > @llvm.trunc.v8f16 (<8 x half >)
435- declare <8 x half > @llvm.rint.v8f16 (<8 x half >)
436- declare <8 x half > @llvm.nearbyint.v8f16 (<8 x half >)
437- declare <8 x half > @llvm.floor.v8f16 (<8 x half >)
438- declare <8 x half > @llvm.round.v8f16 (<8 x half >)
439- declare <2 x double > @llvm.ceil.v2f64 (<2 x double >)
440- declare <2 x double > @llvm.trunc.v2f64 (<2 x double >)
441- declare <2 x double > @llvm.rint.v2f64 (<2 x double >)
442- declare <2 x double > @llvm.nearbyint.v2f64 (<2 x double >)
443- declare <2 x double > @llvm.floor.v2f64 (<2 x double >)
444- declare <2 x double > @llvm.round.v2f64 (<2 x double >)
; Codegen test for a fast-math call to llvm.roundeven.v4f32 on a <4 x float>
; argument, using the arm_aapcs_vfpcc calling convention.
; The lines under the CHECK-MVE prefix expect one scalar vrintn.f32 per lane
; (s3 down to s0); the lines under the CHECK-MVEFP prefix expect a single
; q-register vrintn.f32 on q0.
; NOTE(review) - the RUN lines that define these prefixes are outside this hunk;
; presumably the check lines are autogenerated and should be regenerated rather
; than hand-edited - confirm against the top of the file.
427+ define arm_aapcs_vfpcc <4 x float > @froundeven_float32_t (<4 x float > %src ) {
428+ ; CHECK-MVE-LABEL: froundeven_float32_t:
429+ ; CHECK-MVE: @ %bb.0: @ %entry
430+ ; CHECK-MVE-NEXT: vrintn.f32 s3, s3
431+ ; CHECK-MVE-NEXT: vrintn.f32 s2, s2
432+ ; CHECK-MVE-NEXT: vrintn.f32 s1, s1
433+ ; CHECK-MVE-NEXT: vrintn.f32 s0, s0
434+ ; CHECK-MVE-NEXT: bx lr
435+ ;
436+ ; CHECK-MVEFP-LABEL: froundeven_float32_t:
437+ ; CHECK-MVEFP: @ %bb.0: @ %entry
438+ ; CHECK-MVEFP-NEXT: vrintn.f32 q0, q0
439+ ; CHECK-MVEFP-NEXT: bx lr
440+ entry:
441+ %0 = call fast <4 x float > @llvm.roundeven.v4f32 (<4 x float > %src )
442+ ret <4 x float > %0
443+ }
444+
; Codegen test for a fast-math call to llvm.roundeven.v8f16 on an <8 x half>
; argument, using the arm_aapcs_vfpcc calling convention.
; The lines under the CHECK-MVE prefix expect, for each of s0 through s3, a
; vmovx.f16 into s4, a vrintn.f16 on both s4 and the source register, and a
; vins.f16 of s4 back into that register. The lines under the CHECK-MVEFP
; prefix expect a single q-register vrintn.f16 on q0.
; NOTE(review) - the RUN lines that define these prefixes are outside this hunk;
; presumably the check lines are autogenerated and should be regenerated rather
; than hand-edited - confirm against the top of the file.
445+ define arm_aapcs_vfpcc <8 x half > @froundeven_float16_t (<8 x half > %src ) {
446+ ; CHECK-MVE-LABEL: froundeven_float16_t:
447+ ; CHECK-MVE: @ %bb.0: @ %entry
448+ ; CHECK-MVE-NEXT: vmovx.f16 s4, s0
449+ ; CHECK-MVE-NEXT: vrintn.f16 s0, s0
450+ ; CHECK-MVE-NEXT: vrintn.f16 s4, s4
451+ ; CHECK-MVE-NEXT: vins.f16 s0, s4
452+ ; CHECK-MVE-NEXT: vmovx.f16 s4, s1
453+ ; CHECK-MVE-NEXT: vrintn.f16 s4, s4
454+ ; CHECK-MVE-NEXT: vrintn.f16 s1, s1
455+ ; CHECK-MVE-NEXT: vins.f16 s1, s4
456+ ; CHECK-MVE-NEXT: vmovx.f16 s4, s2
457+ ; CHECK-MVE-NEXT: vrintn.f16 s4, s4
458+ ; CHECK-MVE-NEXT: vrintn.f16 s2, s2
459+ ; CHECK-MVE-NEXT: vins.f16 s2, s4
460+ ; CHECK-MVE-NEXT: vmovx.f16 s4, s3
461+ ; CHECK-MVE-NEXT: vrintn.f16 s4, s4
462+ ; CHECK-MVE-NEXT: vrintn.f16 s3, s3
463+ ; CHECK-MVE-NEXT: vins.f16 s3, s4
464+ ; CHECK-MVE-NEXT: bx lr
465+ ;
466+ ; CHECK-MVEFP-LABEL: froundeven_float16_t:
467+ ; CHECK-MVEFP: @ %bb.0: @ %entry
468+ ; CHECK-MVEFP-NEXT: vrintn.f16 q0, q0
469+ ; CHECK-MVEFP-NEXT: bx lr
470+ entry:
471+ %0 = call fast <8 x half > @llvm.roundeven.v8f16 (<8 x half > %src )
472+ ret <8 x half > %0
473+ }
474+
; Codegen test for a fast-math call to llvm.roundeven.v2f64 on a <2 x double>
; argument, using the arm_aapcs_vfpcc calling convention.
; The expected output uses the plain CHECK prefix and contains no vrintn
; instruction. Instead it saves r7/lr and d8/d9, copies q0 to q4, and issues
; two bl roundeven calls (first with d9 moved to r0/r1, then with d8), writing
; each r0/r1 result back to d9 and d8 before copying q4 to q0.
; NOTE(review) - presumably the plain CHECK prefix is shared by every RUN
; configuration and the check lines are autogenerated; the RUN lines are
; outside this hunk - confirm against the top of the file.
475+ define arm_aapcs_vfpcc <2 x double > @froundeven_float64_t (<2 x double > %src ) {
476+ ; CHECK-LABEL: froundeven_float64_t:
477+ ; CHECK: @ %bb.0: @ %entry
478+ ; CHECK-NEXT: .save {r7, lr}
479+ ; CHECK-NEXT: push {r7, lr}
480+ ; CHECK-NEXT: .vsave {d8, d9}
481+ ; CHECK-NEXT: vpush {d8, d9}
482+ ; CHECK-NEXT: vmov q4, q0
483+ ; CHECK-NEXT: vmov r0, r1, d9
484+ ; CHECK-NEXT: bl roundeven
485+ ; CHECK-NEXT: vmov r2, r3, d8
486+ ; CHECK-NEXT: vmov d9, r0, r1
487+ ; CHECK-NEXT: mov r0, r2
488+ ; CHECK-NEXT: mov r1, r3
489+ ; CHECK-NEXT: bl roundeven
490+ ; CHECK-NEXT: vmov d8, r0, r1
491+ ; CHECK-NEXT: vmov q0, q4
492+ ; CHECK-NEXT: vpop {d8, d9}
493+ ; CHECK-NEXT: pop {r7, pc}
494+ entry:
495+ %0 = call fast <2 x double > @llvm.roundeven.v2f64 (<2 x double > %src )
496+ ret <2 x double > %0
497+ }
0 commit comments