Use rounding instructions on aarch64

TDecking · TDecking · commit c580ef061c06 · 2025-05-02T09:20:13.000+02:00
diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs
@@ -30,11 +30,156 @@ pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 {
     x
 }
 
+pub fn ceil(mut x: f64) -> f64 {
+    // SAFETY: `frintp` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintp {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn ceilf(mut x: f32) -> f32 {
+    // SAFETY: `frintp` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintp {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn ceilf16(mut x: f16) -> f16 {
+    // SAFETY: `frintp` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    unsafe {
+        asm!(
+            "frintp {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn floor(mut x: f64) -> f64 {
+    // SAFETY: `frintm` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintm {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn floorf(mut x: f32) -> f32 {
+    // SAFETY: `frintm` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintm {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn floorf16(mut x: f16) -> f16 {
+    // SAFETY: `frintm` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    unsafe {
+        asm!(
+            "frintm {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
 pub fn rint(mut x: f64) -> f64 {
+    // SAFETY: `frintx` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintx {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn rintf(mut x: f32) -> f32 {
+    // SAFETY: `frintx` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintx {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn rintf16(mut x: f16) -> f16 {
+    // SAFETY: `frintx` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    unsafe {
+        asm!(
+            "frintx {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn round(mut x: f64) -> f64 {
+    // SAFETY: `frinta` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frinta {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn roundf(mut x: f32) -> f32 {
+    // SAFETY: `frinta` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frinta {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn roundf16(mut x: f16) -> f16 {
+    // SAFETY: `frinta` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    unsafe {
+        asm!(
+            "frinta {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn roundeven(mut x: f64) -> f64 {
     // SAFETY: `frintn` is available with neon and has no side effects.
-    //
-    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
-    // not support rounding modes.
     unsafe {
         asm!(
             "frintn {x:d}, {x:d}",
@@ -45,11 +190,8 @@ pub fn rint(mut x: f64) -> f64 {
     x
 }
 
-pub fn rintf(mut x: f32) -> f32 {
+pub fn roundevenf(mut x: f32) -> f32 {
     // SAFETY: `frintn` is available with neon and has no side effects.
-    //
-    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
-    // not support rounding modes.
     unsafe {
         asm!(
             "frintn {x:s}, {x:s}",
@@ -61,11 +203,8 @@ pub fn rintf(mut x: f32) -> f32 {
 }
 
 #[cfg(all(f16_enabled, target_feature = "fp16"))]
-pub fn rintf16(mut x: f16) -> f16 {
+pub fn roundevenf16(mut x: f16) -> f16 {
     // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
-    //
-    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
-    // not support rounding modes.
     unsafe {
         asm!(
             "frintn {x:h}, {x:h}",
@@ -76,6 +215,43 @@ pub fn rintf16(mut x: f16) -> f16 {
     x
 }
 
+pub fn trunc(mut x: f64) -> f64 {
+    // SAFETY: `frintz` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintz {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn truncf(mut x: f32) -> f32 {
+    // SAFETY: `frintz` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "frintz {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn truncf16(mut x: f16) -> f16 {
+    // SAFETY: `frintz` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    unsafe {
+        asm!(
+            "frintz {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
 pub fn sqrt(mut x: f64) -> f64 {
     // SAFETY: `fsqrt` is available with neon and has no side effects.
     unsafe {
diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs
@@ -26,15 +26,30 @@ cfg_if! {
         pub use aarch64::{
             fma,
             fmaf,
+            ceil,
+            ceilf,
+            floor,
+            floorf,
+            round,
+            roundf,
             rint,
             rintf,
+            roundeven,
+            roundevenf,
+            trunc,
+            truncf,
             sqrt,
             sqrtf,
         };
 
         #[cfg(all(f16_enabled, target_feature = "fp16"))]
         pub use aarch64::{
+            ceilf16,
+            floorf16,
+            roundf16,
             rintf16,
+            roundevenf16,
+            truncf16,
             sqrtf16,
         };
     }
diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs
@@ -4,6 +4,12 @@
 #[cfg(f16_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn ceilf16(x: f16) -> f16 {
+    select_implementation! {
+        name: ceilf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
     super::generic::ceil(x)
 }
 
@@ -14,7 +20,10 @@ pub fn ceilf16(x: f16) -> f16 {
 pub fn ceilf(x: f32) -> f32 {
     select_implementation! {
         name: ceilf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+        ),
         args: x,
     }
 
@@ -28,7 +37,10 @@ pub fn ceilf(x: f32) -> f32 {
 pub fn ceil(x: f64) -> f64 {
     select_implementation! {
         name: ceil,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+        ),
         use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
         args: x,
     }
diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs
@@ -4,6 +4,12 @@
 #[cfg(f16_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn floorf16(x: f16) -> f16 {
+    select_implementation! {
+        name: floorf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
     return super::generic::floor(x);
 }
 
@@ -14,7 +20,10 @@ pub fn floorf16(x: f16) -> f16 {
 pub fn floor(x: f64) -> f64 {
     select_implementation! {
         name: floor,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+        ),
         use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
         args: x,
     }
@@ -29,7 +38,10 @@ pub fn floor(x: f64) -> f64 {
 pub fn floorf(x: f32) -> f32 {
     select_implementation! {
         name: floorf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+        ),
         args: x,
     }
 
diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs
@@ -2,18 +2,36 @@
 #[cfg(f16_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn roundf16(x: f16) -> f16 {
+    select_implementation! {
+        name: roundf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn roundf(x: f32) -> f32 {
+    select_implementation! {
+        name: roundf,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x,
+    }
+
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn round(x: f64) -> f64 {
+    select_implementation! {
+        name: round,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x,
+    }
+
     super::generic::round(x)
 }
 
diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs
diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs