implement floor and ceil in assembly on i586

folkertdev · folkertdev · commit b2024742f68d · 2025-07-13T13:28:02.000+02:00
diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs
@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
 
 impl MaybeOverride<(f64,)> for SpecialCase {
     fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
-        if cfg!(x86_no_sse)
-            && ctx.base_name == BaseName::Ceil
-            && ctx.basis == CheckBasis::Musl
-            && input.0 < 0.0
-            && input.0 > -1.0
-            && expected == F::ZERO
-            && actual == F::ZERO
-        {
-            // musl returns -0.0, we return +0.0
-            return XFAIL("i586 ceil signed zero");
-        }
-
         if cfg!(x86_no_sse)
             && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
             && (expected - actual).abs() <= F::ONE
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             return XFAIL("i586 rint rounding mode");
         }
 
-        if cfg!(x86_no_sse)
-            && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
-            && expected.eq_repr(F::NEG_ZERO)
-            && actual.eq_repr(F::ZERO)
-        {
-            // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
-            // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
-            return XFAIL("i586 ceil/floor signed zero");
-        }
-
         if cfg!(x86_no_sse)
             && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
         {
diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs
@@ -1,37 +1,63 @@
 //! Architecture-specific support for x86-32 without SSE2
 
-use super::super::fabs;
-
 /// Use an alternative implementation on x86, because the
 /// main implementation fails with the x87 FPU used by
 /// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn ceil(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated < x {
-            return truncated + 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
-    }
+/// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_ceil.S
+#[unsafe(naked)]
+pub extern "C" fn ceil(_: f64) -> f64 {
+    core::arch::naked_asm!(
+        "pushl   %ebp",
+        "movl    %esp,%ebp",
+        "subl    $8,%esp", // scratch: -4(%ebp) = saved control word, -8(%ebp) = modified copy
+        // Save the caller's x87 control word so it can be restored afterwards.
+        "fstcw   -4(%ebp)",
+        "movw    -4(%ebp),%dx",
+        // Round towards +oo: set bit 11 and clear bit 10 of the rounding-control field.
+        "orw     $0x0800,%dx",
+        "andw    $0xfbff,%dx",
+        "movw    %dx,-8(%ebp)",
+        // Load the modified control word.
+        "fldcw   -8(%ebp)",
+        // Load the f64 argument from the stack and round it to an integer in that mode.
+        "fldl    8(%ebp)",
+        "frndint",
+        // Restore the caller's control word.
+        "fldcw   -4(%ebp)",
+        // Tear down the frame; the rounded value is left on the x87 stack as the return value.
+        "leave",
+        "ret",
+        options(att_syntax)
+    )
 }
 
 /// Use an alternative implementation on x86, because the
 /// main implementation fails with the x87 FPU used by
 /// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn floor(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated > x {
-            return truncated - 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
-    }
+/// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_floor.S
+#[unsafe(naked)]
+pub extern "C" fn floor(_: f64) -> f64 {
+    core::arch::naked_asm!(
+        "pushl   %ebp",
+        "movl    %esp,%ebp",
+        "subl    $8,%esp", // scratch: -4(%ebp) = saved control word, -8(%ebp) = modified copy
+        // Save the caller's x87 control word so it can be restored afterwards.
+        "fstcw   -4(%ebp)",
+        "movw    -4(%ebp),%dx",
+        // Round towards -oo: set bit 10 and clear bit 11 of the rounding-control field.
+        "orw     $0x0400,%dx",
+        "andw    $0xf7ff,%dx",
+        "movw    %dx,-8(%ebp)",
+        // Load the modified control word.
+        "fldcw   -8(%ebp)",
+        // Load the f64 argument from the stack and round it to an integer in that mode.
+        "fldl    8(%ebp)",
+        "frndint",
+        // Restore the caller's control word.
+        "fldcw   -4(%ebp)",
+        // Tear down the frame; the rounded value is left on the x87 stack as the return value.
+        "leave",
+        "ret",
+        options(att_syntax)
+    )
 }