From b9234d8d7b2070cf3a680c8c9f074528416639fc Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Mon, 22 Sep 2025 14:41:36 +0300 Subject: [PATCH 1/3] implement libm::exp and its variants for i586 with inline assembly to fix precision issues --- libm-test/src/precision.rs | 12 +++++++-- libm/src/math/arch/i586.rs | 53 ++++++++++++++++++++++++++++++++++++++ libm/src/math/arch/mod.rs | 5 ++++ libm/src/math/exp.rs | 6 +++++ libm/src/math/exp10.rs | 6 +++++ libm/src/math/exp10f.rs | 6 +++++ libm/src/math/exp2.rs | 6 +++++ libm/src/math/exp2f.rs | 6 +++++ libm/src/math/expf.rs | 6 +++++ 9 files changed, 104 insertions(+), 2 deletions(-) diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index c441922d3..ceaeeb3ac 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -83,6 +83,16 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Bn::Tgamma => 20, }; + // These have a separate implementation on i586 + if cfg!(x86_no_sse) { + match ctx.base_name { + Bn::Exp => ulp = 1, + Bn::Exp2 => ulp = 1, + Bn::Exp10 => ulp = 1, + _ => (), + } + } + // There are some cases where musl's approximation is less accurate than ours. For these // cases, increase the ULP. if ctx.basis == Musl { @@ -124,8 +134,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Id::Asinh => ulp = 3, Id::Asinhf => ulp = 3, Id::Cbrt => ulp = 1, - Id::Exp10 | Id::Exp10f => ulp = 1_000_000, - Id::Exp2 | Id::Exp2f => ulp = 10_000_000, Id::Log1p | Id::Log1pf => ulp = 2, Id::Tan => ulp = 2, _ => (), diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs index b9a667620..982251ae0 100644 --- a/libm/src/math/arch/i586.rs +++ b/libm/src/math/arch/i586.rs @@ -60,3 +60,56 @@ pub fn floor(mut x: f64) -> f64 { } x } + +macro_rules! x87exp { + ($float_ty:ident, $word_size:literal, $fn_name:ident, $load_op:literal) => { + pub fn $fn_name(mut x: $float_ty) -> $float_ty { unsafe { + core::arch::asm!( + // Prepare the register stack as + // ``` + // st(0) = y = x*log2(base) + // st(1) = 1.0 + // st(2) = round(y) + // ``` + concat!($load_op, " ", $word_size, " ptr [{x}]"), + "fld1", + "fld st(1)", + "frndint", + "fxch st(2)", + + // Compare y with round(y) to determine if y is finite and + // not an integer. If so, compute `exp2(y - round(y))` into + // st(1). Otherwise skip ahead with `st(1) = 1.0` + "fucom st(2)", + "fstsw ax", + "test ax, 0x4000", + "jnz 2f", + "fsub st(0), st(2)", // st(0) = y - round(y) + "f2xm1", // st(0) = 2^st(0) - 1.0 + "fadd st(1), st(0)", // st(1) = 1 + st(0) = exp2(y - round(y)) + "2:", + + // Finally, scale by `exp2(round(y))` and clear the stack. + "fstp st(0)", + "fscale", + concat!("fstp ", $word_size, " ptr [{x}]"), + "fstp st(0)", + x = in(reg) &mut x, + out("ax") _, + out("st(0)") _, out("st(1)") _, + out("st(2)") _, out("st(3)") _, + out("st(4)") _, out("st(5)") _, + out("st(6)") _, out("st(7)") _, + options(nostack), + ); + x + }} + }; +} + +x87exp!(f32, "dword", x87_exp2f, "fld"); +x87exp!(f64, "qword", x87_exp2, "fld"); +x87exp!(f32, "dword", x87_exp10f, "fldl2t\nfmul"); +x87exp!(f64, "qword", x87_exp10, "fldl2t\nfmul"); +x87exp!(f32, "dword", x87_expf, "fldl2e\nfmul"); +x87exp!(f64, "qword", x87_exp, "fldl2e\nfmul"); diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index 984ae7f31..ba859c679 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -48,3 +48,8 @@ cfg_if! { pub use i586::{ceil, floor}; } } +cfg_if! { + if #[cfg(x86_no_sse)] { + pub use i586::{x87_exp10f, x87_exp10, x87_expf, x87_exp, x87_exp2f, x87_exp2}; + } +} diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs index 78ce5dd13..cb939ad5d 100644 --- a/libm/src/math/exp.rs +++ b/libm/src/math/exp.rs @@ -83,6 +83,12 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). #[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp(mut x: f64) -> f64 { + select_implementation! { + name: x87_exp, + use_arch_required: x86_no_sse, + args: x, + } + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs index 1f49f5e96..e0af1945b 100644 --- a/libm/src/math/exp10.rs +++ b/libm/src/math/exp10.rs @@ -9,6 +9,12 @@ const P10: &[f64] = &[ /// Calculates 10 raised to the power of `x` (f64). #[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp10(x: f64) -> f64 { + select_implementation! { + name: x87_exp10, + use_arch_required: x86_no_sse, + args: x, + } + let (mut y, n) = modf(x); let u: u64 = n.to_bits(); /* fabs(n) < 16 without raising invalid on nan */ diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index 22a264211..f0a311c2d 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -9,6 +9,12 @@ const P10: &[f32] = &[ /// Calculates 10 raised to the power of `x` (f32). #[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp10f(x: f32) -> f32 { + select_implementation! { + name: x87_exp10f, + use_arch_required: x86_no_sse, + args: x, + } + let (mut y, n) = modff(x); let u = n.to_bits(); /* fabsf(n) < 8 without raising invalid on nan */ diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index 6e4cbc29d..08b71587f 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -324,6 +324,12 @@ static TBL: [u64; TBLSIZE * 2] = [ /// Calculate `2^x`, that is, 2 raised to the power `x`. #[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp2(mut x: f64) -> f64 { + select_implementation! { + name: x87_exp2, + use_arch_required: x86_no_sse, + args: x, + } + let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; let p1 = f64::from_bits(0x3fe62e42fefa39ef); let p2 = f64::from_bits(0x3fcebfbdff82c575); diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index 733d2f1a8..ceff6822c 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -75,6 +75,12 @@ static EXP2FT: [u64; TBLSIZE] = [ /// Calculate `2^x`, that is, 2 raised to the power `x`. #[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp2f(mut x: f32) -> f32 { + select_implementation! { + name: x87_exp2f, + use_arch_required: x86_no_sse, + args: x, + } + let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; let p1 = f32::from_bits(0x3f317218); let p2 = f32::from_bits(0x3e75fdf0); diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index dbbfdbba9..5541ab79a 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -32,6 +32,12 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). #[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn expf(mut x: f32) -> f32 { + select_implementation! { + name: x87_expf, + use_arch_required: x86_no_sse, + args: x, + } + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ let mut hx = x.to_bits(); From 6932f3f12cb92966cd24c193f79dee155fe26e99 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Sat, 27 Sep 2025 03:07:25 +0300 Subject: [PATCH 2/3] remove overrides to actually check the precision --- libm-test/src/precision.rs | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index ceaeeb3ac..283718a78 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -85,10 +85,13 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // These have a separate implementation on i586 if cfg!(x86_no_sse) { - match ctx.base_name { - Bn::Exp => ulp = 1, - Bn::Exp2 => ulp = 1, - Bn::Exp10 => ulp = 1, + match ctx.fn_ident { + Id::Exp => ulp = 1, + Id::Exp2 => ulp = 1, + Id::Exp10 => ulp = 1, + Id::Expf => ulp = 0, + Id::Exp2f => ulp = 0, + Id::Exp10f => ulp = 0, _ => (), } } @@ -226,15 +229,6 @@ impl MaybeOverride<(f32,)> for SpecialCase { return XFAIL("expm1 representable numbers"); } - if cfg!(x86_no_sse) - && ctx.base_name == BaseName::Exp2 - && !expected.is_infinite() - && actual.is_infinite() - { - // We return infinity when there is a representable value. Test input: 127.97238 - return XFAIL("586 exp2 representable numbers"); - } - if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { // we return some NaN that should be real values or infinite if ctx.basis == CheckBasis::Musl { @@ -286,14 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase { return XFAIL("i586 rint rounding mode"); } - if cfg!(x86_no_sse) - && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) - { - // FIXME: i586 has very imprecise results with ULP > u32::MAX for these - // operations so we can't reasonably provide a limit. - return XFAIL_NOCHECK; - } - if ctx.base_name == BaseName::J0 && input.0 < -1e300 { // Errors get huge close to -inf return XFAIL_NOCHECK; From 984c6e15241f871e2b5b6139ca4e79e3e2b14ad5 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Sat, 27 Sep 2025 03:35:34 +0300 Subject: [PATCH 3/3] allow musl to have its slight error again --- libm-test/src/precision.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index 283718a78..968ac7170 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -111,6 +111,8 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Id::Cbrt => ulp = 2, // FIXME(#401): musl has an incorrect result here. Id::Fdim => ulp = 2, + Id::Exp2f => ulp = 1, + Id::Expf => ulp = 1, Id::Sincosf => ulp = 500, Id::Tgamma => ulp = 20, _ => (),