Skip to content

Commit 399db7e

Browse files
committed
Make a subset of libm symbols weakly available on all platforms
018616e ("Always have math functions but with `weak` linking attribute if we can") made all math symbols available on platforms that support weak linkage. This caused some unexpected regressions, however, because our less accurate and sometimes slow routines were being selected over the system `libm`, which also tends to be weak [1]. Thus, 0fab77e ("Don't include `math` for `unix` and `wasi` targets") was applied to undo these changes on many platforms.

Now that some improvements have been made to `libm`, add back a subset of these functions:

* cbrt
* ceil
* copysign
* fabs
* fdim
* floor
* fma
* fmax
* fmaximum
* fmin
* fminimum
* fmod
* rint
* round
* roundeven
* sqrt
* trunc

This list includes only functions that produce exact results (verified with exhaustive / extensive tests, and also required by IEEE in most cases), and for which benchmarks indicate performance similar to or better than Musl's soft float math routines [^1]. All except `cbrt` also have `f16` and `f128` implementations.

Once more routines meet these criteria, we can move them from platform-specific availability to always available.

Once this change makes it to rust-lang/rust, we will also be able to move the relevant functions from `std` to `core`.

[^1]: We still rely on the backend to provide optimized assembly routines when available.

[1]: rust-lang/rust#128386
1 parent ddd1a09 commit 399db7e

File tree

2 files changed

+172
-103
lines changed

2 files changed

+172
-103
lines changed

src/lib.rs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,6 @@ mod macros;
4040

4141
pub mod float;
4242
pub mod int;
43-
44-
// Disable for any of the following:
45-
// - x86 without sse2 due to ABI issues
46-
// - <https://github.com/rust-lang/rust/issues/114479>
47-
// - but exclude UEFI since it is a soft-float target
48-
// - <https://github.com/rust-lang/rust/issues/128533>
49-
// - All unix targets (linux, macos, freebsd, android, etc)
50-
// - wasm with known target_os
51-
#[cfg(not(any(
52-
all(
53-
target_arch = "x86",
54-
not(target_feature = "sse2"),
55-
not(target_os = "uefi"),
56-
),
57-
unix,
58-
all(target_family = "wasm", not(target_os = "unknown"))
59-
)))]
6043
pub mod math;
6144
pub mod mem;
6245

src/math.rs

Lines changed: 172 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -6,109 +6,195 @@
66
mod libm;
77

88
#[allow(unused_macros)]
9-
macro_rules! no_mangle {
9+
macro_rules! libm_intrinsics {
1010
($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => {
1111
intrinsics! {
1212
$(
1313
pub extern "C" fn $fun($($iid: $ity),+) -> $oty {
14-
self::libm::$fun($($iid),+)
14+
$crate::math::libm::$fun($($iid),+)
1515
}
1616
)+
1717
}
1818
}
1919
}
2020

21-
#[cfg(not(windows))]
22-
no_mangle! {
23-
fn acos(x: f64) -> f64;
24-
fn asin(x: f64) -> f64;
25-
fn cbrt(x: f64) -> f64;
26-
fn expm1(x: f64) -> f64;
27-
fn hypot(x: f64, y: f64) -> f64;
28-
fn tan(x: f64) -> f64;
29-
fn cos(x: f64) -> f64;
30-
fn expf(x: f32) -> f32;
31-
fn log2(x: f64) -> f64;
32-
fn log2f(x: f32) -> f32;
33-
fn log10(x: f64) -> f64;
34-
fn log10f(x: f32) -> f32;
35-
fn log(x: f64) -> f64;
36-
fn logf(x: f32) -> f32;
37-
fn round(x: f64) -> f64;
38-
fn roundf(x: f32) -> f32;
39-
fn rint(x: f64) -> f64;
40-
fn rintf(x: f32) -> f32;
41-
fn sin(x: f64) -> f64;
42-
fn pow(x: f64, y: f64) -> f64;
43-
fn powf(x: f32, y: f32) -> f32;
44-
fn acosf(n: f32) -> f32;
45-
fn atan2f(a: f32, b: f32) -> f32;
46-
fn atanf(n: f32) -> f32;
47-
fn coshf(n: f32) -> f32;
48-
fn expm1f(n: f32) -> f32;
49-
fn fdim(a: f64, b: f64) -> f64;
50-
fn fdimf(a: f32, b: f32) -> f32;
51-
fn log1pf(n: f32) -> f32;
52-
fn sinhf(n: f32) -> f32;
53-
fn tanhf(n: f32) -> f32;
54-
fn ldexp(f: f64, n: i32) -> f64;
55-
fn ldexpf(f: f32, n: i32) -> f32;
56-
fn tgamma(x: f64) -> f64;
57-
fn tgammaf(x: f32) -> f32;
58-
fn atan(x: f64) -> f64;
59-
fn atan2(x: f64, y: f64) -> f64;
60-
fn cosh(x: f64) -> f64;
61-
fn log1p(x: f64) -> f64;
62-
fn sinh(x: f64) -> f64;
63-
fn tanh(x: f64) -> f64;
64-
fn cosf(x: f32) -> f32;
65-
fn exp(x: f64) -> f64;
66-
fn sinf(x: f32) -> f32;
67-
fn exp2(x: f64) -> f64;
68-
fn exp2f(x: f32) -> f32;
69-
fn fma(x: f64, y: f64, z: f64) -> f64;
70-
fn fmaf(x: f32, y: f32, z: f32) -> f32;
71-
fn asinf(n: f32) -> f32;
72-
fn cbrtf(n: f32) -> f32;
73-
fn hypotf(x: f32, y: f32) -> f32;
74-
fn tanf(n: f32) -> f32;
21+
/// This set of functions is well tested in `libm` and known to provide similar performance to
22+
/// system `libm`, as well as the same or better accuracy.
23+
mod full_availability {
24+
#[cfg(f16_enabled)]
25+
libm_intrinsics! {
26+
fn ceilf16(x: f16) -> f16;
27+
fn copysignf16(x: f16, y: f16) -> f16;
28+
fn fabsf16(x: f16) -> f16;
29+
fn fdimf16(x: f16, y: f16) -> f16;
30+
fn floorf16(x: f16) -> f16;
31+
fn fmaxf16(x: f16, y: f16) -> f16;
32+
fn fmaximumf16(x: f16, y: f16) -> f16;
33+
fn fminf16(x: f16, y: f16) -> f16;
34+
fn fminimumf16(x: f16, y: f16) -> f16;
35+
fn fmodf16(x: f16, y: f16) -> f16;
36+
fn rintf16(x: f16) -> f16;
37+
fn roundevenf16(x: f16) -> f16;
38+
fn roundf16(x: f16) -> f16;
39+
fn sqrtf16(x: f16) -> f16;
40+
fn truncf16(x: f16) -> f16;
41+
}
42+
43+
/* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know
44+
* the system libc provides in order to avoid conflicts. */
7545

76-
fn sqrtf(x: f32) -> f32;
77-
fn sqrt(x: f64) -> f64;
46+
#[cfg(all(not(windows), not(target_vendor = "apple")))]
47+
libm_intrinsics! {
48+
/* f32 */
49+
fn cbrtf(n: f32) -> f32;
50+
fn ceilf(x: f32) -> f32;
51+
fn copysignf(x: f32, y: f32) -> f32;
52+
fn fabsf(x: f32) -> f32;
53+
fn fdimf(a: f32, b: f32) -> f32;
54+
fn floorf(x: f32) -> f32;
55+
fn fmaf(x: f32, y: f32, z: f32) -> f32;
56+
fn fmaxf(x: f32, y: f32) -> f32;
57+
fn fminf(x: f32, y: f32) -> f32;
58+
fn fmodf(x: f32, y: f32) -> f32;
59+
fn rintf(x: f32) -> f32;
60+
fn roundf(x: f32) -> f32;
61+
fn sqrtf(x: f32) -> f32;
62+
fn truncf(x: f32) -> f32;
7863

79-
fn ceil(x: f64) -> f64;
80-
fn ceilf(x: f32) -> f32;
81-
fn floor(x: f64) -> f64;
82-
fn floorf(x: f32) -> f32;
83-
fn trunc(x: f64) -> f64;
84-
fn truncf(x: f32) -> f32;
64+
/* f64 */
65+
fn cbrt(x: f64) -> f64;
66+
fn ceil(x: f64) -> f64;
67+
fn copysign(x: f64, y: f64) -> f64;
68+
fn fabs(x: f64) -> f64;
69+
fn fdim(a: f64, b: f64) -> f64;
70+
fn floor(x: f64) -> f64;
71+
fn fma(x: f64, y: f64, z: f64) -> f64;
72+
fn fmax(x: f64, y: f64) -> f64;
73+
fn fmin(x: f64, y: f64) -> f64;
74+
fn fmod(x: f64, y: f64) -> f64;
75+
fn rint(x: f64) -> f64;
76+
fn round(x: f64) -> f64;
77+
fn sqrt(x: f64) -> f64;
78+
fn trunc(x: f64) -> f64;
79+
}
8580

86-
fn fmin(x: f64, y: f64) -> f64;
87-
fn fminf(x: f32, y: f32) -> f32;
88-
fn fmax(x: f64, y: f64) -> f64;
89-
fn fmaxf(x: f32, y: f32) -> f32;
90-
// `f64 % f64`
91-
fn fmod(x: f64, y: f64) -> f64;
92-
// `f32 % f32`
93-
fn fmodf(x: f32, y: f32) -> f32;
81+
// Windows and macOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`,
82+
// however, so we still provide a fallback.
83+
libm_intrinsics! {
84+
fn fmaximum(x: f64, y: f64) -> f64;
85+
fn fmaximumf(x: f32, y: f32) -> f32;
86+
fn fminimum(x: f64, y: f64) -> f64;
87+
fn fminimumf(x: f32, y: f32) -> f32;
88+
fn roundeven(x: f64) -> f64;
89+
fn roundevenf(x: f32) -> f32;
90+
}
9491

95-
fn erf(x: f64) -> f64;
96-
fn erff(x: f32) -> f32;
97-
fn erfc(x: f64) -> f64;
98-
fn erfcf(x: f32) -> f32;
92+
#[cfg(f128_enabled)]
93+
libm_intrinsics! {
94+
fn ceilf128(x: f128) -> f128;
95+
fn copysignf128(x: f128, y: f128) -> f128;
96+
fn fabsf128(x: f128) -> f128;
97+
fn fdimf128(x: f128, y: f128) -> f128;
98+
fn floorf128(x: f128) -> f128;
99+
fn fmaf128(x: f128, y: f128, z: f128) -> f128;
100+
fn fmaxf128(x: f128, y: f128) -> f128;
101+
fn fmaximumf128(x: f128, y: f128) -> f128;
102+
fn fminf128(x: f128, y: f128) -> f128;
103+
fn fminimumf128(x: f128, y: f128) -> f128;
104+
fn fmodf128(x: f128, y: f128) -> f128;
105+
fn rintf128(x: f128) -> f128;
106+
fn roundevenf128(x: f128) -> f128;
107+
fn roundf128(x: f128) -> f128;
108+
fn sqrtf128(x: f128) -> f128;
109+
fn truncf128(x: f128) -> f128;
110+
}
99111
}
100112

101-
// allow for windows (and other targets)
102-
intrinsics! {
103-
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
104-
let r = self::libm::lgamma_r(x);
105-
*s = r.1;
106-
r.0
113+
/// This group of functions has more performance or precision issues than system versions, or
114+
/// is otherwise less well tested. Provide them only on platforms that have problems with the
115+
/// system `libm`.
116+
///
117+
/// As `libm` improves, more functions will be moved from this group to the first group.
118+
///
119+
/// Do not supply for any of the following:
120+
/// - x86 without sse2 due to ABI issues
121+
/// - <https://github.com/rust-lang/rust/issues/114479>
122+
/// - but exclude UEFI since it is a soft-float target
123+
/// - <https://github.com/rust-lang/rust/issues/128533>
124+
/// - All unix targets (linux, macos, freebsd, android, etc)
125+
/// - wasm with known target_os
126+
#[cfg(not(any(
127+
all(
128+
target_arch = "x86",
129+
not(target_feature = "sse2"),
130+
not(target_os = "uefi"),
131+
),
132+
unix,
133+
all(target_family = "wasm", not(target_os = "unknown"))
134+
)))]
135+
mod partial_availability {
136+
#[cfg(not(windows))]
137+
libm_intrinsics! {
138+
fn acos(x: f64) -> f64;
139+
fn acosf(n: f32) -> f32;
140+
fn asin(x: f64) -> f64;
141+
fn asinf(n: f32) -> f32;
142+
fn atan(x: f64) -> f64;
143+
fn atan2(x: f64, y: f64) -> f64;
144+
fn atan2f(a: f32, b: f32) -> f32;
145+
fn atanf(n: f32) -> f32;
146+
fn cos(x: f64) -> f64;
147+
fn cosf(x: f32) -> f32;
148+
fn cosh(x: f64) -> f64;
149+
fn coshf(n: f32) -> f32;
150+
fn erf(x: f64) -> f64;
151+
fn erfc(x: f64) -> f64;
152+
fn erfcf(x: f32) -> f32;
153+
fn erff(x: f32) -> f32;
154+
fn exp(x: f64) -> f64;
155+
fn exp2(x: f64) -> f64;
156+
fn exp2f(x: f32) -> f32;
157+
fn expf(x: f32) -> f32;
158+
fn expm1(x: f64) -> f64;
159+
fn expm1f(n: f32) -> f32;
160+
fn hypot(x: f64, y: f64) -> f64;
161+
fn hypotf(x: f32, y: f32) -> f32;
162+
fn ldexp(f: f64, n: i32) -> f64;
163+
fn ldexpf(f: f32, n: i32) -> f32;
164+
fn log(x: f64) -> f64;
165+
fn log10(x: f64) -> f64;
166+
fn log10f(x: f32) -> f32;
167+
fn log1p(x: f64) -> f64;
168+
fn log1pf(n: f32) -> f32;
169+
fn log2(x: f64) -> f64;
170+
fn log2f(x: f32) -> f32;
171+
fn logf(x: f32) -> f32;
172+
fn pow(x: f64, y: f64) -> f64;
173+
fn powf(x: f32, y: f32) -> f32;
174+
fn sin(x: f64) -> f64;
175+
fn sinf(x: f32) -> f32;
176+
fn sinh(x: f64) -> f64;
177+
fn sinhf(n: f32) -> f32;
178+
fn tan(x: f64) -> f64;
179+
fn tanf(n: f32) -> f32;
180+
fn tanh(x: f64) -> f64;
181+
fn tanhf(n: f32) -> f32;
182+
fn tgamma(x: f64) -> f64;
183+
fn tgammaf(x: f32) -> f32;
107184
}
108185

109-
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
110-
let r = self::libm::lgammaf_r(x);
111-
*s = r.1;
112-
r.0
186+
// allow for windows (and other targets)
187+
intrinsics! {
188+
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
189+
let r = self::libm::lgamma_r(x);
190+
*s = r.1;
191+
r.0
192+
}
193+
194+
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
195+
let r = self::libm::lgammaf_r(x);
196+
*s = r.1;
197+
r.0
198+
}
113199
}
114200
}

0 commit comments

Comments
 (0)