1
1
/* SPDX-License-Identifier: MIT OR Apache-2.0 */
2
- use crate :: support:: { CastFrom , Float , Int , MinInt } ;
2
+ use crate :: support:: { CastFrom , CastInto , Float , HInt , Int , MinInt , NarrowingDiv } ;
3
3
4
4
#[ inline]
5
- pub fn fmod < F : Float > ( x : F , y : F ) -> F {
5
+ pub fn fmod < F : Float > ( x : F , y : F ) -> F
6
+ where
7
+ F :: Int : HInt ,
8
+ <F :: Int as HInt >:: D : NarrowingDiv ,
9
+ {
6
10
let _1 = F :: Int :: ONE ;
7
11
let sx = x. to_bits ( ) & F :: SIGN_MASK ;
8
12
let ux = x. to_bits ( ) & !F :: SIGN_MASK ;
@@ -29,7 +33,7 @@ pub fn fmod<F: Float>(x: F, y: F) -> F {
29
33
30
34
// To compute `(num << ex) % (div << ey)`, first
31
35
// evaluate `rem = (num << (ex - ey)) % div` ...
32
- let rem = reduction ( num, ex - ey, div) ;
36
+ let rem = reduction :: < F > ( num, ex - ey, div) ;
33
37
// ... so the result will be `rem << ey`
34
38
35
39
if rem. is_zero ( ) {
@@ -58,11 +62,55 @@ fn into_sig_exp<F: Float>(mut bits: F::Int) -> (F::Int, u32) {
58
62
}
59
63
60
64
/// Compute the remainder `(x * 2.pow(e)) % y` without overflow.
///
/// NOTE(review): this span is a rendered diff. Lines starting with `-` are the
/// removed implementation, lines starting with `+` are its replacement, and the
/// bare numbers are old/new line-number residue.
///
/// The replacement is generic over the float type `F` and operates on
/// `F::Int` values. It tries the following strategies in order:
/// 1. `F::BITS == 16`: shift inside a `u64` and take a single `%`.
/// 2. `F::BITS == 64 && e < 32`: conditional-subtract-and-shift loop.
/// 3. `e < F::BITS`: one widened shift followed by a narrowing division,
///    used only when that division reports success.
/// 4. Otherwise: defer to `crate::support::linear_mul_reduction`.
///
/// Strategies 2-4 run after `x` has been brought below `2 * y`.
61
// Removed implementation: reduce `x` modulo `y` once, then perform `e`
// shift-left steps, each followed by a conditional subtraction of `y`.
- fn reduction < I : Int > ( mut x : I , e : u32 , y : I ) -> I {
62
- x %= y;
63
- for _ in 0 ..e {
64
- x <<= 1 ;
65
// Keeps `x` unchanged whenever `checked_sub` returns `None`
// (presumably when `x < y` for an unsigned `I` — confirm).
- x = x. checked_sub ( y) . unwrap_or ( x) ;
65
+ fn reduction < F > ( mut x : F :: Int , e : u32 , y : F :: Int ) -> F :: Int
66
+ where
67
+ F : Float ,
68
+ F :: Int : HInt ,
69
+ <<F as Float >:: Int as HInt >:: D : NarrowingDiv ,
70
+ {
71
+ // `f16` only has 5 exponent bits, so even `f16::MAX = 65504.0` is only
72
+ // a 40-bit integer multiple of the smallest subnormal.
73
+ if F :: BITS == 16 {
74
// Sanity checks on the exponent span and on the shift amount that the
// `u64` arithmetic below relies on.
+ debug_assert ! ( F :: EXP_MAX - F :: EXP_MIN == 29 ) ;
75
+ debug_assert ! ( e <= 29 ) ;
76
+ let u: u16 = x. cast ( ) ;
77
+ let v: u16 = y. cast ( ) ;
78
// `u` has at most 16 bits and `e <= 29` is asserted above, so the
// shifted value stays well inside a `u64`.
+ let u = ( u as u64 ) << e;
79
+ let v = v as u64 ;
80
// The remainder is smaller than `v`, which originated from a `u16`,
// so narrowing back to `u16` loses nothing.
+ return F :: Int :: cast_from ( ( u % v) as u16 ) ;
66
81
}
67
- x
82
+
83
+ // Ensure `x < 2y` for later steps
84
+ if x >= ( y << 1 ) {
85
+ // This case is only reached with subnormal divisors,
86
+ // but it might be better to just normalize all significands
87
+ // to make this unnecessary. The further calls could potentially
88
+ // benefit from assuming a specific fixed leading bit position.
89
+ x %= y;
90
+ }
91
+
92
+ // The simple implementation seems to be fastest for a short reduction
93
+ // at this size. The limit here was chosen empirically on an Intel Nehalem.
94
+ // Newer CPUs that have a faster `u64 * u64 -> u128` multiply might not benefit,
95
+ // and 32-bit systems or architectures without hardware multipliers might
96
+ // want to do this in more cases.
97
+ if F :: BITS == 64 && e < 32 {
98
+ // Assumes `x < 2y`
99
// Each iteration first brings `x` below `y` with a conditional
// subtraction and then doubles it, so `x < 2y` holds again at the top
// of the next iteration.
+ for _ in 0 ..e {
100
+ x = x. checked_sub ( y) . unwrap_or ( x) ;
101
+ x <<= 1 ;
102
+ }
103
// One last conditional subtraction after the final doubling.
+ return x. checked_sub ( y) . unwrap_or ( x) ;
104
+ }
105
+
106
+ // Fast path for short reductions
107
+ if e < F :: BITS {
108
// Shift in the widened type (presumably twice the width of `F::Int`,
// so a shift by `e < F::BITS` cannot drop bits — confirm against `HInt`).
+ let w = x. widen ( ) << e;
109
// Only the remainder is needed. A `None` result falls through to the
// general routine below (presumably when the quotient does not fit
// the narrow type — confirm against `NarrowingDiv`).
+ if let Some ( ( _, r) ) = w. checked_narrowing_div_rem ( y) {
110
+ return r;
111
+ }
112
+ }
113
+
114
+ // Assumes `x < 2y`
115
+ crate :: support:: linear_mul_reduction ( x, e, y)
68
116
}
0 commit comments