@@ -6,73 +6,152 @@ use super::Float;
66
77/// Conversions from integers to floats.
88///
9- /// These are hand-optimized bit twiddling code,
10- /// which unfortunately isn't the easiest kind of code to read.
9+ /// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
10+ /// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
11+ /// mantissa _with the implicit bit set_!
12+ /// - Figure out if rounding needs to occour by classifying truncated bits. Some patterns are used
13+ /// to figure this out. Adjust the mantissa if needed.
14+ /// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros) and subtract one.
15+ /// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
16+ /// from the exponent (above) accounts for the explicit bit being set in the mantissa.
1117///
12- /// The algorithm is explained here: <https://blog.m-ou.se/floats/>
18+ /// # Terminology
19+ ///
20+ /// - `i`: the original integer
21+ /// - `i_m`: the integer, shifted fully left (no leading zeros)
22+ /// - `n`: number of leading zeroes
23+ /// - `e`: the resulting exponent
24+ /// - `m`: the resulting mantissa
25+ /// - `m_base`: the mantissa before adjusting for truncated bits
1326mod int_to_float {
27+ use super :: * ;
28+
29+ /// Calculate the exponent from the number of leading zeros.
30+ ///
31+ /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
32+ /// bit set can be added back later.
33+ fn exp < I : Int , F : Float < Int : CastFrom < u32 > > > ( n : u32 ) -> F :: Int {
34+ F :: Int :: cast_from ( F :: EXPONENT_BIAS - 1 + I :: BITS - n)
35+ }
36+
37+ /// Adjust a mantissa with dropped bits to perform correct rounding.
38+ ///
39+ /// The dropped bits can be compr
40+ fn m_adj < F : Float > ( m_base : F :: Int , dropped_bits : F :: Int ) -> F :: Int {
41+ // Branchlessly extract a `1` if rounding up should happen, 0 otherwise
42+ // This accounts for rounding to even.
43+ let adj = ( dropped_bits - ( dropped_bits >> ( F :: BITS - 1 ) & !m_base) ) >> ( F :: BITS - 1 ) ;
44+
45+ // Add one when we need to round up. Break ties to even.
46+ m_base + adj
47+ }
48+
49+ /// Shift the exponent to its position and add the mantissa.
50+ ///
51+ /// If the mantissa has the implicit bit set, the exponent should be one less than its actual
52+ /// value to cancel it out.
53+ fn repr < F : Float > ( e : F :: Int , m : F :: Int ) -> F :: Int {
54+ // + rather than | so the mantissa can overflow into the exponent
55+ ( e << F :: SIGNIFICAND_BITS ) + m
56+ }
57+
58+ /// Shift distance for a left-aligned integer to a smaller float.
59+ fn shift_f_lt_i < I : Int , F : Float > ( ) -> u32 {
60+ ( I :: BITS - F :: BITS ) + F :: EXPONENT_BITS
61+ }
62+
63+ /// Shift distance for an integer with `n` leading zeros to a smaller float.
64+ fn shift_f_gt_i < I : Int , F : Float > ( n : u32 ) -> u32 {
65+ F :: SIGNIFICAND_BITS - I :: BITS + 1 + n
66+ }
67+
68+ /// Perform a signed operation as unsigned, then add the sign back.
69+ pub fn signed < I , F , Conv > ( i : I , conv : Conv ) -> F
70+ where
71+ F : Float ,
72+ I : Int ,
73+ F :: Int : CastFrom < I > ,
74+ Conv : Fn ( I :: UnsignedInt ) -> F :: Int ,
75+ {
76+ let sign_bit = F :: Int :: cast_from ( i >> ( I :: BITS - 1 ) ) << ( F :: BITS - 1 ) ;
77+ F :: from_repr ( conv ( i. unsigned_abs ( ) ) | sign_bit)
78+ }
79+
1480 pub fn u32_to_f32_bits ( i : u32 ) -> u32 {
1581 if i == 0 {
1682 return 0 ;
1783 }
1884 let n = i. leading_zeros ( ) ;
19- let a = ( i << n) >> 8 ; // Significant bits, with bit 24 still in tact.
20- let b = ( i << n) << 24 ; // Insignificant bits, only relevant for rounding.
21- let m = a + ( ( b - ( b >> 31 & !a ) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
22- let e = 157 - n ; // Exponent plus 127, minus one.
23- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
85+ let m_base = ( i << n) >> f32 :: EXPONENT_BITS ;
86+ let adj = ( i << n) << ( f32 :: SIGNIFICAND_BITS + 1 ) ;
87+ let m = m_adj :: < f32 > ( m_base , adj ) ;
88+ let e = exp :: < u32 , f32 > ( n ) - 1 ;
89+ repr :: < f32 > ( e , m )
2490 }
2591
2692 pub fn u32_to_f64_bits ( i : u32 ) -> u64 {
2793 if i == 0 {
2894 return 0 ;
2995 }
3096 let n = i. leading_zeros ( ) ;
31- let m = ( i as u64 ) << ( 21 + n) ; // Significant bits, with bit 53 still in tact.
32- let e = 1053 - n as u64 ; // Exponent plus 1023, minus one.
33- ( e << 52 ) + m // Bit 53 of m will overflow into e.
97+ let m = ( i as u64 ) << shift_f_gt_i :: < u32 , f64 > ( n) ;
98+ let e = exp :: < u32 , f64 > ( n ) - 1 ;
99+ repr :: < f64 > ( e , m )
34100 }
35101
36102 pub fn u64_to_f32_bits ( i : u64 ) -> u32 {
37103 let n = i. leading_zeros ( ) ;
38- let y = i. wrapping_shl ( n) ;
39- let a = ( y >> 40 ) as u32 ; // Significant bits, with bit 24 still in tact.
40- let b = ( y >> 8 | y & 0xFFFF ) as u32 ; // Insignificant bits, only relevant for rounding.
41- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
42- let e = if i == 0 { 0 } else { 189 - n } ; // Exponent plus 127, minus one, except for zero.
43- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
104+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
105+ let m_base: u32 = ( i_m >> shift_f_lt_i :: < u64 , f32 > ( ) ) as u32 ;
106+ // The entire lower half of `i` will be truncated (masked portion), plus the
107+ // next `EXPONENT_BITS` bits.
108+ let adj = ( i_m >> f32:: EXPONENT_BITS | i_m & 0xFFFF ) as u32 ;
109+ let m = m_adj :: < f32 > ( m_base, adj) ;
110+ let e = if i == 0 { 0 } else { exp :: < u64 , f32 > ( n) - 1 } ;
111+ repr :: < f32 > ( e, m)
44112 }
45113
46114 pub fn u64_to_f64_bits ( i : u64 ) -> u64 {
47115 if i == 0 {
48116 return 0 ;
49117 }
50118 let n = i. leading_zeros ( ) ;
51- let a = ( i << n) >> 11 ; // Significant bits, with bit 53 still in tact.
52- let b = ( i << n) << 53 ; // Insignificant bits, only relevant for rounding.
53- let m = a + ( ( b - ( b >> 63 & !a ) ) >> 63 ) ; // Add one when we need to round up. Break ties to even.
54- let e = 1085 - n as u64 ; // Exponent plus 1023, minus one.
55- ( e << 52 ) + m // + not |, so the mantissa can overflow into the exponent.
119+ let m_base = ( i << n) >> f64 :: EXPONENT_BITS ;
120+ let adj = ( i << n) << ( f64 :: SIGNIFICAND_BITS + 1 ) ;
121+ let m = m_adj :: < f64 > ( m_base , adj ) ;
122+ let e = exp :: < u64 , f64 > ( n ) - 1 ;
123+ repr :: < f64 > ( e , m )
56124 }
57125
58126 pub fn u128_to_f32_bits ( i : u128 ) -> u32 {
59127 let n = i. leading_zeros ( ) ;
60- let y = i. wrapping_shl ( n) ;
61- let a = ( y >> 104 ) as u32 ; // Significant bits, with bit 24 still in tact.
62- let b = ( y >> 72 ) as u32 | ( ( y << 32 ) >> 32 != 0 ) as u32 ; // Insignificant bits, only relevant for rounding.
63- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
64- let e = if i == 0 { 0 } else { 253 - n } ; // Exponent plus 127, minus one, except for zero.
65- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
128+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
129+ let m_base: u32 = ( i_m >> shift_f_lt_i :: < u128 , f32 > ( ) ) as u32 ;
130+
131+ // Within the upper `F::BITS`, everything except for the signifcand
132+ // gets truncated
133+ let d1: u32 = ( i_m >> ( u128:: BITS - f32:: BITS - f32:: SIGNIFICAND_BITS - 1 ) ) . cast ( ) ;
134+
135+ // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
136+ // check if it is nonzero.
137+ let d2: u32 = ( i_m << f32:: BITS >> f32:: BITS != 0 ) . into ( ) ;
138+ let adj = d1 | d2;
139+
140+ let m = m_adj :: < f32 > ( m_base, adj) ;
141+ let e = if i == 0 { 0 } else { exp :: < u128 , f32 > ( n) - 1 } ;
142+ repr :: < f32 > ( e, m)
66143 }
67144
68145 pub fn u128_to_f64_bits ( i : u128 ) -> u64 {
69146 let n = i. leading_zeros ( ) ;
70- let y = i. wrapping_shl ( n) ;
71- let a = ( y >> 75 ) as u64 ; // Significant bits, with bit 53 still in tact.
72- let b = ( y >> 11 | y & 0xFFFF_FFFF ) as u64 ; // Insignificant bits, only relevant for rounding.
73- let m = a + ( ( b - ( b >> 63 & !a) ) >> 63 ) ; // Add one when we need to round up. Break ties to even.
74- let e = if i == 0 { 0 } else { 1149 - n as u64 } ; // Exponent plus 1023, minus one, except for zero.
75- ( e << 52 ) + m // + not |, so the mantissa can overflow into the exponent.
147+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
148+ let m_base: u64 = ( i_m >> shift_f_lt_i :: < u128 , f64 > ( ) ) as u64 ;
149+ // The entire lower half of `i` will be truncated (masked portion), plus the
150+ // next `EXPONENT_BITS` bits.
151+ let adj = ( i_m >> f64:: EXPONENT_BITS | i_m & 0xFFFF_FFFF ) as u64 ;
152+ let m = m_adj :: < f64 > ( m_base, adj) ;
153+ let e = if i == 0 { 0 } else { exp :: < u128 , f64 > ( n) - 1 } ;
154+ repr :: < f64 > ( e, m)
76155 }
77156}
78157
@@ -113,38 +192,32 @@ intrinsics! {
113192intrinsics ! {
114193 #[ arm_aeabi_alias = __aeabi_i2f]
115194 pub extern "C" fn __floatsisf( i: i32 ) -> f32 {
116- let sign_bit = ( ( i >> 31 ) as u32 ) << 31 ;
117- f32 :: from_bits( int_to_float:: u32_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
195+ int_to_float:: signed( i, int_to_float:: u32_to_f32_bits)
118196 }
119197
120198 #[ arm_aeabi_alias = __aeabi_i2d]
121199 pub extern "C" fn __floatsidf( i: i32 ) -> f64 {
122- let sign_bit = ( ( i >> 31 ) as u64 ) << 63 ;
123- f64 :: from_bits( int_to_float:: u32_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
200+ int_to_float:: signed( i, int_to_float:: u32_to_f64_bits)
124201 }
125202
126203 #[ arm_aeabi_alias = __aeabi_l2f]
127204 pub extern "C" fn __floatdisf( i: i64 ) -> f32 {
128- let sign_bit = ( ( i >> 63 ) as u32 ) << 31 ;
129- f32 :: from_bits( int_to_float:: u64_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
205+ int_to_float:: signed( i, int_to_float:: u64_to_f32_bits)
130206 }
131207
132208 #[ arm_aeabi_alias = __aeabi_l2d]
133209 pub extern "C" fn __floatdidf( i: i64 ) -> f64 {
134- let sign_bit = ( ( i >> 63 ) as u64 ) << 63 ;
135- f64 :: from_bits( int_to_float:: u64_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
210+ int_to_float:: signed( i, int_to_float:: u64_to_f64_bits)
136211 }
137212
138213 #[ cfg_attr( target_os = "uefi" , unadjusted_on_win64) ]
139214 pub extern "C" fn __floattisf( i: i128 ) -> f32 {
140- let sign_bit = ( ( i >> 127 ) as u32 ) << 31 ;
141- f32 :: from_bits( int_to_float:: u128_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
215+ int_to_float:: signed( i, int_to_float:: u128_to_f32_bits)
142216 }
143217
144218 #[ cfg_attr( target_os = "uefi" , unadjusted_on_win64) ]
145219 pub extern "C" fn __floattidf( i: i128 ) -> f64 {
146- let sign_bit = ( ( i >> 127 ) as u64 ) << 63 ;
147- f64 :: from_bits( int_to_float:: u128_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
220+ int_to_float:: signed( i, int_to_float:: u128_to_f64_bits)
148221 }
149222}
150223
0 commit comments