@@ -14,25 +14,33 @@ use super::Float;
14
14
/// - Calculate a base mantissa by shifting the integer into mantissa position
15
15
/// - Figure out if rounding needs to occur by classifying truncated bits. Some patterns apply
16
16
/// here, so they may be "squashed" into smaller numbers to simplify the classification.
17
+ ///
18
+ /// # Terminology
19
+ ///
20
+ /// - `i`: the original integer
21
+ /// - `i_m`: the integer, shifted fully left (no leading zeros)
22
+ /// - `n`: number of leading zeros
23
+ /// - `e`: the resulting exponent
24
+ /// - `m`: the resulting mantissa
25
+ /// - `m_base`: the mantissa before adjusting for truncated bits
17
26
mod int_to_float {
18
27
use super :: * ;
19
28
20
29
/// Calculate the exponent from the number of leading zeros.
21
30
fn exp < I : Int , F : Float < Int : CastFrom < u32 > > > ( n : u32 ) -> F :: Int {
22
- F :: Int :: cast_from ( I :: BITS + F :: EXPONENT_BIAS - 2 - n)
31
+ F :: Int :: cast_from ( F :: EXPONENT_BIAS - 1 + I :: BITS - n)
23
32
}
24
33
25
- /// Shift the integer into the float's mantissa bits. Keep the lowest exponent bit intact.
26
- fn m_base < I : Int , F : Float < Int : CastFrom < I > > > ( i_m : I ) -> F :: Int {
34
+ /// Calculate the mantissa in cases where the float size is less than integer size. An
35
+ /// adjustment of the final mantissa will be needed, but it is calculated separately.
36
+ fn m_f_lt_i < I : Int , F : Float < Int : CastFrom < I > > > ( i_m : I ) -> F :: Int {
37
+ // `i_m` already has no leading zeros. Just shift it into the float's mantissa bits,
38
+ // retaining the highest bits.
27
39
F :: Int :: cast_from ( i_m >> ( ( I :: BITS - F :: BITS ) + F :: EXPONENT_BITS ) )
28
40
}
29
41
30
- /// Calculate the mantissa in cases where the float size is greater than integer size
31
- fn m_f_gt_i < I : Int , F : Float < Int : CastFrom < I > > > ( i : I , n : u32 ) -> F :: Int {
32
- F :: Int :: cast_from ( i) << ( F :: SIGNIFICAND_BITS - I :: BITS + 1 + n)
33
- }
34
-
35
- /// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes
42
+ /// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes.
43
+ /// Returns the mantissa and necessary adjustment.
36
44
fn m_f_eq_i < I : Int + CastInto < F :: Int > , F : Float < Int = I > > ( i : I , n : u32 ) -> ( F :: Int , F :: Int ) {
37
45
let base = ( i << n) >> F :: EXPONENT_BITS ;
38
46
@@ -43,16 +51,23 @@ mod int_to_float {
43
51
( base, adj)
44
52
}
45
53
54
+ /// Calculate the mantissa in cases where the float size is greater than integer size.
55
+ fn m_f_gt_i < I : Int , F : Float < Int : CastFrom < I > > > ( i : I , n : u32 ) -> F :: Int {
56
+ F :: Int :: cast_from ( i) << ( F :: SIGNIFICAND_BITS - I :: BITS + 1 + n)
57
+ }
58
+
46
59
/// Adjust a mantissa with dropped bits
47
- fn m_adj < F : Float > ( m_base : F :: Int , dropped_bits : F :: Int ) -> F :: Int {
60
+ fn m_adj < F : Float < Int : CastInto < i32 > > > ( m_base : F :: Int , dropped_bits : F :: Int ) -> F :: Int {
61
+ // fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
48
62
// Branchlessly extract a `1` if rounding up should happen
49
63
let adj = ( dropped_bits - ( dropped_bits >> ( F :: BITS - 1 ) & !m_base) ) >> ( F :: BITS - 1 ) ;
50
64
51
65
// Add one when we need to round up. Break ties to even.
52
66
m_base + adj
53
67
}
54
68
55
- /// Combine a final float repr from an exponent and mantissa.
69
+ /// Shift the exponent to its position and add the mantissa. Allows adjusting an off-by-one
70
+ /// exponent with an overflowing mantissa.
56
71
fn repr < F : Float > ( e : F :: Int , m : F :: Int ) -> F :: Int {
57
72
// + rather than | so the mantissa can overflow into the exponent
58
73
( e << F :: SIGNIFICAND_BITS ) + m
@@ -77,7 +92,7 @@ mod int_to_float {
77
92
let n = i. leading_zeros ( ) ;
78
93
let ( m_base, adj) = m_f_eq_i :: < u32 , f32 > ( i, n) ;
79
94
let m = m_adj :: < f32 > ( m_base, adj) ;
80
- let e = exp :: < u32 , f32 > ( n) ;
95
+ let e = exp :: < u32 , f32 > ( n) - 1 ;
81
96
repr :: < f32 > ( e, m)
82
97
}
83
98
@@ -87,7 +102,7 @@ mod int_to_float {
87
102
}
88
103
let n = i. leading_zeros ( ) ;
89
104
let m = m_f_gt_i :: < _ , f64 > ( i, n) ;
90
- let e = exp :: < u32 , f64 > ( n) ;
105
+ let e = exp :: < u32 , f64 > ( n) - 1 ;
91
106
repr :: < f64 > ( e, m)
92
107
}
93
108
@@ -111,12 +126,12 @@ mod int_to_float {
111
126
pub fn u64_to_f32_bits ( i : u64 ) -> u32 {
112
127
let n = i. leading_zeros ( ) ;
113
128
let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
114
- let m_base = m_base :: < _ , f32 > ( i_m) ;
129
+ let m_base = m_f_lt_i :: < u64 , f32 > ( i_m) ;
115
130
// The entire lower half of `i` will be truncated (masked portion), plus the
116
131
// next `EXPONENT_BITS` bits.
117
132
let adj = ( i_m >> f32:: EXPONENT_BITS | i_m & 0xFFFF ) as u32 ;
118
133
let m = m_adj :: < f32 > ( m_base, adj) ;
119
- let e = if i == 0 { 0 } else { exp :: < u64 , f32 > ( n) } ;
134
+ let e = if i == 0 { 0 } else { exp :: < u64 , f32 > ( n) - 1 } ;
120
135
repr :: < f32 > ( e, m)
121
136
}
122
137
@@ -127,7 +142,7 @@ mod int_to_float {
127
142
let n = i. leading_zeros ( ) ;
128
143
let ( m_base, adj) = m_f_eq_i :: < u64 , f64 > ( i, n) ;
129
144
let m = m_adj :: < f64 > ( m_base, adj) ;
130
- let e = exp :: < u64 , f64 > ( n) ;
145
+ let e = exp :: < u64 , f64 > ( n) - 1 ;
131
146
repr :: < f64 > ( e, m)
132
147
}
133
148
@@ -138,14 +153,14 @@ mod int_to_float {
138
153
}
139
154
let n = i. leading_zeros ( ) ;
140
155
let m = m_f_gt_i :: < _ , f128 > ( i, n) ;
141
- let e = exp :: < u64 , f128 > ( n) ;
156
+ let e = exp :: < u64 , f128 > ( n) - 1 ;
142
157
repr :: < f128 > ( e, m)
143
158
}
144
159
145
160
pub fn u128_to_f32_bits ( i : u128 ) -> u32 {
146
161
let n = i. leading_zeros ( ) ;
147
162
let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
148
- let m_base = m_base :: < _ , f32 > ( i_m) ;
163
+ let m_base = m_f_lt_i :: < u128 , f32 > ( i_m) ;
149
164
150
165
// Within the upper `F::BITS`, everything except for the significand
151
166
// gets truncated
@@ -157,19 +172,19 @@ mod int_to_float {
157
172
let adj = d1 | d2;
158
173
159
174
let m = m_adj :: < f32 > ( m_base, adj) ;
160
- let e = if i == 0 { 0 } else { exp :: < u128 , f32 > ( n) } ;
175
+ let e = if i == 0 { 0 } else { exp :: < u128 , f32 > ( n) - 1 } ;
161
176
repr :: < f32 > ( e, m)
162
177
}
163
178
164
179
pub fn u128_to_f64_bits ( i : u128 ) -> u64 {
165
180
let n = i. leading_zeros ( ) ;
166
181
let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
167
- let m_base = m_base :: < _ , f64 > ( i_m) ;
182
+ let m_base = m_f_lt_i :: < u128 , f64 > ( i_m) ;
168
183
// The entire lower half of `i` will be truncated (masked portion), plus the
169
184
// next `EXPONENT_BITS` bits.
170
185
let adj = ( i_m >> f64:: EXPONENT_BITS | i_m & 0xFFFF_FFFF ) as u64 ;
171
186
let m = m_adj :: < f64 > ( m_base, adj) ;
172
- let e = if i == 0 { 0 } else { exp :: < u128 , f64 > ( n) } ;
187
+ let e = if i == 0 { 0 } else { exp :: < u128 , f64 > ( n) - 1 } ;
173
188
repr :: < f64 > ( e, m)
174
189
}
175
190
@@ -181,7 +196,7 @@ mod int_to_float {
181
196
let n = i. leading_zeros ( ) ;
182
197
let ( m_base, adj) = m_f_eq_i :: < u128 , f128 > ( i, n) ;
183
198
let m = m_adj :: < f128 > ( m_base, adj) ;
184
- let e = exp :: < u128 , f128 > ( n) ;
199
+ let e = exp :: < u128 , f128 > ( n) - 1 ;
185
200
repr :: < f128 > ( e, m)
186
201
}
187
202
}
0 commit comments