Skip to content

Commit d1aa791

Browse files
committed
wip
1 parent a5277c0 commit d1aa791

File tree

1 file changed

+37
-22
lines changed

1 file changed

+37
-22
lines changed

src/float/conv.rs

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,33 @@ use super::Float;
1414
/// - Calculate a base mantissa by shifting the integer into mantissa position
1515
/// - Figure out if rounding needs to occur by classifying truncated bits. Some patterns apply
1616
/// here, so they may be "squashed" into smaller numbers to simplify the classification.
17+
///
18+
/// # Terminology
19+
///
20+
/// - `i`: the original integer
21+
/// - `i_m`: the integer, shifted fully left (no leading zeros)
22+
/// - `n`: number of leading zeros
23+
/// - `e`: the resulting exponent
24+
/// - `m`: the resulting mantissa
25+
/// - `m_base`: the mantissa before adjusting for truncated bits
1726
mod int_to_float {
1827
use super::*;
1928

2029
/// Calculate the exponent from the number of leading zeros.
2130
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
22-
F::Int::cast_from(I::BITS + F::EXPONENT_BIAS - 2 - n)
31+
F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
2332
}
2433

25-
/// Shift the integer into the float's mantissa bits. Keep the lowest exponent bit intact.
26-
fn m_base<I: Int, F: Float<Int: CastFrom<I>>>(i_m: I) -> F::Int {
34+
/// Calculate the mantissa in cases where the float size is less than integer size. An
35+
/// adjustment of the final mantissa will be needed, but it is calculated separately.
36+
fn m_f_lt_i<I: Int, F: Float<Int: CastFrom<I>>>(i_m: I) -> F::Int {
37+
// `i_m` already has no leading zeros. Just shift it into the float's mantissa bits,
38+
// retaining the highest bits.
2739
F::Int::cast_from(i_m >> ((I::BITS - F::BITS) + F::EXPONENT_BITS))
2840
}
2941

30-
/// Calculate the mantissa in cases where the float size is greater than integer size
31-
fn m_f_gt_i<I: Int, F: Float<Int: CastFrom<I>>>(i: I, n: u32) -> F::Int {
32-
F::Int::cast_from(i) << (F::SIGNIFICAND_BITS - I::BITS + 1 + n)
33-
}
34-
35-
/// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes
42+
/// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes.
43+
/// Returns the mantissa and necessary adjustment.
3644
fn m_f_eq_i<I: Int + CastInto<F::Int>, F: Float<Int = I>>(i: I, n: u32) -> (F::Int, F::Int) {
3745
let base = (i << n) >> F::EXPONENT_BITS;
3846

@@ -43,16 +51,23 @@ mod int_to_float {
4351
(base, adj)
4452
}
4553

54+
/// Calculate the mantissa in cases where the float size is greater than integer size
55+
fn m_f_gt_i<I: Int, F: Float<Int: CastFrom<I>>>(i: I, n: u32) -> F::Int {
56+
F::Int::cast_from(i) << (F::SIGNIFICAND_BITS - I::BITS + 1 + n)
57+
}
58+
4659
/// Adjust a mantissa with dropped bits
47-
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
60+
fn m_adj<F: Float<Int: CastInto<i32>>>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
61+
// fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
4862
// Branchlessly extract a `1` if rounding up should happen
4963
let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
5064

5165
// Add one when we need to round up. Break ties to even.
5266
m_base + adj
5367
}
5468

55-
/// Combine a final float repr from an exponent and mantissa.
69+
/// Shift the exponent to its position and add the mantissa. Allows adjusting an off by one
70+
/// exponent with an overflowing mantissa.
5671
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
5772
// + rather than | so the mantissa can overflow into the exponent
5873
(e << F::SIGNIFICAND_BITS) + m
@@ -77,7 +92,7 @@ mod int_to_float {
7792
let n = i.leading_zeros();
7893
let (m_base, adj) = m_f_eq_i::<u32, f32>(i, n);
7994
let m = m_adj::<f32>(m_base, adj);
80-
let e = exp::<u32, f32>(n);
95+
let e = exp::<u32, f32>(n) - 1;
8196
repr::<f32>(e, m)
8297
}
8398

@@ -87,7 +102,7 @@ mod int_to_float {
87102
}
88103
let n = i.leading_zeros();
89104
let m = m_f_gt_i::<_, f64>(i, n);
90-
let e = exp::<u32, f64>(n);
105+
let e = exp::<u32, f64>(n) - 1;
91106
repr::<f64>(e, m)
92107
}
93108

@@ -111,12 +126,12 @@ mod int_to_float {
111126
pub fn u64_to_f32_bits(i: u64) -> u32 {
112127
let n = i.leading_zeros();
113128
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
114-
let m_base = m_base::<_, f32>(i_m);
129+
let m_base = m_f_lt_i::<u64, f32>(i_m);
115130
// The entire lower half of `i` will be truncated (masked portion), plus the
116131
// next `EXPONENT_BITS` bits.
117132
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
118133
let m = m_adj::<f32>(m_base, adj);
119-
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) };
134+
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
120135
repr::<f32>(e, m)
121136
}
122137

@@ -127,7 +142,7 @@ mod int_to_float {
127142
let n = i.leading_zeros();
128143
let (m_base, adj) = m_f_eq_i::<u64, f64>(i, n);
129144
let m = m_adj::<f64>(m_base, adj);
130-
let e = exp::<u64, f64>(n);
145+
let e = exp::<u64, f64>(n) - 1;
131146
repr::<f64>(e, m)
132147
}
133148

@@ -138,14 +153,14 @@ mod int_to_float {
138153
}
139154
let n = i.leading_zeros();
140155
let m = m_f_gt_i::<_, f128>(i, n);
141-
let e = exp::<u64, f128>(n);
156+
let e = exp::<u64, f128>(n) - 1;
142157
repr::<f128>(e, m)
143158
}
144159

145160
pub fn u128_to_f32_bits(i: u128) -> u32 {
146161
let n = i.leading_zeros();
147162
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
148-
let m_base = m_base::<_, f32>(i_m);
163+
let m_base = m_f_lt_i::<u128, f32>(i_m);
149164

150165
// Within the upper `F::BITS`, everything except for the significand
151166
// gets truncated
@@ -157,19 +172,19 @@ mod int_to_float {
157172
let adj = d1 | d2;
158173

159174
let m = m_adj::<f32>(m_base, adj);
160-
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) };
175+
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
161176
repr::<f32>(e, m)
162177
}
163178

164179
pub fn u128_to_f64_bits(i: u128) -> u64 {
165180
let n = i.leading_zeros();
166181
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
167-
let m_base = m_base::<_, f64>(i_m);
182+
let m_base = m_f_lt_i::<u128, f64>(i_m);
168183
// The entire lower half of `i` will be truncated (masked portion), plus the
169184
// next `EXPONENT_BITS` bits.
170185
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
171186
let m = m_adj::<f64>(m_base, adj);
172-
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) };
187+
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
173188
repr::<f64>(e, m)
174189
}
175190

@@ -181,7 +196,7 @@ mod int_to_float {
181196
let n = i.leading_zeros();
182197
let (m_base, adj) = m_f_eq_i::<u128, f128>(i, n);
183198
let m = m_adj::<f128>(m_base, adj);
184-
let e = exp::<u128, f128>(n);
199+
let e = exp::<u128, f128>(n) - 1;
185200
repr::<f128>(e, m)
186201
}
187202
}

0 commit comments

Comments
 (0)