Skip to content

Commit 64492fb

Browse files
committed
Refactor integer to float conversion
Extract some common routines to separate functions in order to deduplicate code and remove some of the magic.
1 parent de74b28 commit 64492fb

File tree

3 files changed

+129
-48
lines changed

3 files changed

+129
-48
lines changed

src/float/conv.rs

Lines changed: 119 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -6,73 +6,152 @@ use super::Float;
66

77
/// Conversions from integers to floats.
88
///
9-
/// These are hand-optimized bit twiddling code,
10-
/// which unfortunately isn't the easiest kind of code to read.
9+
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
10+
/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
11+
/// mantissa _with the implicit bit set_!
12+
/// - Figure out if rounding needs to occour by classifying truncated bits. Some patterns are used
13+
/// to figure this out. Adjust the mantissa if needed.
14+
/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros) and subtract one.
15+
/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
16+
/// from the exponent (above) accounts for the explicit bit being set in the mantissa.
1117
///
12-
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
18+
/// # Terminology
19+
///
20+
/// - `i`: the original integer
21+
/// - `i_m`: the integer, shifted fully left (no leading zeros)
22+
/// - `n`: number of leading zeroes
23+
/// - `e`: the resulting exponent
24+
/// - `m`: the resulting mantissa
25+
/// - `m_base`: the mantissa before adjusting for truncated bits
1326
mod int_to_float {
27+
use super::*;
28+
29+
/// Calculate the exponent from the number of leading zeros.
30+
///
31+
/// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
32+
/// bit set can be added back later.
33+
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
34+
F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
35+
}
36+
37+
/// Adjust a mantissa with dropped bits to perform correct rounding.
38+
///
39+
/// The dropped bits can be compr
40+
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
41+
// Branchlessly extract a `1` if rounding up should happen, 0 otherwise
42+
// This accounts for rounding to even.
43+
let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
44+
45+
// Add one when we need to round up. Break ties to even.
46+
m_base + adj
47+
}
48+
49+
/// Shift the exponent to its position and add the mantissa.
50+
///
51+
/// If the mantissa has the implicit bit set, the exponent should be one less than its actual
52+
/// value to cancel it out.
53+
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
54+
// + rather than | so the mantissa can overflow into the exponent
55+
(e << F::SIGNIFICAND_BITS) + m
56+
}
57+
58+
/// Shift distance for a left-aligned integer to a smaller float.
59+
fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
60+
(I::BITS - F::BITS) + F::EXPONENT_BITS
61+
}
62+
63+
/// Shift distance for an integer with `n` leading zeros to a smaller float.
64+
fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
65+
F::SIGNIFICAND_BITS - I::BITS + 1 + n
66+
}
67+
68+
/// Perform a signed operation as unsigned, then add the sign back.
69+
pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
70+
where
71+
F: Float,
72+
I: Int,
73+
F::Int: CastFrom<I>,
74+
Conv: Fn(I::UnsignedInt) -> F::Int,
75+
{
76+
let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
77+
F::from_repr(conv(i.unsigned_abs()) | sign_bit)
78+
}
79+
1480
pub fn u32_to_f32_bits(i: u32) -> u32 {
1581
if i == 0 {
1682
return 0;
1783
}
1884
let n = i.leading_zeros();
19-
let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
20-
let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
21-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
22-
let e = 157 - n; // Exponent plus 127, minus one.
23-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
85+
let m_base = (i << n) >> f32::EXPONENT_BITS;
86+
let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
87+
let m = m_adj::<f32>(m_base, adj);
88+
let e = exp::<u32, f32>(n) - 1;
89+
repr::<f32>(e, m)
2490
}
2591

2692
pub fn u32_to_f64_bits(i: u32) -> u64 {
2793
if i == 0 {
2894
return 0;
2995
}
3096
let n = i.leading_zeros();
31-
let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
32-
let e = 1053 - n as u64; // Exponent plus 1023, minus one.
33-
(e << 52) + m // Bit 53 of m will overflow into e.
97+
let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
98+
let e = exp::<u32, f64>(n) - 1;
99+
repr::<f64>(e, m)
34100
}
35101

36102
pub fn u64_to_f32_bits(i: u64) -> u32 {
37103
let n = i.leading_zeros();
38-
let y = i.wrapping_shl(n);
39-
let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
40-
let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
41-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
42-
let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
43-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
104+
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
105+
let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
106+
// The entire lower half of `i` will be truncated (masked portion), plus the
107+
// next `EXPONENT_BITS` bits.
108+
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
109+
let m = m_adj::<f32>(m_base, adj);
110+
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
111+
repr::<f32>(e, m)
44112
}
45113

46114
pub fn u64_to_f64_bits(i: u64) -> u64 {
47115
if i == 0 {
48116
return 0;
49117
}
50118
let n = i.leading_zeros();
51-
let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
52-
let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
53-
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
54-
let e = 1085 - n as u64; // Exponent plus 1023, minus one.
55-
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
119+
let m_base = (i << n) >> f64::EXPONENT_BITS;
120+
let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
121+
let m = m_adj::<f64>(m_base, adj);
122+
let e = exp::<u64, f64>(n) - 1;
123+
repr::<f64>(e, m)
56124
}
57125

58126
pub fn u128_to_f32_bits(i: u128) -> u32 {
59127
let n = i.leading_zeros();
60-
let y = i.wrapping_shl(n);
61-
let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
62-
let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
63-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
64-
let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
65-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
128+
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
129+
let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
130+
131+
// Within the upper `F::BITS`, everything except for the signifcand
132+
// gets truncated
133+
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
134+
135+
// The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
136+
// check if it is nonzero.
137+
let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
138+
let adj = d1 | d2;
139+
140+
let m = m_adj::<f32>(m_base, adj);
141+
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
142+
repr::<f32>(e, m)
66143
}
67144

68145
pub fn u128_to_f64_bits(i: u128) -> u64 {
69146
let n = i.leading_zeros();
70-
let y = i.wrapping_shl(n);
71-
let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
72-
let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
73-
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
74-
let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
75-
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
147+
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
148+
let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
149+
// The entire lower half of `i` will be truncated (masked portion), plus the
150+
// next `EXPONENT_BITS` bits.
151+
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
152+
let m = m_adj::<f64>(m_base, adj);
153+
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
154+
repr::<f64>(e, m)
76155
}
77156
}
78157

@@ -113,38 +192,32 @@ intrinsics! {
113192
intrinsics! {
114193
#[arm_aeabi_alias = __aeabi_i2f]
115194
pub extern "C" fn __floatsisf(i: i32) -> f32 {
116-
let sign_bit = ((i >> 31) as u32) << 31;
117-
f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
195+
int_to_float::signed(i, int_to_float::u32_to_f32_bits)
118196
}
119197

120198
#[arm_aeabi_alias = __aeabi_i2d]
121199
pub extern "C" fn __floatsidf(i: i32) -> f64 {
122-
let sign_bit = ((i >> 31) as u64) << 63;
123-
f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
200+
int_to_float::signed(i, int_to_float::u32_to_f64_bits)
124201
}
125202

126203
#[arm_aeabi_alias = __aeabi_l2f]
127204
pub extern "C" fn __floatdisf(i: i64) -> f32 {
128-
let sign_bit = ((i >> 63) as u32) << 31;
129-
f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
205+
int_to_float::signed(i, int_to_float::u64_to_f32_bits)
130206
}
131207

132208
#[arm_aeabi_alias = __aeabi_l2d]
133209
pub extern "C" fn __floatdidf(i: i64) -> f64 {
134-
let sign_bit = ((i >> 63) as u64) << 63;
135-
f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
210+
int_to_float::signed(i, int_to_float::u64_to_f64_bits)
136211
}
137212

138213
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
139214
pub extern "C" fn __floattisf(i: i128) -> f32 {
140-
let sign_bit = ((i >> 127) as u32) << 31;
141-
f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
215+
int_to_float::signed(i, int_to_float::u128_to_f32_bits)
142216
}
143217

144218
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
145219
pub extern "C" fn __floattidf(i: i128) -> f64 {
146-
let sign_bit = ((i >> 127) as u64) << 63;
147-
f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
220+
int_to_float::signed(i, int_to_float::u128_to_f64_bits)
148221
}
149222
}
150223

src/int/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt
8383

8484
fn unsigned(self) -> Self::UnsignedInt;
8585
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
86+
fn unsigned_abs(self) -> Self::UnsignedInt;
8687

8788
fn from_bool(b: bool) -> Self;
8889

@@ -178,7 +179,6 @@ macro_rules! int_impl_common {
178179
fn wrapping_mul(self, other: Self) -> Self {
179180
<Self>::wrapping_mul(self, other)
180181
}
181-
182182
fn wrapping_sub(self, other: Self) -> Self {
183183
<Self>::wrapping_sub(self, other)
184184
}
@@ -235,6 +235,10 @@ macro_rules! int_impl {
235235
me
236236
}
237237

238+
fn unsigned_abs(self) -> Self {
239+
self
240+
}
241+
238242
fn abs_diff(self, other: Self) -> Self {
239243
if self < other {
240244
other.wrapping_sub(self)
@@ -268,6 +272,10 @@ macro_rules! int_impl {
268272
me as $ity
269273
}
270274

275+
fn unsigned_abs(self) -> Self::UnsignedInt {
276+
self.unsigned_abs()
277+
}
278+
271279
fn abs_diff(self, other: Self) -> $uty {
272280
self.wrapping_sub(other).wrapping_abs() as $uty
273281
}

testcrate/tests/conv.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use compiler_builtins::float::Float;
88
use rustc_apfloat::{Float as _, FloatConvert as _};
99
use testcrate::*;
1010

11-
mod int_to_float {
11+
mod i_to_f {
1212
use super::*;
1313

1414
macro_rules! i_to_f {

0 commit comments

Comments
 (0)