Skip to content

Commit 6102fc3

Browse files
committed
Changed dec2flt to use the Eisel-Lemire algorithm.
Implementation is based off fast-float-rust, with a few notable changes. - Some unsafe methods have been removed. - Safe methods with inherently unsafe functionality have been removed. - All unsafe functionality is documented and provably safe. - Extensive documentation has been added for simpler maintenance. - Inline annotations on internal routines have been removed. - Fixed Python errors in src/etc/test-float-parse/runtests.py. - Updated test-float-parse to be a library, to avoid missing rand dependency. - Added regression tests for rust-lang#31109 and rust-lang#31407 in core tests. - Added regression tests for rust-lang#31109 and rust-lang#31407 in ui tests. - Use the existing slice primitive to simplify shared dec2flt methods. - Remove Miri ignores from dec2flt, due to faster parsing times. - resolves rust-lang#85198 - resolves rust-lang#85214 - resolves rust-lang#85234 - fixes rust-lang#31407 - fixes rust-lang#31109 - fixes rust-lang#53015 - resolves rust-lang#68396 - closes aldanor/fast-float-rust#15
1 parent a857516 commit 6102fc3

File tree

19 files changed

+2383
-2596
lines changed

19 files changed

+2383
-2596
lines changed

core/src/num/dec2flt/algorithm.rs

Lines changed: 0 additions & 429 deletions
This file was deleted.

core/src/num/dec2flt/common.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
//! Common utilities, for internal use only.
2+
3+
use crate::ptr;
4+
5+
/// Helper methods to process immutable bytes.
///
/// Every method has a default body written against `AsRef<[u8]>`, so an
/// implementor only has to expose its bytes.
pub(crate) trait ByteSlice: AsRef<[u8]> {
    /// Get the first byte of the slice without a bounds check.
    ///
    /// # Safety
    ///
    /// The slice must not be empty.
    unsafe fn first_unchecked(&self) -> u8 {
        debug_assert!(!self.is_empty());
        // SAFETY: safe as long as self is not empty
        unsafe { *self.as_ref().get_unchecked(0) }
    }

    /// Get if the slice contains no elements.
    fn is_empty(&self) -> bool {
        self.as_ref().is_empty()
    }

    /// Check if the slice has at least `n` elements.
    fn check_len(&self, n: usize) -> bool {
        n <= self.as_ref().len()
    }

    /// Check if the first character in the slice is equal to c.
    ///
    /// Returns `false` for an empty slice.
    fn first_is(&self, c: u8) -> bool {
        self.as_ref().first() == Some(&c)
    }

    /// Check if the first character in the slice is equal to c1 or c2.
    ///
    /// Returns `false` for an empty slice.
    fn first_is2(&self, c1: u8, c2: u8) -> bool {
        matches!(self.as_ref().first(), Some(&c) if c == c1 || c == c2)
    }

    /// Bounds-checked test if the first character in the slice is a digit.
    fn first_isdigit(&self) -> bool {
        self.as_ref().first().map_or(false, u8::is_ascii_digit)
    }

    /// Check if self starts with u with an ASCII case-insensitive comparison.
    ///
    /// Returns `false` when self is shorter than `u`, so callers do not need
    /// to check the length first.
    fn eq_ignore_case(&self, u: &[u8]) -> bool {
        // `get` makes the length requirement a real check in every build
        // profile, and `eq_ignore_ascii_case` only folds ASCII letters, so
        // non-letter bytes that merely differ in bit 0x20 do not compare equal.
        self.as_ref().get(..u.len()).map_or(false, |head| head.eq_ignore_ascii_case(u))
    }

    /// Get the remaining slice after the first N elements.
    ///
    /// # Panics
    ///
    /// Panics if `n` is greater than the length of the slice.
    fn advance(&self, n: usize) -> &[u8] {
        &self.as_ref()[n..]
    }

    /// Get the slice after skipping all leading characters equal c.
    fn skip_chars(&self, c: u8) -> &[u8] {
        let s = self.as_ref();
        let skipped = s.iter().take_while(|&&b| b == c).count();
        &s[skipped..]
    }

    /// Get the slice after skipping all leading characters equal c1 or c2.
    fn skip_chars2(&self, c1: u8, c2: u8) -> &[u8] {
        let s = self.as_ref();
        let skipped = s.iter().take_while(|&&b| b == c1 || b == c2).count();
        &s[skipped..]
    }

    /// Read 8 bytes as a 64-bit integer in little-endian order.
    ///
    /// # Safety
    ///
    /// The slice must be at least 8 bytes long.
    unsafe fn read_u64_unchecked(&self) -> u64 {
        debug_assert!(self.check_len(8));
        let src = self.as_ref().as_ptr() as *const u64;
        // `read_unaligned` is required: a byte slice carries no guarantee
        // that its start is aligned for `u64`.
        // SAFETY: safe as long as self is at least 8 bytes
        u64::from_le(unsafe { ptr::read_unaligned(src) })
    }

    /// Try to read the next 8 bytes from the slice.
    ///
    /// Returns `None` when fewer than 8 bytes are available; otherwise the
    /// first 8 bytes interpreted as a little-endian integer.
    fn read_u64(&self) -> Option<u64> {
        let head = self.as_ref().get(..8)?;
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(head);
        Some(u64::from_le_bytes(bytes))
    }

    /// Calculate the offset of slice from another.
    ///
    /// The result is `other.len() - self.len()` as a signed value. When self
    /// is a trailing part of `other`, that is the number of bytes that were
    /// consumed to get from `other` to self.
    fn offset_from(&self, other: &Self) -> isize {
        other.as_ref().len() as isize - self.as_ref().len() as isize
    }
}

impl ByteSlice for [u8] {}
94+
95+
/// Helper methods to process mutable bytes.
pub(crate) trait ByteSliceMut: AsMut<[u8]> {
    /// Store `value` into the first 8 bytes of the slice, least significant
    /// byte first.
    ///
    /// # Safety
    ///
    /// The slice must be at least 8 bytes long.
    unsafe fn write_u64_unchecked(&mut self, value: u64) {
        let bytes = self.as_mut();
        debug_assert!(bytes.len() >= 8);
        let dst = bytes.as_mut_ptr().cast::<u64>();
        // The destination is only byte-aligned, so an aligned `write` would
        // be incorrect here; `write_unaligned` has no alignment requirement.
        // SAFETY: safe as long as self is at least 8 bytes
        unsafe { ptr::write_unaligned(dst, value.to_le()) };
    }
}

impl ByteSliceMut for [u8] {}
112+
113+
/// Bytes wrapper with specialized methods for ASCII characters.
///
/// The wrapper is a cheap, copyable view: advancing it only re-points the
/// view at a later part of the same underlying bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct AsciiStr<'a> {
    slc: &'a [u8],
}

impl<'a> AsciiStr<'a> {
    /// Create a view covering all of `slc`.
    pub fn new(slc: &'a [u8]) -> Self {
        Self { slc }
    }

    /// Advance the view by n, advancing it in-place to (n..).
    ///
    /// # Safety
    ///
    /// `n` must not exceed the number of bytes remaining in the view.
    pub unsafe fn step_by(&mut self, n: usize) -> &mut Self {
        debug_assert!(n <= self.slc.len());
        // SAFETY: safe as long as n is at most the buffer length
        self.slc = unsafe { self.slc.get_unchecked(n..) };
        self
    }

    /// Advance the view by 1, advancing it in-place to (1..).
    ///
    /// # Safety
    ///
    /// The view must not be empty.
    pub unsafe fn step(&mut self) -> &mut Self {
        // SAFETY: safe as long as self is not empty
        unsafe { self.step_by(1) }
    }

    /// Iteratively parse and consume digits from bytes.
    ///
    /// `func` is called with the numeric value (0 through 9) of each leading
    /// ASCII digit, in order. The view is left at the first byte that is not
    /// a digit, or empty if every byte was a digit.
    pub fn parse_digits(&mut self, mut func: impl FnMut(u8)) {
        while let Some((&byte, rest)) = self.slc.split_first() {
            // Bytes below b'0' wrap around to large values, so a single
            // comparison rejects everything outside b'0'..=b'9'.
            let digit = byte.wrapping_sub(b'0');
            if digit >= 10 {
                break;
            }
            func(digit);
            self.slc = rest;
        }
    }
}

impl<'a> AsRef<[u8]> for AsciiStr<'a> {
    /// Borrow the bytes that have not been consumed yet.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.slc
    }
}
160+
161+
// `AsciiStr` exposes its bytes through `AsRef<[u8]>`, so the default
// methods of `ByteSlice` apply to it without any overrides.
impl<'a> ByteSlice for AsciiStr<'a> {}
162+
163+
/// Determine if 8 bytes are all decimal digits.
/// This does not care about the order in which the bytes were loaded.
///
/// `v` holds eight bytes packed into one integer. The test passes only when
/// neither of the two wrapped operations below leaves the top bit set in any
/// of the eight byte positions.
pub(crate) fn is_8digits(v: u64) -> bool {
    /// The top bit of every byte position.
    const HIGH_BITS: u64 = 0x8080_8080_8080_8080;

    // b'9' + 0x46 == 0x7f, so a byte greater than b'9' reaches the top bit.
    let above_nine = v.wrapping_add(0x4646_4646_4646_4646) & HIGH_BITS;
    // b'0' - 0x30 == 0, so a byte smaller than b'0' borrows and sets the top bit.
    let below_zero = v.wrapping_sub(0x3030_3030_3030_3030) & HIGH_BITS;

    (above_nine | below_zero) == 0
}
170+
171+
/// Iteratively parse and consume digits from bytes.
///
/// `f` is called with the numeric value (0 through 9) of each leading ASCII
/// digit of `*s`, in order. On return, `*s` starts at the first byte that is
/// not a digit, or is empty if every byte was a digit.
pub(crate) fn parse_digits(s: &mut &[u8], mut f: impl FnMut(u8)) {
    // `split_first` yields the head byte and the remainder from one bounds
    // check, so the slice is never indexed a second time.
    while let Some((&byte, rest)) = s.split_first() {
        // Bytes below b'0' wrap around to large values, so a single
        // comparison rejects everything outside b'0'..=b'9'.
        let digit = byte.wrapping_sub(b'0');
        if digit >= 10 {
            break;
        }
        f(digit);
        *s = rest;
    }
}
183+
184+
/// A custom 64-bit floating point type, representing `f * 2^e`.
/// e is biased, so it can be directly shifted into the exponent bits.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub struct BiasedFp {
    /// The significant digits.
    pub f: u64,
    /// The biased, binary exponent.
    pub e: i32,
}

impl BiasedFp {
    /// Build a value whose significant digits are zero and whose biased
    /// exponent is `e`.
    pub const fn zero_pow2(e: i32) -> Self {
        BiasedFp { e, f: 0 }
    }
}

0 commit comments

Comments
 (0)