Skip to content

Commit d8ec91f

Browse files
committed
Migrate to using raw digits to simplify our APIs.
This keeps our digit counts separate for each component rather than keeping them as part of one larger memo, since we can handle each individual digit separator component correctly like this, and it simplifies the code and improves performance when one component doesn't allow digit separators.
1 parent 28f4755 commit d8ec91f

File tree

14 files changed

+464
-296
lines changed

14 files changed

+464
-296
lines changed

lexical-parse-float/src/binary.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ pub fn binary<F: RawFloat, const FORMAT: u128>(num: &Number, lossy: bool) -> Ext
101101
#[cfg_attr(not(feature = "compact"), inline(always))]
102102
#[allow(unused_mut)]
103103
pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
104-
mut iter: Iter,
104+
iter: &mut Iter,
105105
mantissa: &mut u64,
106106
step: &mut usize,
107107
overflowed: &mut bool,
@@ -118,7 +118,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
118118
debug_assert!(radix < 16, "larger radices will wrap on radix^8");
119119
let radix8 = format.radix8() as u64;
120120
while *step > 8 {
121-
if let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
121+
if let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(iter) {
122122
*mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v);
123123
*step -= 8;
124124
} else {
@@ -169,9 +169,10 @@ pub fn slow_binary<F: RawFloat, const FORMAT: u128>(num: Number) -> ExtendedFloa
169169
// Parse the integer digits.
170170
let mut step = u64_step(radix);
171171
let mut integer = num.integer.bytes::<FORMAT>();
172-
integer.integer_iter().skip_zeros();
172+
let mut integer_iter = integer.integer_iter();
173+
integer_iter.skip_zeros();
173174
parse_u64_digits::<_, FORMAT>(
174-
integer.integer_iter(),
175+
&mut integer_iter,
175176
&mut mantissa,
176177
&mut step,
177178
&mut overflow,
@@ -181,11 +182,12 @@ pub fn slow_binary<F: RawFloat, const FORMAT: u128>(num: Number) -> ExtendedFloa
181182
// Parse the fraction digits.
182183
if let Some(fraction) = num.fraction {
183184
let mut fraction = fraction.bytes::<FORMAT>();
185+
let mut fraction_iter = fraction.fraction_iter();
184186
if mantissa == 0 {
185-
fraction.fraction_iter().skip_zeros();
187+
fraction_iter.skip_zeros();
186188
}
187189
parse_u64_digits::<_, FORMAT>(
188-
fraction.fraction_iter(),
190+
&mut fraction_iter,
189191
&mut mantissa,
190192
&mut step,
191193
&mut overflow,

lexical-parse-float/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
//! [Comprehensive Configuration]: #comprehensive-configuration
6060
//!
6161
//! ```rust
62-
//! # #[cfg(feature = "radix")] {
62+
//! # #[cfg(feature = "format")] {
6363
//! # use core::str;
6464
//! use lexical_parse_float::{Error, FromLexicalWithOptions, NumberFormatBuilder, Options};
6565
//!

lexical-parse-float/src/parse.rs

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
541541
// NOTE: `lz_prefix` is if we had a leading zero when
542542
// checking for a base prefix: it is not if the prefix
543543
// exists or not.
544+
// TODO: MIGRATE TO BASE PREFIX LOGIC
544545
#[allow(unused_variables)]
545546
let mut lz_prefix = false;
546547
#[cfg(all(feature = "format", feature = "power-of-two"))]
@@ -568,12 +569,14 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
568569
// Parse our integral digits.
569570
let mut mantissa = 0_u64;
570571
let start = byte.clone();
572+
let mut integer_iter = byte.integer_iter();
573+
let start_count = integer_iter.digits();
571574
#[cfg(not(feature = "compact"))]
572-
parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa);
573-
parse_digits(byte.integer_iter(), format.mantissa_radix(), |digit| {
575+
parse_8digits::<_, FORMAT>(&mut integer_iter, &mut mantissa);
576+
parse_digits(&mut integer_iter, format.mantissa_radix(), |digit| {
574577
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
575578
});
576-
let mut n_digits = byte.current_count() - start.current_count();
579+
let mut n_digits = integer_iter.digits_since(start_count);
577580
#[cfg(feature = "format")]
578581
let n_before_dot = n_digits;
579582
#[cfg(feature = "format")]
@@ -598,7 +601,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
598601
// SAFETY: safe, since `n_digits <= start.as_slice().len()`.
599602
// This is since `byte.len() >= start.len()` but has to have
600603
// the same end bounds (that is, `start = byte.clone()`), so
601-
// `0 <= byte.current_count() <= start.current_count() <= start.lent()`
604+
// `0 <= byte.digits() <= start.digits() <= start.len()`
602605
// so, this will always return only the integer digits.
603606
//
604607
// NOTE: Removing this code leads to ~10% reduction in parsing
@@ -609,7 +612,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
609612
// Check if integer leading zeros are disabled.
610613
#[cfg(feature = "format")]
611614
if !lz_prefix && format.no_float_leading_zeros() {
612-
if integer_digits.len() > 1 && integer_digits.first() == Some(&b'0') {
615+
let mut integer = integer_digits.bytes::<FORMAT>();
616+
if integer_digits.len() > 1 && integer.integer_iter().peek() == Some(&b'0') {
613617
return Err(Error::InvalidLeadingZeros(start.cursor()));
614618
}
615619
}
@@ -627,17 +631,19 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
627631
// SAFETY: byte cannot be empty due to `first_is`
628632
unsafe { byte.step_unchecked() };
629633
let before = byte.clone();
634+
let mut fraction_iter = byte.fraction_iter();
635+
let start_count = fraction_iter.digits();
630636
#[cfg(not(feature = "compact"))]
631-
parse_8digits::<_, FORMAT>(byte.fraction_iter(), &mut mantissa);
632-
parse_digits(byte.fraction_iter(), format.mantissa_radix(), |digit| {
637+
parse_8digits::<_, FORMAT>(&mut fraction_iter, &mut mantissa);
638+
parse_digits(&mut fraction_iter, format.mantissa_radix(), |digit| {
633639
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
634640
});
635-
n_after_dot = byte.current_count() - before.current_count();
641+
n_after_dot = fraction_iter.digits_since(start_count);
636642
// NOTE: We can't use the number of digits to extract the slice for
637643
// non-contiguous iterators, but we also need to the number of digits
638644
// for our value calculation. We store both, and let the compiler know
639645
// to optimize it out when not needed.
640-
let b_after_dot = if cfg!(feature = "format") && !byte.fraction_iter().is_contiguous() {
646+
let b_after_dot = if cfg!(feature = "format") && !fraction_iter.is_contiguous() {
641647
byte.cursor() - before.cursor()
642648
} else {
643649
n_after_dot
@@ -672,9 +678,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
672678

673679
// check to see if we have any invalid leading zeros
674680
n_digits += n_after_dot;
675-
if format.required_mantissa_digits()
676-
&& (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0))
677-
{
681+
if format.required_mantissa_digits() && n_digits == 0 {
678682
let any_digits = start.clone().integer_iter().peek().is_some();
679683
// NOTE: This is because numbers like `_12.34` have significant digits,
680684
// they just don't have a valid digit (#97).
@@ -731,14 +735,15 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
731735
}
732736

733737
let is_negative_exponent = parse_exponent_sign(&mut byte)?;
734-
let before = byte.current_count();
735-
parse_digits(byte.exponent_iter(), format.exponent_radix(), |digit| {
738+
let mut exponent_iter = byte.exponent_iter();
739+
let start_count = exponent_iter.digits();
740+
parse_digits(&mut exponent_iter, format.exponent_radix(), |digit| {
736741
if explicit_exponent < 0x10000000 {
737742
explicit_exponent *= format.exponent_radix() as i64;
738743
explicit_exponent += digit as i64;
739744
}
740745
});
741-
if format.required_exponent_digits() && byte.current_count() - before == 0 {
746+
if format.required_exponent_digits() && exponent_iter.digits_since(start_count) == 0 {
742747
return Err(Error::EmptyExponent(byte.cursor()));
743748
}
744749
// Handle our sign, and get the explicit part of the exponent.
@@ -755,10 +760,10 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
755760
// Check to see if we have a valid base suffix.
756761
// We've already trimmed any leading digit separators here, so we can be safe
757762
// that the first character **is not** a digit separator.
758-
#[allow(unused_variables)]
759-
let base_suffix = format.base_suffix();
763+
// FIXME: Improve parsing of this
760764
#[cfg(all(feature = "format", feature = "power-of-two"))]
761-
if base_suffix != 0 {
765+
if format.has_base_suffix() {
766+
let base_suffix = format.base_suffix();
762767
let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix());
763768
if is_suffix {
764769
// SAFETY: safe since `byte.len() >= 1`.
@@ -814,8 +819,9 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
814819
let mut integer = integer_digits.bytes::<{ FORMAT }>();
815820
// Skip leading zeros, so we can use the step properly.
816821
let mut integer_iter = integer.integer_iter();
822+
let integer_start = integer_iter.digits();
817823
integer_iter.skip_zeros();
818-
parse_u64_digits::<_, FORMAT>(integer_iter, &mut mantissa, &mut step);
824+
parse_u64_digits::<_, FORMAT>(&mut integer_iter, &mut mantissa, &mut step);
819825
// NOTE: With the format feature enabled and non-contiguous iterators, we can
820826
// have null fraction digits even if step was not 0. We want to make the
821827
// none check as late in there as possible: any of them should
@@ -828,7 +834,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
828834
|| (cfg!(feature = "format") && !byte.is_contiguous() && fraction_digits.is_none())
829835
{
830836
// Filled our mantissa with just the integer.
831-
int_end - integer.current_count() as i64
837+
int_end - integer_iter.digits_since(integer_start) as i64
832838
} else {
833839
// We know this can't be a None since we had more than 19
834840
// digits previously, so we overflowed a 64-bit integer,
@@ -837,12 +843,13 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
837843
// point, and at least 1 fractional digit.
838844
let mut fraction = fraction_digits.unwrap().bytes::<{ FORMAT }>();
839845
let mut fraction_iter = fraction.fraction_iter();
846+
let fraction_start = fraction_iter.digits();
840847
// Skip leading zeros, so we can use the step properly.
841848
if mantissa == 0 {
842849
fraction_iter.skip_zeros();
843850
}
844-
parse_u64_digits::<_, FORMAT>(fraction_iter, &mut mantissa, &mut step);
845-
-(fraction.current_count() as i64)
851+
parse_u64_digits::<_, FORMAT>(&mut fraction_iter, &mut mantissa, &mut step);
852+
-(fraction_iter.digits_since(fraction_start) as i64)
846853
};
847854
if format.mantissa_radix() == format.exponent_base() {
848855
exponent = implicit_exponent;
@@ -898,7 +905,7 @@ pub fn parse_complete_number<'a, const FORMAT: u128>(
898905

899906
/// Iteratively parse and consume digits from bytes.
900907
#[inline(always)]
901-
pub fn parse_digits<'a, Iter, Cb>(mut iter: Iter, radix: u32, mut cb: Cb)
908+
pub fn parse_digits<'a, Iter, Cb>(iter: &mut Iter, radix: u32, mut cb: Cb)
902909
where
903910
Iter: DigitsIter<'a>,
904911
Cb: FnMut(u32),
@@ -923,7 +930,7 @@ where
923930
/// The iterator must be of the significant digits, not the exponent.
924931
#[inline(always)]
925932
#[cfg(not(feature = "compact"))]
926-
pub fn parse_8digits<'a, Iter, const FORMAT: u128>(mut iter: Iter, mantissa: &mut u64)
933+
pub fn parse_8digits<'a, Iter, const FORMAT: u128>(iter: &mut Iter, mantissa: &mut u64)
927934
where
928935
Iter: DigitsIter<'a>,
929936
{
@@ -934,7 +941,7 @@ where
934941
let radix8 = format.radix8() as u64;
935942
// Can do up to 2 iterations without overflowing, however, for large
936943
// inputs, this is much faster than any other alternative.
937-
while let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
944+
while let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(iter) {
938945
*mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v);
939946
}
940947
}
@@ -948,7 +955,7 @@ where
948955
/// must be of the significant digits, not the exponent.
949956
#[cfg_attr(not(feature = "compact"), inline(always))]
950957
pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
951-
mut iter: Iter,
958+
iter: &mut Iter,
952959
mantissa: &mut u64,
953960
step: &mut usize,
954961
) where
@@ -963,7 +970,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
963970
debug_assert!(radix < 16, "radices over 16 will overflow with radix^8");
964971
let radix8 = format.radix8() as u64;
965972
while *step > 8 {
966-
if let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
973+
if let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(iter) {
967974
*mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v);
968975
*step -= 8;
969976
} else {

lexical-parse-float/tests/api_tests.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1051,7 +1051,7 @@ fn f64_exponent_leading_digit_separator_test() {
10511051
.build_strict();
10521052
const OPTIONS: Options = Options::new();
10531053
assert!(f64::from_lexical_with_options::<FORMAT>(b"31.01e7_1", &OPTIONS).is_err());
1054-
assert!(f64::from_lexical_with_options::<FORMAT>(b"31.01e_71", &OPTIONS).is_ok());
1054+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"31.01e_71", &OPTIONS), Ok(31.01e71));
10551055
assert!(f64::from_lexical_with_options::<FORMAT>(b"31.01e71_", &OPTIONS).is_err());
10561056
}
10571057

@@ -1343,3 +1343,46 @@ fn require_base_prefix_test() {
13431343
let value = f64::from_lexical_with_options::<SUFFIX>(b"-0d12345", &OPTIONS);
13441344
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
13451345
}
1346+
1347+
#[test]
1348+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1349+
fn base_prefix_digit_separator_edge_cases_test() {
1350+
use core::num;
1351+
1352+
const OPTIONS: Options = Options::new();
1353+
const NO_PREFIX: u128 = NumberFormatBuilder::new()
1354+
.digit_separator(num::NonZeroU8::new(b'_'))
1355+
.leading_digit_separator(true)
1356+
.build_strict();
1357+
1358+
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"_+12345", &OPTIONS);
1359+
assert_eq!(value, Err(Error::InvalidDigit(1)));
1360+
1361+
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"+_12345", &OPTIONS);
1362+
assert_eq!(value, Ok(12345.0));
1363+
1364+
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"+12345e_+23", &OPTIONS);
1365+
assert_eq!(value, Err(Error::EmptyExponent(8)));
1366+
1367+
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"+12345e+_23", &OPTIONS);
1368+
assert_eq!(value, Ok(1.2345e27));
1369+
1370+
const PREFIX: u128 = NumberFormatBuilder::new()
1371+
.digit_separator(num::NonZeroU8::new(b'_'))
1372+
.base_prefix(num::NonZeroU8::new(b'd'))
1373+
.required_base_prefix(true)
1374+
.leading_digit_separator(true)
1375+
.build_strict();
1376+
1377+
let value = f64::from_lexical_with_options::<PREFIX>(b"_+0d12345", &OPTIONS);
1378+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1379+
1380+
let value = f64::from_lexical_with_options::<PREFIX>(b"+_0d12345", &OPTIONS);
1381+
assert_eq!(value, Ok(12345.0));
1382+
1383+
// TODO: This fails
1384+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d_12345", &OPTIONS);
1385+
assert_eq!(value, Ok(12345.0));
1386+
1387+
// TODO:> Add suffix
1388+
}

lexical-parse-float/tests/parse_tests.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ fn parse_digits_test() {
154154
let mut mantissa: u64 = 0;
155155
let digits = b"1234567890123456789012345";
156156
let mut byte = digits.bytes::<{ FORMAT }>();
157-
parse::parse_digits(byte.integer_iter(), 10, |digit| {
157+
parse::parse_digits(&mut byte.integer_iter(), 10, |digit| {
158158
mantissa = mantissa.wrapping_mul(10).wrapping_add(digit as _);
159159
});
160160
assert_eq!(mantissa, 1096246371337559929);
@@ -167,7 +167,7 @@ fn parse_8digits_test() {
167167
let mut mantissa: u64 = 0;
168168
let digits = b"1234567890123456789012345";
169169
let mut byte = digits.bytes::<{ FORMAT }>();
170-
parse::parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa);
170+
parse::parse_8digits::<_, FORMAT>(&mut byte.integer_iter(), &mut mantissa);
171171
// We don't check for overflow.
172172
assert_eq!(mantissa, 11177671081359486962);
173173
}
@@ -179,15 +179,15 @@ fn parse_u64_digits_test() {
179179
let mut step = u64_step(10);
180180
let digits = b"1234567890123456789012345";
181181
let mut byte = digits.bytes::<{ FORMAT }>();
182-
parse::parse_u64_digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa, &mut step);
182+
parse::parse_u64_digits::<_, FORMAT>(&mut byte.integer_iter(), &mut mantissa, &mut step);
183183
assert_eq!(mantissa, 1234567890123456789);
184184
assert_eq!(step, 0);
185185

186186
let mut mantissa: u64 = 0;
187187
let mut step = u64_step(10);
188188
let digits = b"1234567890123456789";
189189
let mut byte = digits.bytes::<{ FORMAT }>();
190-
parse::parse_u64_digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa, &mut step);
190+
parse::parse_u64_digits::<_, FORMAT>(&mut byte.integer_iter(), &mut mantissa, &mut step);
191191
assert_eq!(mantissa, 1234567890123456789);
192192
assert_eq!(step, 0);
193193
}

0 commit comments

Comments
 (0)