Skip to content

Commit e757c85

Browse files
committed
Improve parsing floats to use our better base prefix logic.
1 parent d8ec91f commit e757c85

File tree

6 files changed

+95
-84
lines changed

6 files changed

+95
-84
lines changed

lexical-parse-float/src/parse.rs

Lines changed: 37 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,10 @@ pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
251251
options: &Options,
252252
) -> Result<F> {
253253
let mut byte = bytes.bytes::<{ FORMAT }>();
254+
let format = NumberFormat::<FORMAT> {};
254255
let is_negative = parse_mantissa_sign(&mut byte)?;
255256
if byte.integer_iter().is_consumed() {
256-
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
257-
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
258-
{
257+
if format.required_integer_digits() || format.required_mantissa_digits() {
259258
return Err(Error::Empty(byte.cursor()));
260259
} else {
261260
return Ok(F::ZERO);
@@ -294,11 +293,10 @@ pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
294293
options: &Options,
295294
) -> Result<F> {
296295
let mut byte = bytes.bytes::<{ FORMAT }>();
296+
let format = NumberFormat::<FORMAT> {};
297297
let is_negative = parse_mantissa_sign(&mut byte)?;
298298
if byte.integer_iter().is_consumed() {
299-
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
300-
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
301-
{
299+
if format.required_integer_digits() || format.required_mantissa_digits() {
302300
return Err(Error::Empty(byte.cursor()));
303301
} else {
304302
return Ok(F::ZERO);
@@ -319,11 +317,10 @@ pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
319317
options: &Options,
320318
) -> Result<(F, usize)> {
321319
let mut byte = bytes.bytes::<{ FORMAT }>();
320+
let format = NumberFormat::<FORMAT> {};
322321
let is_negative = parse_mantissa_sign(&mut byte)?;
323322
if byte.integer_iter().is_consumed() {
324-
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
325-
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
326-
{
323+
if format.required_integer_digits() || format.required_mantissa_digits() {
327324
return Err(Error::Empty(byte.cursor()));
328325
} else {
329326
return Ok((F::ZERO, byte.cursor()));
@@ -368,11 +365,10 @@ pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
368365
options: &Options,
369366
) -> Result<(F, usize)> {
370367
let mut byte = bytes.bytes::<{ FORMAT }>();
368+
let format = NumberFormat::<FORMAT> {};
371369
let is_negative = parse_mantissa_sign(&mut byte)?;
372370
if byte.integer_iter().is_consumed() {
373-
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
374-
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
375-
{
371+
if format.required_integer_digits() || format.required_mantissa_digits() {
376372
return Err(Error::Empty(byte.cursor()));
377373
} else {
378374
return Ok((F::ZERO, byte.cursor()));
@@ -535,51 +531,31 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
535531
let bits_per_digit = shared::log2(format.mantissa_radix()) as i64;
536532
let bits_per_base = shared::log2(format.exponent_base()) as i64;
537533

538-
// INTEGER
539-
540-
// Check to see if we have a valid base prefix.
541-
// NOTE: `lz_prefix` is if we had a leading zero when
542-
// checking for a base prefix: it is not if the prefix
543-
// exists or not.
544-
// TODO: MIGRATE TO BASE PREFIX LOGIC
545-
#[allow(unused_variables)]
546-
let mut lz_prefix = false;
547-
#[cfg(all(feature = "format", feature = "power-of-two"))]
548-
{
549-
let base_prefix = format.base_prefix();
550-
let mut has_prefix = false;
551-
let mut iter = byte.integer_iter();
552-
if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() {
553-
// Check to see if the next character is the base prefix.
554-
// We must have a format like `0x`, `0d`, `0o`.
555-
// NOTE: The check for empty integer digits happens below so
556-
// we don't need a redundant check here.
557-
lz_prefix = true;
558-
let prefix = iter.read_if_value(base_prefix, format.case_sensitive_base_prefix());
559-
has_prefix = prefix.is_some();
560-
if has_prefix && iter.is_buffer_empty() && format.required_integer_digits() {
561-
return Err(Error::EmptyInteger(iter.cursor()));
562-
}
563-
}
564-
if format.required_base_prefix() && !has_prefix {
565-
return Err(Error::MissingBasePrefix(iter.cursor()));
534+
// skip and validate an optional base prefix
535+
let has_base_prefix = cfg!(feature = "format") && byte.integer_iter().read_base_prefix();
536+
if cfg!(feature = "format") && has_base_prefix {
537+
if byte.is_buffer_empty() && format.required_integer_digits() {
538+
return Err(Error::EmptyInteger(byte.cursor()));
566539
}
540+
} else if format.required_base_prefix() {
541+
return Err(Error::MissingBasePrefix(byte.cursor()));
567542
}
568543

569-
// Parse our integral digits.
570-
let mut mantissa = 0_u64;
544+
// INTEGER
545+
571546
let start = byte.clone();
547+
let mut mantissa = 0_u64;
572548
let mut integer_iter = byte.integer_iter();
573-
let start_count = integer_iter.digits();
549+
let integer_start = integer_iter.digits();
550+
551+
// Parse our integral digits.
574552
#[cfg(not(feature = "compact"))]
575553
parse_8digits::<_, FORMAT>(&mut integer_iter, &mut mantissa);
576554
parse_digits(&mut integer_iter, format.mantissa_radix(), |digit| {
577555
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
578556
});
579-
let mut n_digits = integer_iter.digits_since(start_count);
580-
#[cfg(feature = "format")]
557+
let mut n_digits = integer_iter.digits_since(integer_start);
581558
let n_before_dot = n_digits;
582-
#[cfg(feature = "format")]
583559
if format.required_integer_digits() && n_digits == 0 {
584560
return Err(Error::EmptyInteger(byte.cursor()));
585561
}
@@ -610,10 +586,13 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
610586
let integer_digits = unsafe { start.as_slice().get_unchecked(..b_digits) };
611587

612588
// Check if integer leading zeros are disabled.
613-
#[cfg(feature = "format")]
614-
if !lz_prefix && format.no_float_leading_zeros() {
589+
if cfg!(feature = "format")
590+
&& format.no_float_leading_zeros()
591+
&& !has_base_prefix
592+
&& n_before_dot > 1
593+
{
615594
let mut integer = integer_digits.bytes::<FORMAT>();
616-
if integer_digits.len() > 1 && integer.integer_iter().peek() == Some(&b'0') {
595+
if integer.integer_iter().peek() == Some(&b'0') {
617596
return Err(Error::InvalidLeadingZeros(start.cursor()));
618597
}
619598
}
@@ -632,13 +611,13 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
632611
unsafe { byte.step_unchecked() };
633612
let before = byte.clone();
634613
let mut fraction_iter = byte.fraction_iter();
635-
let start_count = fraction_iter.digits();
614+
let fraction_count = fraction_iter.digits();
636615
#[cfg(not(feature = "compact"))]
637616
parse_8digits::<_, FORMAT>(&mut fraction_iter, &mut mantissa);
638617
parse_digits(&mut fraction_iter, format.mantissa_radix(), |digit| {
639618
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
640619
});
641-
n_after_dot = fraction_iter.digits_since(start_count);
620+
n_after_dot = fraction_iter.digits_since(fraction_count);
642621
// NOTE: We can't use the number of digits to extract the slice for
643622
// non-contiguous iterators, but we also need to the number of digits
644623
// for our value calculation. We store both, and let the compiler know
@@ -674,7 +653,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
674653
// NOTE: Check if we have our exponent **BEFORE** checking if the
675654
// mantissa is empty, so we can ensure
676655
let has_exponent = byte
677-
.first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format"));
656+
.first_is(exponent_character, cfg!(feature = "format") && format.case_sensitive_exponent());
678657

679658
// check to see if we have any invalid leading zeros
680659
n_digits += n_after_dot;
@@ -701,8 +680,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
701680
unsafe { byte.step_unchecked() };
702681

703682
// Check float format syntax checks.
704-
#[cfg(feature = "format")]
705-
{
683+
if cfg!(feature = "format") {
706684
// NOTE: We've overstepped for the safety invariant before.
707685
if format.no_exponent_notation() {
708686
return Err(Error::InvalidExponent(byte.cursor() - 1));
@@ -736,14 +714,14 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
736714

737715
let is_negative_exponent = parse_exponent_sign(&mut byte)?;
738716
let mut exponent_iter = byte.exponent_iter();
739-
let start_count = exponent_iter.digits();
717+
let exponent_start = exponent_iter.digits();
740718
parse_digits(&mut exponent_iter, format.exponent_radix(), |digit| {
741719
if explicit_exponent < 0x10000000 {
742720
explicit_exponent *= format.exponent_radix() as i64;
743721
explicit_exponent += digit as i64;
744722
}
745723
});
746-
if format.required_exponent_digits() && exponent_iter.digits_since(start_count) == 0 {
724+
if format.required_exponent_digits() && exponent_iter.digits_since(exponent_start) == 0 {
747725
return Err(Error::EmptyExponent(byte.cursor()));
748726
}
749727
// Handle our sign, and get the explicit part of the exponent.
@@ -760,9 +738,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
760738
// Check to see if we have a valid base suffix.
761739
// We've already trimmed any leading digit separators here, so we can be safe
762740
// that the first character **is not** a digit separator.
763-
// FIXME: Improve parsing of this
764-
#[cfg(all(feature = "format", feature = "power-of-two"))]
765-
if format.has_base_suffix() {
741+
// TODO: Improve parsing of this using a base suffix method
742+
if cfg!(all(feature = "format", feature = "power-of-two")) && format.has_base_suffix() {
766743
let base_suffix = format.base_suffix();
767744
let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix());
768745
if is_suffix {
@@ -779,8 +756,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
779756
let end = byte.cursor();
780757
let mut step = u64_step(format.mantissa_radix());
781758
let mut many_digits = false;
782-
#[cfg(feature = "format")]
783-
if !format.required_mantissa_digits() && n_digits == 0 {
759+
if cfg!(feature = "format") && !format.required_mantissa_digits() && n_digits == 0 {
784760
exponent = 0;
785761
}
786762
if n_digits <= step {

lexical-parse-integer/src/algorithm.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -674,23 +674,23 @@ macro_rules! algorithm {
674674
let mut byte = $bytes.bytes::<FORMAT>();
675675
let format = NumberFormat::<FORMAT> {};
676676
let radix = format.mantissa_radix();
677+
debug_assert!(format.is_valid(), "should have already checked for an invalid number format");
677678

678679
let is_negative = parse_sign::<T, FORMAT>(&mut byte)?;
679680
let mut iter = byte.integer_iter();
680681
maybe_into_empty!(iter, $into_ok);
681682

682683
// skip and validate an optional base prefix
683-
#[cfg(all(feature = "format", feature = "power-of-two"))]
684-
if iter.read_base_prefix() {
684+
let has_base_prefix = cfg!(feature = "format") && iter.read_base_prefix();
685+
if cfg!(feature = "format") && has_base_prefix {
685686
maybe_into_empty!(iter, $into_ok);
686687
} else if format.required_base_prefix() {
687688
return Err(Error::MissingBasePrefix(iter.cursor()));
688689
}
689690

690691
// NOTE: always do a peek so any leading digit separators
691692
// are skipped, and we can get the correct index
692-
#[cfg(feature = "format")]
693-
if format.no_integer_leading_zeros() && iter.peek() == Some(&b'0') {
693+
if cfg!(feature = "format") && format.no_integer_leading_zeros() && !has_base_prefix && iter.peek() == Some(&b'0') {
694694
// NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature
695695
let index = iter.cursor();
696696
let zeros = iter.skip_zeros();
@@ -719,7 +719,6 @@ macro_rules! algorithm {
719719
// and even if parsing a 64-bit integer is marginally faster, it
720720
// culminates in **way** slower performance overall for simple
721721
// integers, and no improvement for large integers.
722-
#[allow(unused)]
723722
let mut has_suffix = false;
724723
// FIXME: This is only used for the parsing of the base suffix.
725724
#[allow(unused)]
@@ -776,8 +775,7 @@ macro_rules! algorithm {
776775
);
777776
}
778777

779-
#[cfg(all(feature = "format", feature = "power-of-two"))]
780-
if format.required_base_suffix() && !has_suffix {
778+
if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix {
781779
return Err(Error::MissingBaseSuffix(iter.cursor()));
782780
}
783781

lexical-util/src/feature_format.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -516,16 +516,20 @@ impl<const FORMAT: u128> NumberFormat<FORMAT> {
516516

517517
/// Get if leading zeros before an integer are not allowed.
518518
///
519-
/// Can only be modified with [`feature`][crate#features] `format`. Defaults
520-
/// to [`false`].
519+
/// Can only be modified with [`feature`][crate#features] `format`. This
520+
/// only applies if there is no base prefix: that is, the zeros are
521+
/// at the absolute start of the number. Defaults to [`false`].
521522
///
522523
/// # Examples
523524
///
525+
/// With a base prefix of `x`.
526+
///
524527
/// | Input | Valid? |
525528
/// |:-:|:-:|
526529
/// | `01` | ❌ |
527530
/// | `0` | ✔️ |
528531
/// | `10` | ✔️ |
532+
/// | `0x01` | ✔️ |
529533
///
530534
/// # Used For
531535
///
@@ -544,17 +548,21 @@ impl<const FORMAT: u128> NumberFormat<FORMAT> {
544548
///
545549
/// This is before the significant digits of the float, that is, if there is
546550
/// 1 or more digits in the integral component and the leading digit is 0,
547-
/// Can only be modified with [`feature`][crate#features] `format`. Defaults
548-
/// to [`false`].
551+
/// Can only be modified with [`feature`][crate#features] `format`. This
552+
/// only applies if there is no base prefix: that is, the zeros are
553+
/// at the absolute start of the number. Defaults to [`false`].
549554
///
550555
/// # Examples
551556
///
557+
/// With a base prefix of `x`.
558+
///
552559
/// | Input | Valid? |
553560
/// |:-:|:-:|
554561
/// | `01` | ❌ |
555562
/// | `01.0` | ❌ |
556563
/// | `0` | ✔️ |
557564
/// | `10` | ✔️ |
565+
/// | `0x01.0` | ✔️ |
558566
/// | `0.1` | ✔️ |
559567
///
560568
/// # Used For

0 commit comments

Comments
 (0)