Skip to content

Commit 3c0003c

Browse files
committed
Add in heavy optimizations for parsing integers.
This improves both the parsing of sign and digits for better performance, especially with non-decimal radices and larger digit counts.
1 parent 1bf721e commit 3c0003c

File tree

9 files changed

+539
-431
lines changed

9 files changed

+539
-431
lines changed

lexical-parse-float/src/lib.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
//!
7979
//! let value = "1.234e+300";
8080
//! let result = f64::from_lexical_with_options::<FORMAT>(value.as_bytes(), &OPTIONS);
81-
//! assert_eq!(result, Err(Error::MissingSign(0)));
81+
//! assert_eq!(result, Err(Error::MissingMantissaSign(0)));
8282
//! # }
8383
//! ```
8484
//!
@@ -132,7 +132,7 @@
132132
//! }
133133
//!
134134
//! assert_eq!(parse_json_float(b"-1"), Ok(-1.0));
135-
//! assert_eq!(parse_json_float(b"+1"), Err(Error::InvalidPositiveSign(0)));
135+
//! assert_eq!(parse_json_float(b"+1"), Err(Error::InvalidPositiveMantissaSign(0)));
136136
//! assert_eq!(parse_json_float(b"1"), Ok(1.0));
137137
//! assert_eq!(parse_json_float(b"1."), Err(Error::EmptyFraction(2)));
138138
//! assert_eq!(parse_json_float(b"0.1"), Ok(0.1));
@@ -193,7 +193,7 @@
193193
//! assert_eq!(value.map(|x| x.is_nan()), Ok(true));
194194
//!
195195
//! let value = f64::from_lexical_with_options::<FORMAT>(b"+1_2.3_4", &OPTIONS);
196-
//! assert_eq!(value, Err(Error::InvalidPositiveSign(0)));
196+
//! assert_eq!(value, Err(Error::InvalidPositiveMantissaSign(0)));
197197
//!
198198
//! let value = f64::from_lexical_with_options::<FORMAT>(b"0.3_4", &OPTIONS);
199199
//! assert_eq!(value, Ok(0.34));
@@ -566,9 +566,6 @@ mod table_lemire;
566566
mod table_radix;
567567
mod table_small;
568568

569-
#[macro_use(parse_sign)]
570-
extern crate lexical_parse_integer;
571-
572569
// Re-exports
573570
#[cfg(feature = "f16")]
574571
pub use lexical_util::bf16::bf16;

lexical-parse-float/src/parse.rs

Lines changed: 40 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,35 @@ macro_rules! check_radix {
7575
}};
7676
}
7777

78+
/// If a buffer is empty, return the value or an error.
79+
macro_rules! maybe_into_empty {
80+
($iter:expr, $into_ok:ident) => {{
81+
let mut iter = $iter;
82+
let format = NumberFormat::<FORMAT> {};
83+
if iter.is_consumed() {
84+
if format.required_integer_digits() || format.required_mantissa_digits() {
85+
return Err(Error::Empty(iter.cursor()));
86+
} else {
87+
return $into_ok!(F::ZERO, iter.cursor());
88+
}
89+
}
90+
}};
91+
}
92+
93+
/// Return a value for a complete parser.
94+
macro_rules! into_ok_complete {
95+
($value:expr, $index:expr) => {{
96+
Ok($value)
97+
}};
98+
}
99+
100+
/// Return a value and index for a partial parser.
101+
macro_rules! into_ok_partial {
102+
($value:expr, $index:expr) => {{
103+
Ok(($value, $index))
104+
}};
105+
}
106+
78107
/// Parse float trait, implemented in terms of the optimized back-end.
79108
pub trait ParseFloat: LemireFloat {
80109
/// Forward complete parser parameters to the backend.
@@ -168,36 +197,6 @@ parse_float_as_f32! { bf16 f16 }
168197
// different internally. Most of the code is shared, so the duplicated
169198
// code is only like 30 lines.
170199

171-
/// Parse the sign from the leading digits.
172-
#[cfg_attr(not(feature = "compact"), inline(always))]
173-
pub fn parse_mantissa_sign<const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
174-
let format = NumberFormat::<{ FORMAT }> {};
175-
parse_sign!(
176-
byte,
177-
true,
178-
integer_iter,
179-
format.no_positive_mantissa_sign(),
180-
format.required_mantissa_sign(),
181-
InvalidPositiveSign,
182-
MissingSign
183-
)
184-
}
185-
186-
/// Parse the sign from the leading digits.
187-
#[cfg_attr(not(feature = "compact"), inline(always))]
188-
pub fn parse_exponent_sign<const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
189-
let format = NumberFormat::<{ FORMAT }> {};
190-
parse_sign!(
191-
byte,
192-
true,
193-
exponent_iter,
194-
format.no_positive_exponent_sign(),
195-
format.required_exponent_sign(),
196-
InvalidPositiveExponentSign,
197-
MissingExponentSign
198-
)
199-
}
200-
201200
/// Utility to extract the result and handle any errors from parsing a `Number`.
202201
///
203202
/// - `format` - The numerical format as a packed integer
@@ -251,15 +250,8 @@ pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
251250
options: &Options,
252251
) -> Result<F> {
253252
let mut byte = bytes.bytes::<{ FORMAT }>();
254-
let format = NumberFormat::<FORMAT> {};
255-
let is_negative = parse_mantissa_sign(&mut byte)?;
256-
if byte.integer_iter().is_consumed() {
257-
if format.required_integer_digits() || format.required_mantissa_digits() {
258-
return Err(Error::Empty(byte.cursor()));
259-
} else {
260-
return Ok(F::ZERO);
261-
}
262-
}
253+
let is_negative = byte.read_mantissa_sign()?;
254+
maybe_into_empty!(byte.integer_iter(), into_ok_complete);
263255

264256
// Parse out a small representation of our number.
265257
let num: Number<'_> =
@@ -293,15 +285,8 @@ pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
293285
options: &Options,
294286
) -> Result<F> {
295287
let mut byte = bytes.bytes::<{ FORMAT }>();
296-
let format = NumberFormat::<FORMAT> {};
297-
let is_negative = parse_mantissa_sign(&mut byte)?;
298-
if byte.integer_iter().is_consumed() {
299-
if format.required_integer_digits() || format.required_mantissa_digits() {
300-
return Err(Error::Empty(byte.cursor()));
301-
} else {
302-
return Ok(F::ZERO);
303-
}
304-
}
288+
let is_negative = byte.read_mantissa_sign()?;
289+
maybe_into_empty!(byte.integer_iter(), into_ok_complete);
305290

306291
// Parse out a small representation of our number.
307292
let num =
@@ -317,15 +302,8 @@ pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
317302
options: &Options,
318303
) -> Result<(F, usize)> {
319304
let mut byte = bytes.bytes::<{ FORMAT }>();
320-
let format = NumberFormat::<FORMAT> {};
321-
let is_negative = parse_mantissa_sign(&mut byte)?;
322-
if byte.integer_iter().is_consumed() {
323-
if format.required_integer_digits() || format.required_mantissa_digits() {
324-
return Err(Error::Empty(byte.cursor()));
325-
} else {
326-
return Ok((F::ZERO, byte.cursor()));
327-
}
328-
}
305+
let is_negative = byte.read_mantissa_sign()?;
306+
maybe_into_empty!(byte.integer_iter(), into_ok_partial);
329307

330308
// Parse out a small representation of our number.
331309
let (num, count) = parse_number!(
@@ -365,15 +343,8 @@ pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
365343
options: &Options,
366344
) -> Result<(F, usize)> {
367345
let mut byte = bytes.bytes::<{ FORMAT }>();
368-
let format = NumberFormat::<FORMAT> {};
369-
let is_negative = parse_mantissa_sign(&mut byte)?;
370-
if byte.integer_iter().is_consumed() {
371-
if format.required_integer_digits() || format.required_mantissa_digits() {
372-
return Err(Error::Empty(byte.cursor()));
373-
} else {
374-
return Ok((F::ZERO, byte.cursor()));
375-
}
376-
}
346+
let is_negative = byte.read_mantissa_sign()?;
347+
maybe_into_empty!(byte.integer_iter(), into_ok_partial);
377348

378349
// Parse out a small representation of our number.
379350
let (num, count) = parse_number!(
@@ -532,12 +503,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
532503
let bits_per_base = shared::log2(format.exponent_base()) as i64;
533504

534505
// skip and validate an optional base prefix
535-
let has_base_prefix = cfg!(feature = "format") && byte.integer_iter().read_base_prefix();
536-
if cfg!(feature = "format") && has_base_prefix {
537-
if byte.is_buffer_empty() && format.required_integer_digits() {
538-
return Err(Error::EmptyInteger(byte.cursor()));
539-
}
540-
} else if format.required_base_prefix() {
506+
let has_base_prefix = cfg!(feature = "format") && byte.read_base_prefix();
507+
if cfg!(feature = "format") && !has_base_prefix && format.required_base_prefix() {
541508
return Err(Error::MissingBasePrefix(byte.cursor()));
542509
}
543510

@@ -712,7 +679,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
712679
}
713680
}
714681

715-
let is_negative_exponent = parse_exponent_sign(&mut byte)?;
682+
let is_negative_exponent = byte.read_exponent_sign()?;
716683
let mut exponent_iter = byte.exponent_iter();
717684
let exponent_start = exponent_iter.digits();
718685
parse_digits(&mut exponent_iter, format.exponent_radix(), |digit| {
@@ -738,7 +705,6 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
738705
// Check to see if we have a valid base suffix.
739706
// We've already trimmed any leading digit separators here, so we can be safe
740707
// that the first character **is not** a digit separator.
741-
// TODO: Improve parsing of this using a base suffix method
742708
if cfg!(all(feature = "format", feature = "power-of-two")) && format.has_base_suffix() {
743709
let base_suffix = format.base_suffix();
744710
let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix());

lexical-parse-float/tests/api_tests.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -858,23 +858,23 @@ fn f64_no_exponent_notation_test() {
858858
fn f64_optional_exponent_test() {
859859
const FORMAT: u128 = format::PERMISSIVE;
860860
const OPTIONS: Options = Options::new();
861-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e7", &OPTIONS).is_ok());
862-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e-7", &OPTIONS).is_ok());
863-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e", &OPTIONS).is_ok());
864-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e-", &OPTIONS).is_ok());
865-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0", &OPTIONS).is_ok());
861+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e7", &OPTIONS), Ok(3.0e7));
862+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e-7", &OPTIONS), Ok(3.0e-7));
863+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e", &OPTIONS), Ok(3.0));
864+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e-", &OPTIONS), Ok(3.0));
865+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0", &OPTIONS), Ok(3.0));
866866
}
867867

868868
#[test]
869869
#[cfg(feature = "format")]
870870
fn f64_required_exponent_test() {
871871
const FORMAT: u128 = rebuild(format::PERMISSIVE).required_exponent_digits(true).build_strict();
872872
const OPTIONS: Options = Options::new();
873-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e7", &OPTIONS).is_ok());
874-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e-7", &OPTIONS).is_ok());
873+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e7", &OPTIONS), Ok(3.0e7));
874+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e-7", &OPTIONS), Ok(3.0e-7));
875875
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e", &OPTIONS).is_err());
876876
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0e-", &OPTIONS).is_err());
877-
assert!(f64::from_lexical_with_options::<FORMAT>(b"+3.0", &OPTIONS).is_ok());
877+
assert_eq!(f64::from_lexical_with_options::<FORMAT>(b"+3.0", &OPTIONS), Ok(3.0));
878878
}
879879

880880
#[test]

0 commit comments

Comments
 (0)