Skip to content

Commit 91074ea

Browse files
committed
Improve parsing of base prefixes and suffixes.
This adds comprehensive tests, including correctness ones, as well as performance benchmarks to ensure optimal ASM is generated on x86_64, including re-arranging some code to optimize jumps.
1 parent 3c0003c commit 91074ea

File tree

12 files changed

+894
-228
lines changed

12 files changed

+894
-228
lines changed

lexical-parse-float/src/parse.rs

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -503,10 +503,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
503503
let bits_per_base = shared::log2(format.exponent_base()) as i64;
504504

505505
// skip and validate an optional base prefix
506-
let has_base_prefix = cfg!(feature = "format") && byte.read_base_prefix();
507-
if cfg!(feature = "format") && !has_base_prefix && format.required_base_prefix() {
508-
return Err(Error::MissingBasePrefix(byte.cursor()));
509-
}
506+
let has_base_prefix =
507+
cfg!(all(feature = "format", feature = "power-of-two")) && byte.read_base_prefix()?;
510508

511509
// INTEGER
512510

@@ -703,18 +701,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
703701
}
704702

705703
// Check to see if we have a valid base suffix.
706-
// We've already trimmed any leading digit separators here, so we can be safe
707-
// that the first character **is not** a digit separator.
708-
if cfg!(all(feature = "format", feature = "power-of-two")) && format.has_base_suffix() {
709-
let base_suffix = format.base_suffix();
710-
let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix());
711-
if is_suffix {
712-
// SAFETY: safe since `byte.len() >= 1`.
713-
unsafe { byte.step_unchecked() };
714-
} else if format.required_base_suffix() {
715-
return Err(Error::MissingBaseSuffix(byte.cursor()));
716-
}
717-
}
704+
_ = byte.read_base_suffix(has_exponent)?;
718705

719706
// CHECK OVERFLOW
720707

lexical-parse-float/tests/api_tests.rs

Lines changed: 313 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,7 +1346,7 @@ fn require_base_prefix_test() {
13461346

13471347
#[test]
13481348
#[cfg(all(feature = "format", feature = "power-of-two"))]
1349-
fn base_prefix_digit_separator_edge_cases_test() {
1349+
fn base_prefix_no_digit_separator_test() {
13501350
use core::num;
13511351

13521352
const OPTIONS: Options = Options::new();
@@ -1355,34 +1355,331 @@ fn base_prefix_digit_separator_edge_cases_test() {
13551355
.leading_digit_separator(true)
13561356
.build_strict();
13571357

1358-
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"_+12345", &OPTIONS);
1358+
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"_+12345.6", &OPTIONS);
13591359
assert_eq!(value, Err(Error::InvalidDigit(1)));
13601360

1361-
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"+_12345", &OPTIONS);
1362-
assert_eq!(value, Ok(12345.0));
1361+
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"+_12345.6", &OPTIONS);
1362+
assert_eq!(value, Ok(12345.6));
1363+
1364+
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"1.2", &OPTIONS);
1365+
assert_eq!(value, Ok(1.2));
1366+
1367+
const OPT_PREFIX: u128 = NumberFormatBuilder::new()
1368+
.digit_separator(num::NonZeroU8::new(b'_'))
1369+
.base_prefix(num::NonZeroU8::new(b'd'))
1370+
.internal_digit_separator(true)
1371+
.build_strict();
13631372

1364-
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"+12345e_+23", &OPTIONS);
1365-
assert_eq!(value, Err(Error::EmptyExponent(8)));
1373+
let value = f64::from_lexical_with_options::<OPT_PREFIX>(b"1.2", &OPTIONS);
1374+
assert_eq!(value, Ok(1.2));
13661375

1367-
let value = f64::from_lexical_with_options::<NO_PREFIX>(b"+12345e+_23", &OPTIONS);
1368-
assert_eq!(value, Ok(1.2345e27));
1376+
let value = f64::from_lexical_with_options::<OPT_PREFIX>(b"0d1.2", &OPTIONS);
1377+
assert_eq!(value, Ok(1.2));
13691378

13701379
const PREFIX: u128 = NumberFormatBuilder::new()
13711380
.digit_separator(num::NonZeroU8::new(b'_'))
13721381
.base_prefix(num::NonZeroU8::new(b'd'))
1382+
.internal_digit_separator(true)
13731383
.required_base_prefix(true)
1374-
.leading_digit_separator(true)
13751384
.build_strict();
13761385

1377-
let value = f64::from_lexical_with_options::<PREFIX>(b"_+0d12345", &OPTIONS);
1386+
let value = f64::from_lexical_with_options::<PREFIX>(b"_+0d12345.6", &OPTIONS);
1387+
assert_eq!(value, Err(Error::MissingBasePrefix(0)));
1388+
1389+
let value = f64::from_lexical_with_options::<PREFIX>(b"+_0d12345.6", &OPTIONS);
13781390
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
13791391

1380-
let value = f64::from_lexical_with_options::<PREFIX>(b"+_0d12345", &OPTIONS);
1381-
assert_eq!(value, Ok(12345.0));
1392+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0_d12345.6", &OPTIONS);
1393+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
13821394

1383-
// TODO: This fails
1384-
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d_12345", &OPTIONS);
1385-
assert_eq!(value, Ok(12345.0));
1395+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d_12345.6", &OPTIONS);
1396+
assert_eq!(value, Err(Error::InvalidDigit(3)));
1397+
1398+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d12345.6", &OPTIONS);
1399+
assert_eq!(value, Ok(12345.6));
1400+
}
1401+
1402+
#[test]
1403+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1404+
fn base_prefix_l_digit_separator_test() {
1405+
use core::num;
1406+
1407+
const OPTIONS: Options = Options::new();
1408+
const PREFIX: u128 = NumberFormatBuilder::new()
1409+
.digit_separator(num::NonZeroU8::new(b'_'))
1410+
.base_prefix(num::NonZeroU8::new(b'd'))
1411+
.internal_digit_separator(true)
1412+
.required_base_prefix(true)
1413+
.base_prefix_leading_digit_separator(true)
1414+
.build_strict();
1415+
1416+
let value = f64::from_lexical_with_options::<PREFIX>(b"_+0d12345.6", &OPTIONS);
1417+
assert_eq!(value, Err(Error::MissingBasePrefix(0)));
1418+
1419+
let value = f64::from_lexical_with_options::<PREFIX>(b"+_0d12345.6", &OPTIONS);
1420+
assert_eq!(value, Ok(12345.6));
1421+
1422+
let value = f64::from_lexical_with_options::<PREFIX>(b"+__0d12345.6", &OPTIONS);
1423+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1424+
1425+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0_d12345.6", &OPTIONS);
1426+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1427+
1428+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d_12345.6", &OPTIONS);
1429+
assert_eq!(value, Err(Error::InvalidDigit(3)));
1430+
1431+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d12345.6", &OPTIONS);
1432+
assert_eq!(value, Ok(12345.6));
1433+
1434+
const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX)
1435+
.base_prefix_consecutive_digit_separator(true)
1436+
.build_strict();
1437+
1438+
let value = f64::from_lexical_with_options::<CONSECUTIVE>(b"+__0d12345.6", &OPTIONS);
1439+
assert_eq!(value, Ok(12345.6));
1440+
}
1441+
1442+
#[test]
1443+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1444+
fn base_prefix_i_digit_separator_test() {
1445+
use core::num;
1446+
1447+
const OPTIONS: Options = Options::new();
1448+
const PREFIX: u128 = NumberFormatBuilder::new()
1449+
.digit_separator(num::NonZeroU8::new(b'_'))
1450+
.base_prefix(num::NonZeroU8::new(b'd'))
1451+
.internal_digit_separator(true)
1452+
.required_base_prefix(true)
1453+
.base_prefix_internal_digit_separator(true)
1454+
.build_strict();
1455+
1456+
let value = f64::from_lexical_with_options::<PREFIX>(b"_+0d12345.6", &OPTIONS);
1457+
assert_eq!(value, Err(Error::MissingBasePrefix(0)));
1458+
1459+
let value = f64::from_lexical_with_options::<PREFIX>(b"+_0d12345.6", &OPTIONS);
1460+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1461+
1462+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0_d12345.6", &OPTIONS);
1463+
assert_eq!(value, Ok(12345.6));
1464+
1465+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0__d12345.6", &OPTIONS);
1466+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1467+
1468+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d_12345.6", &OPTIONS);
1469+
assert_eq!(value, Err(Error::InvalidDigit(3)));
1470+
1471+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d12345.6", &OPTIONS);
1472+
assert_eq!(value, Ok(12345.6));
1473+
1474+
const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX)
1475+
.base_prefix_consecutive_digit_separator(true)
1476+
.build_strict();
1477+
1478+
let value = f64::from_lexical_with_options::<CONSECUTIVE>(b"+0__d12345.6", &OPTIONS);
1479+
assert_eq!(value, Ok(12345.6));
1480+
}
1481+
1482+
#[test]
1483+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1484+
fn base_prefix_t_digit_separator_test() {
1485+
use core::num;
1486+
1487+
const OPTIONS: Options = Options::new();
1488+
const PREFIX: u128 = NumberFormatBuilder::new()
1489+
.digit_separator(num::NonZeroU8::new(b'_'))
1490+
.base_prefix(num::NonZeroU8::new(b'd'))
1491+
.internal_digit_separator(true)
1492+
.required_base_prefix(true)
1493+
.base_prefix_trailing_digit_separator(true)
1494+
.build_strict();
1495+
1496+
let value = f64::from_lexical_with_options::<PREFIX>(b"_+0d12345.6", &OPTIONS);
1497+
assert_eq!(value, Err(Error::MissingBasePrefix(0)));
1498+
1499+
let value = f64::from_lexical_with_options::<PREFIX>(b"+_0d12345.6", &OPTIONS);
1500+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1501+
1502+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0_d12345.6", &OPTIONS);
1503+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1504+
1505+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d_12345.6", &OPTIONS);
1506+
assert_eq!(value, Ok(12345.6));
1507+
1508+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d__12345.6", &OPTIONS);
1509+
assert_eq!(value, Err(Error::InvalidDigit(4)));
1510+
1511+
let value = f64::from_lexical_with_options::<PREFIX>(b"+0d12345.6", &OPTIONS);
1512+
assert_eq!(value, Ok(12345.6));
1513+
1514+
// special case: overlap with a leading digit separator
1515+
const LEADING: u128 = NumberFormatBuilder::new()
1516+
.digit_separator(num::NonZeroU8::new(b'_'))
1517+
.base_prefix(num::NonZeroU8::new(b'd'))
1518+
.leading_digit_separator(true)
1519+
.required_base_prefix(true)
1520+
.base_prefix_trailing_digit_separator(true)
1521+
.build_strict();
1522+
1523+
let value = f64::from_lexical_with_options::<LEADING>(b"+0d_12345.6", &OPTIONS);
1524+
assert_eq!(value, Ok(12345.6));
1525+
1526+
let value = f64::from_lexical_with_options::<LEADING>(b"+0d__12345.6", &OPTIONS);
1527+
assert_eq!(value, Err(Error::InvalidDigit(3)));
1528+
1529+
const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX)
1530+
.base_prefix_consecutive_digit_separator(true)
1531+
.build_strict();
1532+
1533+
let value = f64::from_lexical_with_options::<CONSECUTIVE>(b"+0d__12345.6", &OPTIONS);
1534+
assert_eq!(value, Ok(12345.6));
1535+
}
1536+
1537+
#[test]
1538+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1539+
fn base_suffix_no_digit_separator_test() {
1540+
use core::num;
1541+
1542+
const OPTIONS: Options = Options::new();
1543+
const NO_SUFFIX: u128 = NumberFormatBuilder::new()
1544+
.digit_separator(num::NonZeroU8::new(b'_'))
1545+
.leading_digit_separator(true)
1546+
.build_strict();
1547+
1548+
let value = f64::from_lexical_with_options::<NO_SUFFIX>(b"_+12345.6", &OPTIONS);
1549+
assert_eq!(value, Err(Error::InvalidDigit(1)));
1550+
1551+
let value = f64::from_lexical_with_options::<NO_SUFFIX>(b"+_12345.6", &OPTIONS);
1552+
assert_eq!(value, Ok(12345.6));
1553+
1554+
let value = f64::from_lexical_with_options::<NO_SUFFIX>(b"1.2", &OPTIONS);
1555+
assert_eq!(value, Ok(1.2));
1556+
1557+
const OPT_SUFFIX: u128 = NumberFormatBuilder::new()
1558+
.digit_separator(num::NonZeroU8::new(b'_'))
1559+
.base_suffix(num::NonZeroU8::new(b'd'))
1560+
.internal_digit_separator(true)
1561+
.build_strict();
1562+
1563+
let value = f64::from_lexical_with_options::<OPT_SUFFIX>(b"1.2", &OPTIONS);
1564+
assert_eq!(value, Ok(1.2));
1565+
1566+
let value = f64::from_lexical_with_options::<OPT_SUFFIX>(b"1.2d", &OPTIONS);
1567+
assert_eq!(value, Ok(1.2));
1568+
1569+
const SUFFIX: u128 = NumberFormatBuilder::new()
1570+
.digit_separator(num::NonZeroU8::new(b'_'))
1571+
.base_suffix(num::NonZeroU8::new(b'd'))
1572+
.internal_digit_separator(true)
1573+
.required_base_suffix(true)
1574+
.build_strict();
1575+
1576+
let value = f64::from_lexical_with_options::<SUFFIX>(b"_+12345.6d", &OPTIONS);
1577+
assert_eq!(value, Err(Error::InvalidDigit(0)));
1578+
1579+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6_d", &OPTIONS);
1580+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1581+
1582+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6d", &OPTIONS);
1583+
assert_eq!(value, Ok(12345.6));
1584+
1585+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6d_", &OPTIONS);
1586+
assert_eq!(value, Err(Error::InvalidDigit(9)));
1587+
1588+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6", &OPTIONS);
1589+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1590+
}
1591+
1592+
#[test]
1593+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1594+
fn base_suffix_l_digit_separator_test() {
1595+
use core::num;
1596+
1597+
const OPTIONS: Options = Options::new();
1598+
const SUFFIX: u128 = NumberFormatBuilder::new()
1599+
.digit_separator(num::NonZeroU8::new(b'_'))
1600+
.base_suffix(num::NonZeroU8::new(b'd'))
1601+
.internal_digit_separator(true)
1602+
.required_base_suffix(true)
1603+
.base_suffix_leading_digit_separator(true)
1604+
.build_strict();
1605+
1606+
let value = f64::from_lexical_with_options::<SUFFIX>(b"_+12345.6d", &OPTIONS);
1607+
assert_eq!(value, Err(Error::InvalidDigit(0)));
1608+
1609+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6_d", &OPTIONS);
1610+
assert_eq!(value, Ok(12345.6));
1611+
1612+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6__d", &OPTIONS);
1613+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1614+
1615+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6d", &OPTIONS);
1616+
assert_eq!(value, Ok(12345.6));
1617+
1618+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6d_", &OPTIONS);
1619+
assert_eq!(value, Err(Error::InvalidDigit(9)));
1620+
1621+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6", &OPTIONS);
1622+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1623+
1624+
// special case: overlap with a trailing digit separator
1625+
const TRAILING: u128 = NumberFormatBuilder::new()
1626+
.digit_separator(num::NonZeroU8::new(b'_'))
1627+
.base_suffix(num::NonZeroU8::new(b'd'))
1628+
.trailing_digit_separator(true)
1629+
.required_base_suffix(true)
1630+
.base_suffix_leading_digit_separator(true)
1631+
.build_strict();
1632+
1633+
let value = f64::from_lexical_with_options::<TRAILING>(b"+12345.6_d", &OPTIONS);
1634+
assert_eq!(value, Ok(12345.6));
1635+
1636+
let value = f64::from_lexical_with_options::<TRAILING>(b"+12345.6__d", &OPTIONS);
1637+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1638+
1639+
const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(SUFFIX)
1640+
.base_suffix_consecutive_digit_separator(true)
1641+
.build_strict();
1642+
1643+
let value = f64::from_lexical_with_options::<CONSECUTIVE>(b"+12345.6__d", &OPTIONS);
1644+
assert_eq!(value, Ok(12345.6));
1645+
}
1646+
1647+
#[test]
1648+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1649+
fn base_suffix_t_digit_separator_test() {
1650+
use core::num;
1651+
1652+
const OPTIONS: Options = Options::new();
1653+
const SUFFIX: u128 = NumberFormatBuilder::new()
1654+
.digit_separator(num::NonZeroU8::new(b'_'))
1655+
.base_suffix(num::NonZeroU8::new(b'd'))
1656+
.internal_digit_separator(true)
1657+
.required_base_suffix(true)
1658+
.base_suffix_trailing_digit_separator(true)
1659+
.build_strict();
1660+
1661+
let value = f64::from_lexical_with_options::<SUFFIX>(b"_+12345.6d", &OPTIONS);
1662+
assert_eq!(value, Err(Error::InvalidDigit(0)));
1663+
1664+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6_d", &OPTIONS);
1665+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1666+
1667+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6d", &OPTIONS);
1668+
assert_eq!(value, Ok(12345.6));
1669+
1670+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6d_", &OPTIONS);
1671+
assert_eq!(value, Ok(12345.6));
1672+
1673+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6", &OPTIONS);
1674+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1675+
1676+
let value = f64::from_lexical_with_options::<SUFFIX>(b"+12345.6d__", &OPTIONS);
1677+
assert_eq!(value, Err(Error::InvalidDigit(10)));
1678+
1679+
const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(SUFFIX)
1680+
.base_suffix_consecutive_digit_separator(true)
1681+
.build_strict();
13861682

1387-
// TODO:> Add suffix
1683+
let value = f64::from_lexical_with_options::<CONSECUTIVE>(b"+12345.6d__", &OPTIONS);
1684+
assert_eq!(value, Ok(12345.6));
13881685
}

0 commit comments

Comments
 (0)