Skip to content

Commit f1e7e55

Browse files
committed
Improve parser errors for one of multiple ascii characters
1 parent a1c0cb2 commit f1e7e55

File tree

5 files changed

+116
-40
lines changed

5 files changed

+116
-40
lines changed

crates/utils/src/ascii.rs

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
//! ASCII helpers.
2+
3+
use crate::bit::BitIterator;
4+
use std::fmt::{Debug, Display, Formatter};
5+
6+
/// A compact set of ASCII characters backed by a 128-bit mask.
///
/// Bit `n` of the mask is set when the ASCII character with code `n` is a member of the set.
///
/// # Examples
/// ```
/// # use utils::ascii::AsciiSet;
/// let set1 = AsciiSet::new((1 << b'A') | (1 << b'B') | (1 << b'C'));
/// assert_eq!(set1.len(), 3);
/// assert_eq!(set1.to_string(), "'A', 'B', 'C'");
/// assert_eq!(format!("{set1:?}"), "{'A', 'B', 'C'}");
///
/// let mut array = [false; 128];
/// array[b'A' as usize] = true;
/// array[b'B' as usize] = true;
/// array[b'C' as usize] = true;
/// assert_eq!(AsciiSet::from(array), set1);
///
/// assert_eq!(AsciiSet::from(|b| (b'A'..=b'C').contains(&b)), set1);
/// ```
#[derive(Copy, Clone, Eq, PartialEq, Default)]
#[repr(transparent)]
#[must_use]
pub struct AsciiSet {
    /// Membership mask: one bit per ASCII code point.
    set: u128,
}

impl AsciiSet {
    /// Wraps a raw 128-bit membership mask in an `AsciiSet`.
    pub const fn new(set: u128) -> Self {
        Self { set }
    }

    /// Returns `true` when no character is a member of the set.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the number of characters in the set, i.e. the number of bits set in the mask.
    #[must_use]
    pub const fn len(&self) -> usize {
        let members = self.set.count_ones();
        members as usize
    }
}
47+
48+
impl Display for AsciiSet {
49+
#[expect(clippy::cast_possible_truncation)]
50+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
51+
if self.set == 0 {
52+
return write!(f, "(empty)");
53+
}
54+
55+
for (i, (c, _)) in BitIterator::ones(self.set).enumerate() {
56+
let c = c as u8 as char;
57+
if i == 0 {
58+
write!(f, "{c:?}")?;
59+
} else {
60+
write!(f, ", {c:?}")?;
61+
}
62+
}
63+
Ok(())
64+
}
65+
}
66+
67+
impl Debug for AsciiSet {
68+
#[expect(clippy::cast_possible_truncation)]
69+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
70+
f.debug_set()
71+
.entries(BitIterator::ones(self.set).map(|(c, _)| c as u8 as char))
72+
.finish()
73+
}
74+
}
75+
76+
impl From<u128> for AsciiSet {
77+
fn from(set: u128) -> Self {
78+
Self { set }
79+
}
80+
}
81+
82+
impl From<[bool; 128]> for AsciiSet {
83+
fn from(value: [bool; 128]) -> Self {
84+
Self {
85+
set: value
86+
.iter()
87+
.enumerate()
88+
.fold(0, |s, (i, &b)| s | u128::from(b) << i),
89+
}
90+
}
91+
}
92+
93+
impl<F: Fn(u8) -> bool> From<F> for AsciiSet {
94+
fn from(value: F) -> Self {
95+
Self {
96+
set: (0u8..=127).fold(0, |s, i| s | u128::from(value(i)) << i),
97+
}
98+
}
99+
}

crates/utils/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#![cfg_attr(not(feature = "unsafe"), forbid(unsafe_code))]
33

44
pub mod array;
5+
pub mod ascii;
56
pub mod bit;
67
pub mod date;
78
mod framework;

crates/utils/src/parser/error.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::ascii::AsciiSet;
12
use crate::parser::then::Then2;
23
use crate::parser::{ParseResult, Parser};
34
use std::error::Error;
@@ -15,6 +16,8 @@ pub enum ParseError {
1516
ExpectedByte(u8),
1617
/// Expected $min - $max.
1718
ExpectedByteRange(u8, u8),
19+
/// Expected one of $set.
20+
ExpectedOneOf(AsciiSet),
1821
/// Expected at least $n matches.
1922
ExpectedMatches(usize),
2023
/// Expected $n items or less.
@@ -68,6 +71,7 @@ impl Display for ParseError {
6871
max.escape_ascii().to_string(),
6972
)
7073
}
74+
ParseError::ExpectedOneOf(set) => write!(f, "expected one of {set}"),
7175
ParseError::ExpectedEof() => write!(f, "expected end of input"),
7276
ParseError::ExpectedMatches(x) => write!(f, "expected at least {x} match"),
7377
ParseError::ExpectedLessItems(x) => write!(f, "expected {x} items or less"),

crates/utils/src/parser/macros.rs

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,37 +24,6 @@ macro_rules! parser_byte_map {
2424
(
2525
$($($l:literal)|+ => $e:expr),+$(,)?
2626
) => {{
27-
// `let _: u8 = $l` ensures $l is used in the repetition and also ensures all the literals
28-
// are byte literals
29-
const COUNT: usize = 0usize $($(+ {let _: u8 = $l; 1usize})+)+;
30-
const LEN: usize = 14 + 5 * COUNT;
31-
const {
32-
assert!(COUNT >= 2, "at least two literals must be provided");
33-
}
34-
35-
// Once concat_bytes! is stabilized this error message can be created in the macro similar
36-
// to parser_literal_map!
37-
const ERROR: [u8; LEN] = {
38-
let mut result = [0u8; LEN];
39-
let (prefix, vals) = result.split_at_mut(16);
40-
prefix.copy_from_slice(b"expected one of ");
41-
42-
let mut i = 0;
43-
let literals = [$($($l),+),+];
44-
while i < COUNT {
45-
vals[i * 5] = b'\'';
46-
vals[i * 5 + 1] = literals[i];
47-
vals[i * 5 + 2] = b'\'';
48-
if i + 1 < COUNT {
49-
vals[i * 5 + 3] = b',';
50-
vals[i * 5 + 4] = b' ';
51-
}
52-
i += 1;
53-
}
54-
55-
result
56-
};
57-
5827
$crate::parser::byte_lut(&const {
5928
// Don't use a const item for the lut to avoid naming the value type
6029
let mut lut = [None; 256];
@@ -63,11 +32,14 @@ macro_rules! parser_byte_map {
6332
lut[$l as usize] = Some($e);
6433
)+)+
6534
lut
66-
}, const {
67-
match str::from_utf8(&ERROR) {
68-
Ok(v) => v,
69-
Err(_) => panic!("one or more of the provided literals is invalid unicode"),
70-
}
35+
}, {
36+
let mut set = 0u128;
37+
$($(
38+
let v: u8 = $l;
39+
assert!(v < 128, "invalid ASCII");
40+
set |= 1u128 << v;
41+
)+)+
42+
$crate::parser::ParseError::ExpectedOneOf($crate::ascii::AsciiSet::new(set))
7143
})
7244
}};
7345
}

crates/utils/src/parser/simple.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pub fn byte() -> Byte {
4242
#[derive(Copy, Clone)]
4343
pub struct ByteLut<'a, O> {
4444
lut: &'a [Option<O>; 256],
45-
error: &'static str,
45+
error: ParseError,
4646
}
4747
impl<'i, O: Copy> Parser<'i> for ByteLut<'_, O> {
4848
type Output = O;
@@ -55,7 +55,7 @@ impl<'i, O: Copy> Parser<'i> for ByteLut<'_, O> {
5555
{
5656
Ok((output, remaining))
5757
} else {
58-
Err((ParseError::Custom(self.error), input))
58+
Err((self.error, input))
5959
}
6060
}
6161
}
@@ -78,14 +78,14 @@ impl<'i, O: Copy> Parser<'i> for ByteLut<'_, O> {
7878
/// x
7979
/// };
8080
///
81-
/// let parser = parser::byte_lut(&LOOKUP, "expected '#' or '.'");
81+
/// let parser = parser::byte_lut(&LOOKUP, ParseError::Custom("expected '#' or '.'"));
8282
/// assert_eq!(parser.parse(b"#..##"), Ok((true, &b"..##"[..])));
8383
/// assert_eq!(parser.parse(b"..##"), Ok((false, &b".##"[..])));
8484
/// assert_eq!(parser.parse(b"abc"), Err((ParseError::Custom("expected '#' or '.'"), &b"abc"[..])));
8585
/// ```
8686
#[inline]
8787
#[must_use]
88-
pub fn byte_lut<'a, T: Copy>(lut: &'a [Option<T>; 256], error: &'static str) -> ByteLut<'a, T> {
88+
pub fn byte_lut<T: Copy>(lut: &'_ [Option<T>; 256], error: ParseError) -> ByteLut<'_, T> {
8989
ByteLut { lut, error }
9090
}
9191

0 commit comments

Comments
 (0)