Skip to content

Commit 3e5cc97

Browse files
committed
refactor(thrift-fieldmask): refactor path parser with chumsky
1 parent f3a3cf0 commit 3e5cc97

File tree

7 files changed

+100
-178
lines changed

7 files changed

+100
-178
lines changed

Cargo.lock

Lines changed: 0 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ integer-encoding = { version = "4", features = ["tokio", "tokio_async"] }
3434
itertools = "0.14"
3535
lazy_static = "1"
3636
linkedbytes = "0.1"
37-
nom = "7"
3837
normpath = "1"
3938
ordered-float = { version = "5", features = ["serde"] }
4039
paste = "1"

pilota-thrift-fieldmask/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ pilota = { path = "../pilota", version = "0.12" }
2323
pilota-thrift-parser = { path = "../pilota-thrift-parser", version = "0.12" }
2424
pilota-thrift-reflect = { path = "../pilota-thrift-reflect", version = "0.1" }
2525

26-
nom.workspace = true
2726
ahash.workspace = true
2827
faststr.workspace = true
2928
thiserror.workspace = true

pilota-thrift-fieldmask/src/fieldmask.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,9 +1616,9 @@ mod tests {
16161616

16171617
#[test]
16181618
fn test_thiserror_integration() {
1619-
let path_error = PathError::InvalidCharacter {
1619+
let path_error = PathError::ParseError {
16201620
position: 5,
1621-
character: '@',
1621+
message: "invalid character".into(),
16221622
};
16231623
assert!(path_error.to_string().contains("invalid character"));
16241624

pilota-thrift-fieldmask/src/path.rs

Lines changed: 97 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,14 @@
11
use std::{fmt, str};
22

3-
use nom::{
4-
IResult,
5-
branch::alt,
6-
bytes::complete::{escaped, tag, take_while1},
7-
character::complete::{char, digit1, multispace0, one_of},
8-
combinator::{map, map_res},
9-
sequence::{delimited, preceded, terminated},
10-
};
3+
use chumsky::prelude::*;
114
use pilota::FastStr;
5+
use pilota_thrift_parser::descriptor::Components;
126
use thiserror::Error;
137

148
#[derive(Debug, Clone, Error)]
159
pub enum PathError {
1610
#[error("syntax error at position {position}")]
1711
SyntaxError { position: usize },
18-
#[error("invalid character '{character}' at position {position}")]
19-
InvalidCharacter { position: usize, character: char },
20-
#[error("unterminated string at position {start_position}")]
21-
UnterminatedString { start_position: usize },
22-
#[error("invalid escape sequence '{sequence}' at position {position}")]
23-
InvalidEscape { position: usize, sequence: FastStr },
24-
#[error("invalid number '{value}' at position {position}")]
25-
InvalidNumber { position: usize, value: FastStr },
2612
#[error("unexpected EOF")]
2713
UnexpectedEof,
2814
#[error("path cannot be empty")]
@@ -107,88 +93,107 @@ impl fmt::Display for PathToken {
10793
pub struct PathParser;
10894

10995
impl PathParser {
110-
fn parse_root(input: &str) -> IResult<&str, TokenData> {
111-
map(tag("$"), |_| TokenData::Root)(input)
96+
fn parse_root<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
97+
just("$").map(|_| TokenData::Root)
11298
}
11399

114-
fn parse_field(input: &str) -> IResult<&str, TokenData> {
115-
map(tag("."), |_| TokenData::Field)(input)
100+
fn parse_field<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
101+
just(".").map(|_| TokenData::Field)
116102
}
117103

118-
fn parse_index_left(input: &str) -> IResult<&str, TokenData> {
119-
map(terminated(tag("["), multispace0), |_| TokenData::IndexL)(input)
104+
fn parse_index_left<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
105+
just("[")
106+
.then_ignore(Components::blank())
107+
.map(|_| TokenData::IndexL)
120108
}
121109

122-
fn parse_index_right(input: &str) -> IResult<&str, TokenData> {
123-
map(preceded(multispace0, tag("]")), |_| TokenData::IndexR)(input)
110+
fn parse_index_right<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
111+
Components::blank()
112+
.ignore_then(just("]"))
113+
.map(|_| TokenData::IndexR)
124114
}
125-
126-
fn parse_map_left(input: &str) -> IResult<&str, TokenData> {
127-
map(terminated(tag("{"), multispace0), |_| TokenData::MapL)(input)
115+
fn parse_map_left<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
116+
just("{")
117+
.then_ignore(Components::blank())
118+
.map(|_| TokenData::MapL)
128119
}
129120

130-
fn parse_map_right(input: &str) -> IResult<&str, TokenData> {
131-
map(preceded(multispace0, tag("}")), |_| TokenData::MapR)(input)
121+
fn parse_map_right<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
122+
Components::blank()
123+
.ignore_then(just("}"))
124+
.map(|_| TokenData::MapR)
132125
}
133126

134-
fn parse_elem(input: &str) -> IResult<&str, TokenData> {
135-
map(delimited(multispace0, tag(","), multispace0), |_| {
136-
TokenData::Elem
137-
})(input)
127+
fn parse_elem<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
128+
Components::blank()
129+
.ignore_then(just(","))
130+
.then_ignore(Components::blank())
131+
.map(|_| TokenData::Elem)
138132
}
139133

140-
fn parse_any(input: &str) -> IResult<&str, TokenData> {
141-
map(tag("*"), |_| TokenData::Any)(input)
134+
fn parse_any<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
135+
just("*").map(|_| TokenData::Any)
142136
}
143137

144-
fn parse_quoted_string(input: &str) -> IResult<&str, TokenData> {
145-
let (input, content) = delimited(
146-
char('"'),
147-
escaped(
148-
take_while1(|c: char| c != '"' && c != '\\'),
149-
'\\',
150-
one_of("\"ntr\\"),
151-
),
152-
char('"'),
153-
)(input)?;
154-
155-
let unescaped = content
156-
.replace(r#"\""#, "\"")
157-
.replace(r"\n", "\n")
158-
.replace(r"\t", "\t")
159-
.replace(r"\r", "\r")
160-
.replace(r"\\", "\\");
161-
162-
Ok((input, TokenData::Str(unescaped.into())))
138+
fn parse_quoted_string<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>>
139+
{
140+
let normal_char = none_of("\"\\").map(|c: char| c.to_string());
141+
142+
let escape_seq = just('\\')
143+
.then(one_of("\"ntr\\"))
144+
.map(|(_, esc)| match esc {
145+
'"' => "\"".to_string(),
146+
'n' => "\n".to_string(),
147+
't' => "\t".to_string(),
148+
'r' => "\r".to_string(),
149+
'\\' => "\\".to_string(),
150+
_ => esc.to_string(),
151+
});
152+
153+
let content = normal_char
154+
.or(escape_seq)
155+
.repeated()
156+
.collect::<Vec<String>>()
157+
.map(|frags: Vec<String>| frags.concat());
158+
159+
content
160+
.delimited_by(just('"'), just('"'))
161+
.map(|s: String| TokenData::Str(FastStr::new(s)))
163162
}
164163

165-
fn parse_integer(input: &str) -> IResult<&str, TokenData> {
166-
map_res(digit1, |s: &str| s.parse::<i32>().map(TokenData::LitInt))(input)
164+
fn parse_integer<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
165+
text::digits(10)
166+
.collect::<String>()
167+
.map(|s| TokenData::LitInt(s.parse::<i32>().unwrap()))
167168
}
168169

169-
fn parse_identifier(input: &str) -> IResult<&str, TokenData> {
170-
let (input, ident) =
171-
take_while1(|c: char| c.is_alphanumeric() || c == '_' || c == '-')(input)?;
172-
Ok((input, TokenData::LitStr(FastStr::new(ident))))
170+
fn parse_identifier<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
171+
any()
172+
.filter(|c: &char| c.is_alphanumeric() || *c == '_' || *c == '-')
173+
.repeated()
174+
.at_least(1)
175+
.collect::<String>()
176+
.map(|s: String| TokenData::LitStr(FastStr::new(s)))
173177
}
174178

175-
fn parse_literal(input: &str) -> IResult<&str, TokenData> {
176-
alt((Self::parse_integer, Self::parse_identifier))(input)
179+
fn parse_literal<'a>() -> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
180+
choice((Self::parse_integer(), Self::parse_identifier()))
177181
}
178182

179-
pub fn parse_single_token(input: &str) -> IResult<&str, TokenData> {
180-
alt((
181-
Self::parse_root,
182-
Self::parse_field,
183-
Self::parse_index_left,
184-
Self::parse_index_right,
185-
Self::parse_map_left,
186-
Self::parse_map_right,
187-
Self::parse_elem,
188-
Self::parse_any,
189-
Self::parse_quoted_string,
190-
Self::parse_literal,
191-
))(input)
183+
pub fn parse_single_token<'a>()
184+
-> impl Parser<'a, &'a str, TokenData, extra::Err<Rich<'a, char>>> {
185+
choice((
186+
Self::parse_root(),
187+
Self::parse_field(),
188+
Self::parse_index_left(),
189+
Self::parse_index_right(),
190+
Self::parse_map_left(),
191+
Self::parse_map_right(),
192+
Self::parse_elem(),
193+
Self::parse_any(),
194+
Self::parse_quoted_string(),
195+
Self::parse_literal(),
196+
))
192197
}
193198
}
194199

@@ -204,32 +209,24 @@ impl PathIterator {
204209
return Err(PathError::EmptyPath);
205210
}
206211

207-
let mut tokens = Vec::new();
208-
let mut remaining = src.as_ref();
209-
let mut position = 0;
210-
211-
while !remaining.is_empty() {
212-
let start_pos = position;
213-
214-
match PathParser::parse_single_token(remaining) {
215-
Ok((rest, token)) => {
216-
let consumed = remaining.len() - rest.len();
217-
position += consumed;
218-
remaining = rest;
219-
220-
tokens.push(PathToken::new(token, start_pos, position));
221-
}
222-
Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
223-
return Err(Self::create_parse_error(&e, src.as_ref(), start_pos));
224-
}
225-
Err(nom::Err::Incomplete(_)) => {
226-
return Err(PathError::UnexpectedEof);
227-
}
228-
}
212+
let (tokens, errs) = PathParser::parse_single_token()
213+
.map_with(|token, e| {
214+
let span = e.span();
215+
PathToken::new(token, span.start, span.end)
216+
})
217+
.repeated()
218+
.collect::<Vec<PathToken>>()
219+
.parse(src.as_ref())
220+
.into_output_errors();
221+
if !errs.is_empty() {
222+
return Err(PathError::ParseError {
223+
position: errs[0].span().start,
224+
message: errs[0].to_string().into(),
225+
});
229226
}
230227

231228
Ok(Self {
232-
tokens,
229+
tokens: tokens.unwrap(),
233230
position: 0,
234231
})
235232
}
@@ -248,40 +245,6 @@ impl PathIterator {
248245
self.position += 1;
249246
token
250247
}
251-
252-
fn create_parse_error(
253-
nom_error: &nom::error::Error<&str>,
254-
original: &str,
255-
position: usize,
256-
) -> PathError {
257-
let remaining = &original[position..];
258-
let remaining_chars: Vec<char> = remaining.chars().take(3).collect();
259-
260-
if remaining.starts_with('"') && !remaining[1..].contains('"') {
261-
PathError::UnterminatedString {
262-
start_position: position,
263-
}
264-
} else if let Some(first_char) = remaining_chars.first() {
265-
if !first_char.is_ascii_alphanumeric()
266-
&& !matches!(
267-
*first_char,
268-
'$' | '.' | '[' | ']' | '{' | '}' | ',' | '*' | '"'
269-
)
270-
{
271-
PathError::InvalidCharacter {
272-
position,
273-
character: *first_char,
274-
}
275-
} else {
276-
PathError::SyntaxError { position }
277-
}
278-
} else {
279-
PathError::ParseError {
280-
position,
281-
message: nom_error.to_string().into(),
282-
}
283-
}
284-
}
285248
}
286249

287250
#[cfg(test)]
@@ -345,17 +308,11 @@ mod tests {
345308
fn test_error_handling() {
346309
let result = PathIterator::new("$@invalid");
347310
assert!(result.is_err());
348-
assert!(matches!(
349-
result.unwrap_err(),
350-
PathError::InvalidCharacter { character: '@', .. }
351-
));
311+
println!("{:?}", result.unwrap_err());
352312

353313
let result = PathIterator::new("\"unclosed");
354314
assert!(result.is_err());
355-
assert!(matches!(
356-
result.unwrap_err(),
357-
PathError::UnterminatedString { .. }
358-
));
315+
println!("{:?}", result.unwrap_err());
359316
}
360317

361318
#[test]
@@ -441,18 +398,4 @@ mod tests {
441398

442399
assert_eq!(tokens, expected);
443400
}
444-
445-
#[test]
446-
fn test_path_error_display() {
447-
let error = PathError::SyntaxError { position: 5 };
448-
assert!(error.to_string().contains("syntax error"));
449-
assert!(error.to_string().contains("at position 5"));
450-
451-
let error = PathError::InvalidCharacter {
452-
position: 3,
453-
character: '@',
454-
};
455-
assert!(error.to_string().contains("invalid character"));
456-
assert!(error.to_string().contains("'@'"));
457-
}
458401
}

0 commit comments

Comments
 (0)