11use std:: { fmt, str} ;
22
3- use nom:: {
4- IResult ,
5- branch:: alt,
6- bytes:: complete:: { escaped, tag, take_while1} ,
7- character:: complete:: { char, digit1, multispace0, one_of} ,
8- combinator:: { map, map_res} ,
9- sequence:: { delimited, preceded, terminated} ,
10- } ;
3+ use chumsky:: prelude:: * ;
114use pilota:: FastStr ;
5+ use pilota_thrift_parser:: descriptor:: Components ;
126use thiserror:: Error ;
137
148#[ derive( Debug , Clone , Error ) ]
159pub enum PathError {
1610 #[ error( "syntax error at position {position}" ) ]
1711 SyntaxError { position : usize } ,
18- #[ error( "invalid character '{character}' at position {position}" ) ]
19- InvalidCharacter { position : usize , character : char } ,
20- #[ error( "unterminated string at position {start_position}" ) ]
21- UnterminatedString { start_position : usize } ,
22- #[ error( "invalid escape sequence '{sequence}' at position {position}" ) ]
23- InvalidEscape { position : usize , sequence : FastStr } ,
24- #[ error( "invalid number '{value}' at position {position}" ) ]
25- InvalidNumber { position : usize , value : FastStr } ,
2612 #[ error( "unexpected EOF" ) ]
2713 UnexpectedEof ,
2814 #[ error( "path cannot be empty" ) ]
@@ -107,88 +93,107 @@ impl fmt::Display for PathToken {
10793pub struct PathParser ;
10894
10995impl PathParser {
110- fn parse_root ( input : & str ) -> IResult < & str , TokenData > {
111- map ( tag ( "$" ) , |_| TokenData :: Root ) ( input )
96+ fn parse_root < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra :: Err < Rich < ' a , char > > > {
97+ just ( "$" ) . map ( |_| TokenData :: Root )
11298 }
11399
114- fn parse_field ( input : & str ) -> IResult < & str , TokenData > {
115- map ( tag ( "." ) , |_| TokenData :: Field ) ( input )
100+ fn parse_field < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra :: Err < Rich < ' a , char > > > {
101+ just ( "." ) . map ( |_| TokenData :: Field )
116102 }
117103
118- fn parse_index_left ( input : & str ) -> IResult < & str , TokenData > {
119- map ( terminated ( tag ( "[" ) , multispace0) , |_| TokenData :: IndexL ) ( input)
104+ fn parse_index_left < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
105+ just ( "[" )
106+ . then_ignore ( Components :: blank ( ) )
107+ . map ( |_| TokenData :: IndexL )
120108 }
121109
122- fn parse_index_right ( input : & str ) -> IResult < & str , TokenData > {
123- map ( preceded ( multispace0, tag ( "]" ) ) , |_| TokenData :: IndexR ) ( input)
110+ fn parse_index_right < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
111+ Components :: blank ( )
112+ . ignore_then ( just ( "]" ) )
113+ . map ( |_| TokenData :: IndexR )
124114 }
125-
126- fn parse_map_left ( input : & str ) -> IResult < & str , TokenData > {
127- map ( terminated ( tag ( "{" ) , multispace0) , |_| TokenData :: MapL ) ( input)
115+ fn parse_map_left < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
116+ just ( "{" )
117+ . then_ignore ( Components :: blank ( ) )
118+ . map ( |_| TokenData :: MapL )
128119 }
129120
130- fn parse_map_right ( input : & str ) -> IResult < & str , TokenData > {
131- map ( preceded ( multispace0, tag ( "}" ) ) , |_| TokenData :: MapR ) ( input)
121+ fn parse_map_right < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
122+ Components :: blank ( )
123+ . ignore_then ( just ( "}" ) )
124+ . map ( |_| TokenData :: MapR )
132125 }
133126
134- fn parse_elem ( input : & str ) -> IResult < & str , TokenData > {
135- map ( delimited ( multispace0, tag ( "," ) , multispace0) , |_| {
136- TokenData :: Elem
137- } ) ( input)
127+ fn parse_elem < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
128+ Components :: blank ( )
129+ . ignore_then ( just ( "," ) )
130+ . then_ignore ( Components :: blank ( ) )
131+ . map ( |_| TokenData :: Elem )
138132 }
139133
140- fn parse_any ( input : & str ) -> IResult < & str , TokenData > {
141- map ( tag ( "*" ) , |_| TokenData :: Any ) ( input )
134+ fn parse_any < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra :: Err < Rich < ' a , char > > > {
135+ just ( "*" ) . map ( |_| TokenData :: Any )
142136 }
143137
144- fn parse_quoted_string ( input : & str ) -> IResult < & str , TokenData > {
145- let ( input, content) = delimited (
146- char ( '"' ) ,
147- escaped (
148- take_while1 ( |c : char | c != '"' && c != '\\' ) ,
149- '\\' ,
150- one_of ( "\" ntr\\ " ) ,
151- ) ,
152- char ( '"' ) ,
153- ) ( input) ?;
154-
155- let unescaped = content
156- . replace ( r#"\""# , "\" " )
157- . replace ( r"\n" , "\n " )
158- . replace ( r"\t" , "\t " )
159- . replace ( r"\r" , "\r " )
160- . replace ( r"\\" , "\\ " ) ;
161-
162- Ok ( ( input, TokenData :: Str ( unescaped. into ( ) ) ) )
138+ fn parse_quoted_string < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > >
139+ {
140+ let normal_char = none_of ( "\" \\ " ) . map ( |c : char | c. to_string ( ) ) ;
141+
142+ let escape_seq = just ( '\\' )
143+ . then ( one_of ( "\" ntr\\ " ) )
144+ . map ( |( _, esc) | match esc {
145+ '"' => "\" " . to_string ( ) ,
146+ 'n' => "\n " . to_string ( ) ,
147+ 't' => "\t " . to_string ( ) ,
148+ 'r' => "\r " . to_string ( ) ,
149+ '\\' => "\\ " . to_string ( ) ,
150+ _ => esc. to_string ( ) ,
151+ } ) ;
152+
153+ let content = normal_char
154+ . or ( escape_seq)
155+ . repeated ( )
156+ . collect :: < Vec < String > > ( )
157+ . map ( |frags : Vec < String > | frags. concat ( ) ) ;
158+
159+ content
160+ . delimited_by ( just ( '"' ) , just ( '"' ) )
161+ . map ( |s : String | TokenData :: Str ( FastStr :: new ( s) ) )
163162 }
164163
165- fn parse_integer ( input : & str ) -> IResult < & str , TokenData > {
166- map_res ( digit1, |s : & str | s. parse :: < i32 > ( ) . map ( TokenData :: LitInt ) ) ( input)
164+ fn parse_integer < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
165+ text:: digits ( 10 )
166+ . collect :: < String > ( )
167+ . map ( |s| TokenData :: LitInt ( s. parse :: < i32 > ( ) . unwrap ( ) ) )
167168 }
168169
169- fn parse_identifier ( input : & str ) -> IResult < & str , TokenData > {
170- let ( input, ident) =
171- take_while1 ( |c : char | c. is_alphanumeric ( ) || c == '_' || c == '-' ) ( input) ?;
172- Ok ( ( input, TokenData :: LitStr ( FastStr :: new ( ident) ) ) )
170+ fn parse_identifier < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
171+ any ( )
172+ . filter ( |c : & char | c. is_alphanumeric ( ) || * c == '_' || * c == '-' )
173+ . repeated ( )
174+ . at_least ( 1 )
175+ . collect :: < String > ( )
176+ . map ( |s : String | TokenData :: LitStr ( FastStr :: new ( s) ) )
173177 }
174178
175- fn parse_literal ( input : & str ) -> IResult < & str , TokenData > {
176- alt ( ( Self :: parse_integer, Self :: parse_identifier) ) ( input )
179+ fn parse_literal < ' a > ( ) -> impl Parser < ' a , & ' a str , TokenData , extra :: Err < Rich < ' a , char > > > {
180+ choice ( ( Self :: parse_integer ( ) , Self :: parse_identifier ( ) ) )
177181 }
178182
179- pub fn parse_single_token ( input : & str ) -> IResult < & str , TokenData > {
180- alt ( (
181- Self :: parse_root,
182- Self :: parse_field,
183- Self :: parse_index_left,
184- Self :: parse_index_right,
185- Self :: parse_map_left,
186- Self :: parse_map_right,
187- Self :: parse_elem,
188- Self :: parse_any,
189- Self :: parse_quoted_string,
190- Self :: parse_literal,
191- ) ) ( input)
183+ pub fn parse_single_token < ' a > ( )
184+ -> impl Parser < ' a , & ' a str , TokenData , extra:: Err < Rich < ' a , char > > > {
185+ choice ( (
186+ Self :: parse_root ( ) ,
187+ Self :: parse_field ( ) ,
188+ Self :: parse_index_left ( ) ,
189+ Self :: parse_index_right ( ) ,
190+ Self :: parse_map_left ( ) ,
191+ Self :: parse_map_right ( ) ,
192+ Self :: parse_elem ( ) ,
193+ Self :: parse_any ( ) ,
194+ Self :: parse_quoted_string ( ) ,
195+ Self :: parse_literal ( ) ,
196+ ) )
192197 }
193198}
194199
@@ -204,32 +209,24 @@ impl PathIterator {
204209 return Err ( PathError :: EmptyPath ) ;
205210 }
206211
207- let mut tokens = Vec :: new ( ) ;
208- let mut remaining = src. as_ref ( ) ;
209- let mut position = 0 ;
210-
211- while !remaining. is_empty ( ) {
212- let start_pos = position;
213-
214- match PathParser :: parse_single_token ( remaining) {
215- Ok ( ( rest, token) ) => {
216- let consumed = remaining. len ( ) - rest. len ( ) ;
217- position += consumed;
218- remaining = rest;
219-
220- tokens. push ( PathToken :: new ( token, start_pos, position) ) ;
221- }
222- Err ( nom:: Err :: Error ( e) ) | Err ( nom:: Err :: Failure ( e) ) => {
223- return Err ( Self :: create_parse_error ( & e, src. as_ref ( ) , start_pos) ) ;
224- }
225- Err ( nom:: Err :: Incomplete ( _) ) => {
226- return Err ( PathError :: UnexpectedEof ) ;
227- }
228- }
212+ let ( tokens, errs) = PathParser :: parse_single_token ( )
213+ . map_with ( |token, e| {
214+ let span = e. span ( ) ;
215+ PathToken :: new ( token, span. start , span. end )
216+ } )
217+ . repeated ( )
218+ . collect :: < Vec < PathToken > > ( )
219+ . parse ( src. as_ref ( ) )
220+ . into_output_errors ( ) ;
221+ if !errs. is_empty ( ) {
222+ return Err ( PathError :: ParseError {
223+ position : errs[ 0 ] . span ( ) . start ,
224+ message : errs[ 0 ] . to_string ( ) . into ( ) ,
225+ } ) ;
229226 }
230227
231228 Ok ( Self {
232- tokens,
229+ tokens : tokens . unwrap ( ) ,
233230 position : 0 ,
234231 } )
235232 }
@@ -248,40 +245,6 @@ impl PathIterator {
248245 self . position += 1 ;
249246 token
250247 }
251-
252- fn create_parse_error (
253- nom_error : & nom:: error:: Error < & str > ,
254- original : & str ,
255- position : usize ,
256- ) -> PathError {
257- let remaining = & original[ position..] ;
258- let remaining_chars: Vec < char > = remaining. chars ( ) . take ( 3 ) . collect ( ) ;
259-
260- if remaining. starts_with ( '"' ) && !remaining[ 1 ..] . contains ( '"' ) {
261- PathError :: UnterminatedString {
262- start_position : position,
263- }
264- } else if let Some ( first_char) = remaining_chars. first ( ) {
265- if !first_char. is_ascii_alphanumeric ( )
266- && !matches ! (
267- * first_char,
268- '$' | '.' | '[' | ']' | '{' | '}' | ',' | '*' | '"'
269- )
270- {
271- PathError :: InvalidCharacter {
272- position,
273- character : * first_char,
274- }
275- } else {
276- PathError :: SyntaxError { position }
277- }
278- } else {
279- PathError :: ParseError {
280- position,
281- message : nom_error. to_string ( ) . into ( ) ,
282- }
283- }
284- }
285248}
286249
287250#[ cfg( test) ]
@@ -345,17 +308,11 @@ mod tests {
345308 fn test_error_handling ( ) {
346309 let result = PathIterator :: new ( "$@invalid" ) ;
347310 assert ! ( result. is_err( ) ) ;
348- assert ! ( matches!(
349- result. unwrap_err( ) ,
350- PathError :: InvalidCharacter { character: '@' , .. }
351- ) ) ;
311+ println ! ( "{:?}" , result. unwrap_err( ) ) ;
352312
353313 let result = PathIterator :: new ( "\" unclosed" ) ;
354314 assert ! ( result. is_err( ) ) ;
355- assert ! ( matches!(
356- result. unwrap_err( ) ,
357- PathError :: UnterminatedString { .. }
358- ) ) ;
315+ println ! ( "{:?}" , result. unwrap_err( ) ) ;
359316 }
360317
361318 #[ test]
@@ -441,18 +398,4 @@ mod tests {
441398
442399 assert_eq ! ( tokens, expected) ;
443400 }
444-
445- #[ test]
446- fn test_path_error_display ( ) {
447- let error = PathError :: SyntaxError { position : 5 } ;
448- assert ! ( error. to_string( ) . contains( "syntax error" ) ) ;
449- assert ! ( error. to_string( ) . contains( "at position 5" ) ) ;
450-
451- let error = PathError :: InvalidCharacter {
452- position : 3 ,
453- character : '@' ,
454- } ;
455- assert ! ( error. to_string( ) . contains( "invalid character" ) ) ;
456- assert ! ( error. to_string( ) . contains( "'@'" ) ) ;
457- }
458401}
0 commit comments