@@ -5,63 +5,6 @@ use thiserror::Error;
55use crate :: nodelist:: Node ;
66use crate :: tokens:: Token ;
77
/// Tokenize tag content respecting quoted strings.
///
/// Splits `content` on whitespace, except that whitespace inside a single- or
/// double-quoted section does not end a token. Quote characters and
/// backslashes are kept in the returned tokens verbatim. Inside a quoted
/// section a backslash makes the following character literal, so an escaped
/// quote does not close the section. A quote that is never closed simply runs
/// to the end of the input and is returned as part of the last token.
///
/// Returns the tokens in input order; the result is empty when `content` is
/// empty or contains only whitespace.
fn tokenize_tag_content(content: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    // The quote character that opened the section we are currently in, if any.
    let mut in_quote: Option<char> = None;
    let mut chars = content.chars();

    // `while let` rather than `for`: the escape arm pulls the next character
    // from the same iterator.
    while let Some(ch) = chars.next() {
        match (ch, in_quote) {
            // Backslash inside quotes: keep it and take the next character
            // literally (a trailing backslash has nothing to escape).
            ('\\', Some(_)) => {
                current.push(ch);
                if let Some(escaped) = chars.next() {
                    current.push(escaped);
                }
            }
            // Opening quote (double or single).
            ('"' | '\'', None) => {
                in_quote = Some(ch);
                current.push(ch);
            }
            // Closing quote: must match the quote that opened the section.
            ('"', Some('"')) | ('\'', Some('\'')) => {
                current.push(ch);
                in_quote = None;
            }
            // Whitespace outside quotes ends the current token, if there is one.
            (c, None) if c.is_whitespace() => {
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
            }
            // Any other character, quoted or not, belongs to the current token.
            _ => current.push(ch),
        }
    }

    // Flush the final token (also covers an unterminated quote).
    if !current.is_empty() {
        tokens.push(current);
    }

    tokens
}
64-
658pub struct Parser {
669 tokens : Vec < Token > ,
6710 current : usize ,
@@ -142,17 +85,50 @@ impl Parser {
14285 } ) ;
14386 } ;
14487
145- let tokens = tokenize_tag_content ( content_ref) ;
146-
147- let mut iter = tokens. into_iter ( ) ;
148- let name = iter. next ( ) . ok_or ( ParseError :: EmptyTag ) ?;
149- let bits: Vec < String > = iter. collect ( ) ;
150-
88+ let ( name, bits) = Self :: parse_tag_args ( content_ref) ?;
15189 let span = token. content_span_or_fallback ( ) ;
15290
15391 Ok ( Node :: Tag { name, bits, span } )
15492 }
15593
94+ fn parse_tag_args ( content : & str ) -> Result < ( String , Vec < String > ) , ParseError > {
95+ let estimated_args = ( content. len ( ) / 8 ) . clamp ( 2 , 8 ) ;
96+ let mut tokens = Vec :: with_capacity ( estimated_args) ;
97+ let mut current = String :: new ( ) ;
98+ let mut in_quote: Option < char > = None ;
99+ let mut chars = content. chars ( ) ;
100+ while let Some ( ch) = chars. next ( ) {
101+ match ( ch, in_quote) {
102+ ( '\\' , Some ( _) ) => {
103+ current. push ( ch) ;
104+ if let Some ( next) = chars. next ( ) {
105+ current. push ( next) ;
106+ }
107+ }
108+ ( '"' | '\'' , None ) => {
109+ in_quote = Some ( ch) ;
110+ current. push ( ch) ;
111+ }
112+ ( '"' , Some ( '"' ) ) | ( '\'' , Some ( '\'' ) ) => {
113+ current. push ( ch) ;
114+ in_quote = None ;
115+ }
116+ ( c, None ) if c. is_whitespace ( ) => {
117+ if !current. is_empty ( ) {
118+ tokens. push ( std:: mem:: take ( & mut current) ) ;
119+ }
120+ }
121+ _ => current. push ( ch) ,
122+ }
123+ }
124+ if !current. is_empty ( ) {
125+ tokens. push ( current) ;
126+ }
127+ let mut iter = tokens. into_iter ( ) ;
128+ let name = iter. next ( ) . ok_or ( ParseError :: EmptyTag ) ?;
129+ Ok ( ( name, iter. collect ( ) ) )
130+ }
131+
156132 fn parse_comment ( & mut self ) -> Result < Node , ParseError > {
157133 let token = self . peek_previous ( ) ?;
158134
@@ -754,81 +730,4 @@ mod tests {
754730 insta:: assert_yaml_snapshot!( test_nodelist) ;
755731 }
756732 }
757-
/// Unit tests for `tokenize_tag_content`: each case feeds one tag body through
/// the tokenizer and compares the resulting token list.
mod tokenization {
    use super::*;

    /// Plain words are split on whitespace.
    #[test]
    fn test_tokenize_simple() {
        let content = "if condition";
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["if", "condition"]);
    }

    /// A double-quoted phrase stays one token, quotes included.
    #[test]
    fn test_tokenize_double_quoted_string() {
        let content = "translate \"Contact the owner of the site\"";
        let tokens = tokenize_tag_content(content);
        assert_eq!(
            tokens,
            vec!["translate", "\"Contact the owner of the site\""]
        );
    }

    /// A single-quoted argument stays one token among unquoted ones.
    #[test]
    fn test_tokenize_single_quoted_string() {
        let content = "url 'view_name' arg1 arg2";
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["url", "'view_name'", "arg1", "arg2"]);
    }

    /// Backslash-escaped quotes do not close the quoted section.
    #[test]
    fn test_tokenize_escaped_quotes() {
        let content = r#"trans "Say \"hello\"""#;
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["trans", r#""Say \"hello\"""#]);
    }

    /// Double quotes nested inside single quotes are ordinary characters.
    #[test]
    fn test_tokenize_mixed_quotes() {
        let content = r#"trans 'He said "hello"'"#;
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["trans", r#"'He said "hello"'"#]);
    }

    /// Operators separated by whitespace become their own tokens.
    #[test]
    fn test_tokenize_expression_with_operators() {
        let content = "if message.input_tokens > 0";
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["if", "message.input_tokens", ">", "0"]);
    }

    /// Every whitespace-separated word of a `for` tag is a token.
    #[test]
    fn test_tokenize_for_loop() {
        let content = "for item in items reversed";
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["for", "item", "in", "items", "reversed"]);
    }

    /// An empty quoted string is still emitted as a token.
    #[test]
    fn test_tokenize_empty_string() {
        let content = r#"if value == """#;
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["if", "value", "==", r#""""#]);
    }

    /// Template variable syntax inside quotes is not split on its spaces.
    #[test]
    fn test_tokenize_with_template_variable() {
        let content = r#"trans "Hello {{ user }}""#;
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["trans", r#""Hello {{ user }}""#]);
    }

    /// `name=value|filter` without whitespace is a single token.
    #[test]
    fn test_tokenize_assignment() {
        let content = "with total=value|length";
        let tokens = tokenize_tag_content(content);
        assert_eq!(tokens, vec!["with", "total=value|length"]);
    }
}
834733}
0 commit comments