@@ -1,4 +1,6 @@
 use djls_source::Span;
+use memchr::memchr3;
+use memchr::memmem;

 use crate::tokens::TagDelimiter;
 use crate::tokens::Token;
@@ -68,7 +70,7 @@ impl Lexer {

         self.consume_n(TagDelimiter::LENGTH);

-        match self.consume_until(delimiter.closer()) {
+        match self.consume_until_delimiter(delimiter.closer()) {
             Ok(text) => {
                 let len = text.len();
                 let span = Span::saturating_from_parts_usize(content_start, len);
@@ -91,41 +93,38 @@ impl Lexer {
     }

     fn lex_whitespace(&mut self, c: char) -> Token {
+        self.consume();
+
         if c == '\n' || c == '\r' {
-            self.consume(); // \r or \n
             if c == '\r' && self.peek() == '\n' {
-                self.consume(); // \n of \r\n
-            }
-            let span = Span::saturating_from_bounds_usize(self.start, self.current);
-            Token::Newline { span }
-        } else {
-            self.consume(); // Consume the first whitespace
-            while !self.is_at_end() && self.peek().is_whitespace() {
-                if self.peek() == '\n' || self.peek() == '\r' {
-                    break;
-                }
                 self.consume();
             }
             let span = Span::saturating_from_bounds_usize(self.start, self.current);
-            Token::Whitespace { span }
+            return Token::Newline { span };
         }
-    }
-
-    fn lex_text(&mut self) -> Token {
-        let text_start = self.current;

         while !self.is_at_end() {
-            let remaining = self.remaining_source();
-            if (self.peek() == TagDelimiter::CHAR_OPEN
-                && TagDelimiter::from_input(remaining).is_some())
-                || remaining.starts_with('\n')
-                || remaining.starts_with('\r')
-            {
-                break;
+            let remaining = self.remaining_source().as_bytes();
+
+            match remaining.first() {
+                Some(&b'\n' | &b'\r') | None => break,
+                Some(&b' ' | &b'\t') => self.current += 1,
+                Some(_) => {
+                    if !self.peek().is_whitespace() {
+                        break;
+                    }
+                    self.consume();
+                }
             }
-            self.consume();
         }

+        let span = Span::saturating_from_bounds_usize(self.start, self.current);
+        Token::Whitespace { span }
+    }
+
+    fn lex_text(&mut self) -> Token {
+        let text_start = self.current;
+        self.current += self.consume_until_stop_char();
         let text = self.consumed_source_from(text_start);
         let span = Span::saturating_from_bounds_usize(self.start, self.current);
         Token::Text {
@@ -167,29 +166,45 @@ impl Lexer {
         }
     }

-    fn consume_until(&mut self, delimiter: &str) -> Result<String, String> {
+    fn consume_until_delimiter(&mut self, delimiter: &str) -> Result<String, String> {
         let offset = self.current;
-        let mut fallback: Option<usize> = None;

-        while self.current < self.source.len() {
-            let remaining = self.remaining_source();
+        if let Some(pos) = memmem::find(self.remaining_source().as_bytes(), delimiter.as_bytes()) {
+            self.current += pos;
+            return Ok(self.consumed_source_from(offset).to_string());
+        }

-            if remaining.starts_with(delimiter) {
-                return Ok(self.consumed_source_from(offset).to_string());
-            }
+        self.current += self.consume_until_stop_char();
+        Err(self.consumed_source_from(offset).to_string())
+    }
+
+    fn consume_until_stop_char(&self) -> usize {
+        let mut offset = 0;
+        let max = self.source.len() - self.current;

-            if fallback.is_none() {
-                let ch = self.peek();
-                if TagDelimiter::from_input(remaining).is_some() || matches!(ch, '\n' | '\r') {
-                    fallback = Some(self.current);
+        while offset < max {
+            let remaining = &self.remaining_source()[offset..];
+
+            match memchr3(b'{', b'\n', b'\r', remaining.as_bytes()) {
+                None => {
+                    offset = max;
+                    break;
                 }
-            }
+                Some(pos) => {
+                    let is_newline = matches!(remaining.as_bytes()[pos], b'\n' | b'\r');
+                    let is_django_delimiter = TagDelimiter::from_input(&remaining[pos..]).is_some();

-            self.consume();
+                    if is_newline || is_django_delimiter {
+                        offset += pos;
+                        break;
+                    }
+
+                    offset += pos + 1;
+                }
+            }
         }

-        self.current = fallback.unwrap_or(self.current);
-        Err(self.consumed_source_from(offset).to_string())
+        offset
     }
 }

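Note: the rewrite above leans on two primitives from the memchr crate: memmem::find locates a multi-byte needle (such as a closing tag delimiter), and memchr3 finds the earliest of three candidate stop bytes in a single pass. The following is a minimal standalone sketch of that idea, not code from this commit; the helper name first_stop_char and the sample template string are illustrative only.

use memchr::memchr3;
use memchr::memmem;

/// Illustrative helper: index of the first byte that could end a text run,
/// i.e. '{' (a possible Django delimiter opener) or a newline byte.
fn first_stop_char(src: &str) -> Option<usize> {
    memchr3(b'{', b'\n', b'\r', src.as_bytes())
}

fn main() {
    let template = "Hello {{ name }}\nbye";

    // memmem::find scans for a multi-byte needle, e.g. a closing delimiter.
    assert_eq!(memmem::find(template.as_bytes(), b"}}"), Some(14));

    // memchr3 finds the first of three candidate bytes in one pass.
    assert_eq!(first_stop_char(template), Some(6)); // the first '{' of "{{"
}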