@@ -2,10 +2,16 @@ use crate::db::Db as TemplateDb;
 use crate::tokens::Token;
 use crate::tokens::TokenContent;
 
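+// Tag delimiters as consts so the token dispatcher and lex_text share one
+// definition instead of scattered string literals.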
+const BLOCK_TAG_START: &str = "{%";
+const BLOCK_TAG_END: &str = "%}";
+const VARIABLE_TAG_START: &str = "{{";
+const VARIABLE_TAG_END: &str = "}}";
+const COMMENT_TAG_START: &str = "{#";
+const COMMENT_TAG_END: &str = "#}";
+
 pub struct Lexer<'db> {
     db: &'db dyn TemplateDb,
     source: String,
-    chars: Vec<char>,
     start: usize,
     current: usize,
     line: usize,
@@ -17,7 +23,6 @@ impl<'db> Lexer<'db> {
         Lexer {
             db,
             source: String::from(source),
-            chars: source.chars().collect(),
             start: 0,
             current: 0,
             line: 1,
@@ -32,22 +37,14 @@ impl<'db> Lexer<'db> {
 
         let token = match self.peek() {
             '{' => match self.peek_next() {
-                '%' => self.lex_django_construct("%}", |content, line, start| Token::Block {
-                    content,
-                    line,
-                    start,
+                '%' => self.lex_django_construct(BLOCK_TAG_END, |content, offset| {
+                    Token::Block { content, offset }
                 }),
-                '{' => {
-                    self.lex_django_construct("}}", |content, line, start| Token::Variable {
-                        content,
-                        line,
-                        start,
-                    })
-                }
-                '#' => self.lex_django_construct("#}", |content, line, start| Token::Comment {
-                    content,
-                    line,
-                    start,
+                '{' => self.lex_django_construct(VARIABLE_TAG_END, |content, offset| {
+                    Token::Variable { content, offset }
+                }),
+                '#' => self.lex_django_construct(COMMENT_TAG_END, |content, offset| {
+                    Token::Comment { content, offset }
                 }),
                 _ => self.lex_text(),
             },
@@ -69,49 +66,43 @@ impl<'db> Lexer<'db> {
             tokens.push(token);
         }
 
-        tokens.push(Token::Eof { line: self.line });
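+        // Eof is now a unit variant; the end-of-input position is implicit.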
+        tokens.push(Token::Eof);
 
         tokens
     }
 
     fn lex_django_construct(
         &mut self,
         end: &str,
-        token_fn: impl FnOnce(TokenContent<'db>, usize, usize) -> Token<'db>,
+        token_fn: impl FnOnce(TokenContent<'db>, usize) -> Token<'db>,
     ) -> Token<'db> {
-        let line = self.line;
-        let start = self.start + 3;
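+        // Assumes the construct opens with a two-char tag plus one space
+        // (e.g. "{% "), so content starts three bytes past self.start.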
+        let offset = self.start + 3;
 
         self.consume_n(2);
 
         match self.consume_until(end) {
             Ok(text) => {
                 self.consume_n(2);
                 let content = TokenContent::new(self.db, text);
-                token_fn(content, line, start)
+                token_fn(content, offset)
             }
             Err(err_text) => {
                 self.synchronize();
                 let content = TokenContent::new(self.db, err_text);
-                Token::Error {
-                    content,
-                    line,
-                    start,
-                }
+                Token::Error { content, offset }
             }
         }
     }
 
     fn lex_whitespace(&mut self, c: char) -> Token<'db> {
-        let line = self.line;
-        let start = self.start;
+        let offset = self.start;
 
         if c == '\n' || c == '\r' {
            self.consume(); // \r or \n
            if c == '\r' && self.peek() == '\n' {
                self.consume(); // \n of \r\n
            }
-            Token::Newline { line, start }
+            Token::Newline { offset }
         } else {
             self.consume(); // Consume the first whitespace
             while !self.is_at_end() && self.peek().is_whitespace() {
@@ -121,67 +112,64 @@ impl<'db> Lexer<'db> {
                 self.consume();
             }
             let count = self.current - self.start;
-            Token::Whitespace { count, line, start }
+            Token::Whitespace { count, offset }
         }
     }
 
     fn lex_text(&mut self) -> Token<'db> {
-        let line = self.line;
-        let start = self.start;
+        let text_start = self.current;
 
-        let mut text = String::new();
         while !self.is_at_end() {
-            let c = self.peek();
-
-            if c == '{' {
-                let next = self.peek_next();
-                if next == '%' || next == '{' || next == '#' {
-                    break;
-                }
-            } else if c == '\n' {
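+            // Match tag openers against the remaining slice directly;
+            // starts_with on &str is safe here since self.current always
+            // sits on a char boundary.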
+            if self.source[self.current..].starts_with(BLOCK_TAG_START)
+                || self.source[self.current..].starts_with(VARIABLE_TAG_START)
+                || self.source[self.current..].starts_with(COMMENT_TAG_START)
+                || self.source[self.current..].starts_with('\n')
+            {
                 break;
             }
-
-            text.push(c);
             self.consume();
         }
 
-        let content = TokenContent::new(self.db, text);
+        let text = &self.source[text_start..self.current];
+        let content = TokenContent::new(self.db, text.to_string());
         Token::Text {
             content,
-            line,
-            start,
+            offset: self.start,
         }
     }
 
+    #[inline]
     fn peek(&self) -> char {
-        self.peek_at(0)
+        self.source[self.current..].chars().next().unwrap_or('\0')
     }
 
     fn peek_next(&self) -> char {
-        self.peek_at(1)
+        let mut chars = self.source[self.current..].chars();
+        chars.next(); // Skip current
+        chars.next().unwrap_or('\0')
     }
 
     fn peek_previous(&self) -> char {
-        self.peek_at(-1)
-    }
-
-    fn peek_at(&self, offset: isize) -> char {
-        let Some(index) = self.current.checked_add_signed(offset) else {
+        if self.current == 0 {
             return '\0';
-        };
-        self.chars.get(index).copied().unwrap_or('\0')
+        }
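+        // current - 1 can land mid-char in UTF-8, so back up to the nearest
+        // char boundary before slicing.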
+        let mut pos = self.current - 1;
+        while !self.source.is_char_boundary(pos) && pos > 0 {
+            pos -= 1;
+        }
+        self.source[pos..].chars().next().unwrap_or('\0')
     }
 
+    #[inline]
     fn is_at_end(&self) -> bool {
         self.current >= self.source.len()
     }
 
+    #[inline]
     fn consume(&mut self) {
-        if self.is_at_end() {
-            return;
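+        // Advance by the char's full UTF-8 width so self.current always
+        // stays on a char boundary.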
+        if let Some(ch) = self.source[self.current..].chars().next() {
+            self.current += ch.len_utf8();
         }
-        self.current += 1;
     }
 
     fn consume_n(&mut self, count: usize) {
@@ -190,25 +178,24 @@ impl<'db> Lexer<'db> {
         }
     }
 
-    fn consume_until(&mut self, s: &str) -> Result<String, String> {
-        let start = self.current;
-        while !self.is_at_end() {
-            if self.chars[self.current..self.chars.len()]
-                .starts_with(s.chars().collect::<Vec<_>>().as_slice())
-            {
-                return Ok(self.source[start..self.current].trim().to_string());
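+    // Matching the delimiter against the remaining &str replaces the old
+    // Vec<char> scan, which re-collected the pattern on every iteration.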
+    fn consume_until(&mut self, delimiter: &str) -> Result<String, String> {
+        let offset = self.current;
+
+        while self.current < self.source.len() {
+            if self.source[self.current..].starts_with(delimiter) {
+                return Ok(self.source[offset..self.current].trim().to_string());
             }
             self.consume();
         }
-        Err(self.source[start..self.current].trim().to_string())
+
+        Err(self.source[offset..self.current].trim().to_string())
     }
 
     fn synchronize(&mut self) {
-        let sync_chars = &['{', '\n', '\r'];
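+        // All sync points are ASCII, so indexing raw bytes is safe even in
+        // multi-byte UTF-8 input.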
+        const SYNC_POINTS: &[u8] = b"{\n\r";
 
         while !self.is_at_end() {
-            let current_char = self.peek();
-            if sync_chars.contains(&current_char) {
+            if SYNC_POINTS.contains(&self.source.as_bytes()[self.current]) {
                 return;
             }
             self.consume();