@@ -85,6 +85,7 @@ const specialIdentifiers = new Map([
8585 [ "else" , TokenType . ELSE ] ,
8686 [ "in" , TokenType . IN ] ,
8787] ) ;
88+
/**
 * Token types of every special identifier (keyword), listed in the
 * insertion order of the `specialIdentifiers` map.
 */
export const SPECIAL_IDENTIFIER_TOKENS = Array.from(
    specialIdentifiers,
    ([, tokenType]) => tokenType,
);
8990
9091export class Tokenizer {
@@ -94,8 +95,7 @@ export class Tokenizer {
9495 private current : number ;
9596 private line : number ;
9697 private col : number ;
97- private prevLineLeadingWhiteSpace : number ;
98- private currLineLeadingWhiteSpace : number ;
98+ private readonly indentStack : number [ ] ;
9999 private specialIdentifiers : Map < string , TokenType > ;
100100 private forbiddenIdentifiers : Map < string , TokenType > ;
101101 // forbiddenOperators: Set<TokenType>;
@@ -106,8 +106,7 @@ export class Tokenizer {
106106 this . current = 0 ;
107107 this . line = 0 ;
108108 this . col = 0 ;
109- this . prevLineLeadingWhiteSpace = 0 ;
110- this . currLineLeadingWhiteSpace = 0 ;
109+ this . indentStack = [ 0 ] ;
111110 this . specialIdentifiers = specialIdentifiers ;
112111 // Not used by us, but should be kept reserved as per Python spec
113112 this . forbiddenIdentifiers = new Map ( [
@@ -254,7 +253,7 @@ export class Tokenizer {
254253 //// SPECIAL MARKERS
255254 // Comment -- advance to end of line.
case '#':
    // Comment -- advance to the end of the line, but do not consume the
    // line terminator itself. Both checks must hold at once: the previous
    // `!= '\n' || != '\r'` test was always true (a character cannot equal
    // both), so a comment swallowed everything up to the end of the input.
    while (!this.isAtEnd() && this.peek() !== '\n' && this.peek() !== '\r') {
        this.advance();
    }
    break;
@@ -275,40 +274,62 @@ export class Tokenizer {
this.addToken(TokenType.NEWLINE);
this.line += 1;
this.col = 0;
// Measure the indentation of the next non-blank line. Blank lines (only
// spaces followed by a line terminator) are skipped without emitting
// tokens, so that in
//
//     def foo():
//         pass
//                  <---- this empty line is dropped
//         pass     <---- and this statement stays part of the block
//
// the second `pass` is still seen at the indentation of the first.
let accLeadingWhiteSpace = 0;
for (;;) {
    accLeadingWhiteSpace = 0;
    // Consume and count this line's leading spaces.
    while (this.peek() === " " && !this.isAtEnd()) {
        accLeadingWhiteSpace += 1;
        this.advance();
    }
    if (this.isAtEnd() || (this.peek() !== "\n" && this.peek() !== "\r")) {
        // Reached real content (or the end of input): indentation is known.
        break;
    }
    // Blank line: consume its terminator, treating "\r\n" (Windows) as one.
    if (this.peek() === "\r") {
        this.advance();
        if (this.peek() === "\n") {
            this.advance();
        }
    } else {
        this.advance();
    }
    this.line += 1;
    this.col = 0;
}
if (accLeadingWhiteSpace % 4 !== 0) {
    throw new TokenizerErrors.NonFourIndentError(this.line, this.col, this.source, this.current);
}
// Invariant: indentStack always starts with 0 and is strictly increasing;
// one INDENT/DEDENT token is emitted per 4 columns of change.
const tos = this.indentStack[this.indentStack.length - 1];
if (accLeadingWhiteSpace > tos) {
    this.indentStack.push(accLeadingWhiteSpace);
    const indents = Math.floor((accLeadingWhiteSpace - tos) / 4);
    for (let i = 0; i < indents; ++i) {
        this.addToken(TokenType.INDENT);
    }
} else if (accLeadingWhiteSpace < tos) {
    // A single dedent may close several blocks at once, so drop EVERY level
    // deeper than the new indentation. Popping only one entry left stale
    // levels behind, which later suppressed the INDENT of a new block at
    // one of those depths.
    while (this.indentStack[this.indentStack.length - 1] > accLeadingWhiteSpace) {
        this.indentStack.pop();
    }
    // The line may land between two recorded levels (e.g. 0 -> 8 -> 4);
    // record it so the stack keeps matching the tokens emitted so far.
    if (this.indentStack[this.indentStack.length - 1] !== accLeadingWhiteSpace) {
        this.indentStack.push(accLeadingWhiteSpace);
    }
    const dedents = Math.floor((tos - accLeadingWhiteSpace) / 4);
    for (let i = 0; i < dedents; ++i) {
        this.addToken(TokenType.DEDENT);
    }
}
break;
314335 // String
@@ -420,6 +441,11 @@ export class Tokenizer {
420441 this . start = this . current ;
421442 this . scanToken ( ) ;
422443 }
// Unravel the indent stack: close every block still open at end of input.
// A stack entry can stand for more than one INDENT token (a jump of 8
// columns emits two), so emit one DEDENT per 4 columns between the closed
// level and the level below it rather than one DEDENT per entry.
while (this.indentStack.length > 1) {
    const closedLevel = this.indentStack[this.indentStack.length - 1];
    this.indentStack.pop();
    const enclosingLevel = this.indentStack[this.indentStack.length - 1];
    const dedents = Math.floor((closedLevel - enclosingLevel) / 4);
    for (let i = 0; i < dedents; ++i) {
        this.addToken(TokenType.DEDENT);
    }
}
423449 this . tokens . push ( new Token ( TokenType . ENDMARKER , "" , this . line , this . col , this . current ) ) ;
424450 return this . tokens
425451 }
0 commit comments