@@ -171,10 +171,21 @@ public Vocabulary getVocabulary() {
171
171
172
172
// new version with semantic actions in parser
173
173
174
+ private static class Indent {
175
+ public final int indent ;
176
+ public final int altindent ;
177
+ public static final Indent EMPTY = new Indent (0 , 0 );
178
+
179
+ public Indent (int indent , int altindent ) {
180
+ this .indent = indent ;
181
+ this .altindent = altindent ;
182
+ }
183
+ }
184
+
174
185
// A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
175
186
private java .util .LinkedList <Token > tokens = new java .util .LinkedList <>();
176
187
// The stack that keeps track of the indentation level.
177
- private java .util .Stack <Integer > indents = new java .util .Stack <>();
188
+ private java .util .Stack <Indent > indents = new java .util .Stack <>();
178
189
// The amount of opened braces, brackets and parenthesis.
179
190
private int opened = 0 ;
180
191
// The most recently produced token.
@@ -298,6 +309,20 @@ private Token createDedent() {
298
309
return dedent ;
299
310
}
300
311
312
+ private Token createIndentError (int type ) {
313
+ // For some reason, CPython sets the error position to the end of line
314
+ int cur = getCharIndex ();
315
+ String s ;
316
+ do {
317
+ s = _input .getText (new Interval (cur , cur ));
318
+ cur ++;
319
+ } while (!s .isEmpty () && s .charAt (0 ) != '\n' );
320
+ cur --;
321
+ CommonToken error = new CommonToken (this ._tokenFactorySourcePair , type , DEFAULT_TOKEN_CHANNEL , cur , cur );
322
+ error .setLine (this .lastToken .getLine ());
323
+ return error ;
324
+ }
325
+
301
326
private CommonToken commonToken (int type , String text ) {
302
327
int stop = Math .max (this .getCharIndex () - 1 , 0 );
303
328
int start = Math .max (text .isEmpty () ? stop : stop - text .length () + 1 , 0 );
@@ -311,9 +336,14 @@ private CommonToken commonToken(int type, String text) {
311
336
// such that the total number of characters up to and including
312
337
// the replacement is a multiple of eight [...]"
313
338
//
339
+ // Altindent is an alternative measure of spaces where tabs are
340
+ // counted as one space. The purpose is to validate that the code
341
+ // doesn't mix tabs and spaces in an inconsistent way.
342
+ //
314
343
// -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
315
- static int getIndentationCount (String spaces ) {
316
- int count = 0 ;
344
+ static Indent getIndentationCount (String spaces ) {
345
+ int indent = 0 ;
346
+ int altindent = 0 ;
317
347
for (char ch : spaces .toCharArray ()) {
318
348
switch (ch ) {
319
349
case '\r' :
@@ -322,15 +352,17 @@ static int getIndentationCount(String spaces) {
322
352
// ignore
323
353
break ;
324
354
case '\t' :
325
- count += 8 - (count % 8 );
355
+ indent += 8 - (indent % 8 );
356
+ altindent ++;
326
357
break ;
327
358
default :
328
359
// A normal space char.
329
- count ++;
360
+ indent ++;
361
+ altindent ++;
330
362
}
331
363
}
332
364
333
- return count ;
365
+ return new Indent ( indent , altindent ) ;
334
366
}
335
367
336
368
boolean atStartOfInput () {
@@ -405,28 +437,41 @@ private void NEWLINE_action(RuleContext _localctx, int actionIndex) {
405
437
}
406
438
else {
407
439
emit (commonToken (NEWLINE , "\n " ));
408
- int indent ;
440
+ Indent indent ;
409
441
if (next == EOF ) {
410
442
// don't add indents if we're going to finish
411
- indent = 0 ;
443
+ indent = Indent . EMPTY ;
412
444
} else {
413
445
indent = getIndentationCount (getText ());
414
446
}
415
- int previous = indents .isEmpty () ? 0 : indents .peek ();
416
- if (indent == previous ) {
447
+ Indent previous = indents .isEmpty () ? Indent .EMPTY : indents .peek ();
448
+ if (indent .indent == previous .indent ) {
449
+ if (indent .altindent != previous .altindent ) {
450
+ this .emit (createIndentError (Python3Parser .TAB_ERROR ));
451
+ }
417
452
// skip indents of the same size as the present indent-size
418
453
skip ();
419
454
}
420
- else if (indent > previous ) {
455
+ else if (indent .indent > previous .indent ) {
456
+ if (indent .altindent <= previous .altindent ) {
457
+ this .emit (createIndentError (Python3Parser .TAB_ERROR ));
458
+ }
421
459
indents .push (indent );
422
460
emit (commonToken (Python3Parser .INDENT , "" ));
423
461
}
424
462
else {
425
463
// Possibly emit more than 1 DEDENT token.
426
- while (!indents .isEmpty () && indents .peek () > indent ) {
464
+ while (!indents .isEmpty () && indents .peek (). indent > indent . indent ) {
427
465
this .emit (createDedent ());
428
466
indents .pop ();
429
467
}
468
+ Indent expectedIndent = indents .isEmpty () ? Indent .EMPTY : indents .peek ();
469
+ if (expectedIndent .indent != indent .indent ) {
470
+ this .emit (createIndentError (Python3Parser .INDENT_ERROR ));
471
+ }
472
+ if (expectedIndent .altindent != indent .altindent ) {
473
+ this .emit (createIndentError (Python3Parser .TAB_ERROR ));
474
+ }
430
475
}
431
476
}
432
477
0 commit comments