@@ -35,15 +35,26 @@ grammar Python3;
35
35
// All comments that start with "///" are copy-pasted from
36
36
// The Python Language Reference
37
37
38
- tokens { INDENT, DEDENT, INDENT_ERROR }
38
+ tokens { INDENT, DEDENT, INDENT_ERROR, TAB_ERROR }
39
39
40
40
@lexer::members {
41
41
// new version with semantic actions in parser
42
42
43
+ private static class Indent {
44
+ public final int indent;
45
+ public final int altindent;
46
+ public static final Indent EMPTY = new Indent(0 , 0 );
47
+
48
+ public Indent(int indent, int altindent) {
49
+ this.indent = indent;
50
+ this.altindent = altindent;
51
+ }
52
+ }
53
+
43
54
// A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
44
55
private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
45
56
// The stack that keeps track of the indentation level.
46
- private java.util.Stack<Integer > indents = new java.util.Stack<>();
57
+ private java.util.Stack<Indent > indents = new java.util.Stack<>();
47
58
// The number of opened braces, brackets and parentheses.
48
59
private int opened = 0;
49
60
// The most recently produced token.
@@ -167,7 +178,7 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
167
178
return dedent;
168
179
}
169
180
170
- private Token createIndentError() {
181
+ private Token createIndentError(int type ) {
171
182
// For some reason, CPython sets the error position to the end of line
172
183
int cur = getCharIndex();
173
184
String s;
@@ -176,7 +187,7 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
176
187
cur++;
177
188
} while (!s.isEmpty() && s.charAt(0) != ' \n ' );
178
189
cur--;
179
- CommonToken error = new CommonToken(this._tokenFactorySourcePair, Python3Parser. INDENT_ERROR , DEFAULT_TOKEN_CHANNEL , cur, cur);
190
+ CommonToken error = new CommonToken(this._tokenFactorySourcePair, type , DEFAULT_TOKEN_CHANNEL , cur, cur);
180
191
error.setLine(this.lastToken.getLine());
181
192
return error;
182
193
}
@@ -194,9 +205,14 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
194
205
// such that the total number of characters up to and including
195
206
// the replacement is a multiple of eight [...]"
196
207
//
208
+ // Altindent is an alternative measure of spaces where tabs are
209
+ // counted as one space. The purpose is to validate that the code
210
+ // doesn't mix tabs and spaces in an inconsistent way.
211
+ //
197
212
// -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
198
- static int getIndentationCount(String spaces) {
199
- int count = 0 ;
213
+ static Indent getIndentationCount(String spaces) {
214
+ int indent = 0 ;
215
+ int altindent = 0 ;
200
216
for (char ch : spaces.toCharArray()) {
201
217
switch (ch) {
202
218
case ' \r ' :
@@ -205,15 +221,17 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
205
221
// ignore
206
222
break ;
207
223
case ' \t ' :
208
- count += 8 - (count % 8 );
224
+ indent += 8 - (indent % 8 );
225
+ altindent++;
209
226
break ;
210
227
default :
211
228
// A normal space char.
212
- count++;
229
+ indent++;
230
+ altindent++;
213
231
}
214
232
}
215
233
216
- return count ;
234
+ return new Indent(indent, altindent) ;
217
235
}
218
236
219
237
boolean atStartOfInput() {
@@ -1769,31 +1787,40 @@ NEWLINE
1769
1787
}
1770
1788
else {
1771
1789
emit(commonToken(NEWLINE, " \n " ));
1772
- int indent;
1790
+ Indent indent;
1773
1791
if (next == EOF) {
1774
1792
// don't add indents if we're going to finish
1775
- indent = 0 ;
1793
+ indent = Indent.EMPTY ;
1776
1794
} else {
1777
1795
indent = getIndentationCount(getText());
1778
1796
}
1779
- int previous = indents.isEmpty() ? 0 : indents.peek();
1780
- if (indent == previous) {
1797
+ Indent previous = indents.isEmpty() ? Indent.EMPTY : indents.peek();
1798
+ if (indent.indent == previous.indent) {
1799
+ if (indent.altindent != previous.altindent) {
1800
+ this.emit(createIndentError(Python3Parser.TAB_ERROR));
1801
+ }
1781
1802
// skip indents of the same size as the present indent-size
1782
1803
skip();
1783
1804
}
1784
- else if (indent > previous) {
1805
+ else if (indent.indent > previous.indent) {
1806
+ if (indent.altindent <= previous.altindent) {
1807
+ this.emit(createIndentError(Python3Parser.TAB_ERROR));
1808
+ }
1785
1809
indents.push(indent);
1786
1810
emit(commonToken(Python3Parser.INDENT , getText()));
1787
1811
}
1788
1812
else {
1789
1813
// Possibly emit more than 1 DEDENT token.
1790
- while (!indents.isEmpty() && indents.peek() > indent) {
1814
+ while (!indents.isEmpty() && indents.peek().indent > indent. indent) {
1791
1815
this.emit(createDedent());
1792
1816
indents.pop();
1793
1817
}
1794
- int expectedIndent = indents.empty() ? 0 : indents.peek();
1795
- if (expectedIndent != indent) {
1796
- this.emit(createIndentError());
1818
+ Indent expectedIndent = indents.isEmpty() ? Indent.EMPTY : indents.peek();
1819
+ if (expectedIndent.indent != indent.indent) {
1820
+ this.emit(createIndentError(Python3Parser.INDENT_ERROR));
1821
+ }
1822
+ if (expectedIndent.altindent != indent.altindent) {
1823
+ this.emit(createIndentError(Python3Parser.TAB_ERROR));
1797
1824
}
1798
1825
}
1799
1826
}
0 commit comments