Skip to content

Commit 35a7359

Browse files
committed
Implement TabError detection
1 parent 95bac26 commit 35a7359

File tree

2 files changed

+49
-18
lines changed

2 files changed

+49
-18
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/antlr/DescriptiveBailErrorListener.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol,
9696
entireMessage = "unindent does not match any outer indentation level";
9797
errorType = ErrorType.Indentation;
9898
break;
99+
case Python3Parser.TAB_ERROR:
100+
entireMessage = "inconsistent use of tabs and spaces in indentation";
101+
errorType = ErrorType.Tab;
102+
break;
99103
case Python3Parser.INDENT:
100104
entireMessage = "unexpected indent";
101105
errorType = ErrorType.Indentation;

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/antlr/Python3.g4

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,26 @@ grammar Python3;
3535
// All comments that start with "///" are copy-pasted from
3636
// The Python Language Reference
3737

38-
tokens { INDENT, DEDENT, INDENT_ERROR }
38+
tokens { INDENT, DEDENT, INDENT_ERROR, TAB_ERROR }
3939

4040
@lexer::members {
4141
// new version with semantic actions in parser
4242
43+
private static class Indent {
44+
public final int indent;
45+
public final int altindent;
46+
public static final Indent EMPTY = new Indent(0, 0);
47+
48+
public Indent(int indent, int altindent) {
49+
this.indent = indent;
50+
this.altindent = altindent;
51+
}
52+
}
53+
4354
// A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
4455
private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
4556
// The stack that keeps track of the indentation level.
46-
private java.util.Stack<Integer> indents = new java.util.Stack<>();
57+
private java.util.Stack<Indent> indents = new java.util.Stack<>();
4758
// The number of currently open braces, brackets and parentheses.
4859
private int opened = 0;
4960
// The most recently produced token.
@@ -167,7 +178,7 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
167178
return dedent;
168179
}
169180

170-
private Token createIndentError() {
181+
private Token createIndentError(int type) {
171182
// For some reason, CPython sets the error position to the end of the line
172183
int cur = getCharIndex();
173184
String s;
@@ -176,7 +187,7 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
176187
cur++;
177188
} while (!s.isEmpty() && s.charAt(0) != '\n');
178189
cur--;
179-
CommonToken error = new CommonToken(this._tokenFactorySourcePair, Python3Parser.INDENT_ERROR, DEFAULT_TOKEN_CHANNEL, cur, cur);
190+
CommonToken error = new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, cur, cur);
180191
error.setLine(this.lastToken.getLine());
181192
return error;
182193
}
@@ -194,9 +205,14 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
194205
// such that the total number of characters up to and including
195206
// the replacement is a multiple of eight [...]"
196207
//
208+
// Altindent is an alternative measure of spaces where tabs are
209+
// counted as one space. The purpose is to validate that the code
210+
doesn't mix tabs and spaces in an inconsistent way.
211+
//
197212
// -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
198-
static int getIndentationCount(String spaces) {
199-
int count = 0;
213+
static Indent getIndentationCount(String spaces) {
214+
int indent = 0;
215+
int altindent = 0;
200216
for (char ch : spaces.toCharArray()) {
201217
switch (ch) {
202218
case '\r':
@@ -205,15 +221,17 @@ tokens { INDENT, DEDENT, INDENT_ERROR }
205221
// ignore
206222
break;
207223
case '\t':
208-
count += 8 - (count % 8);
224+
indent += 8 - (indent % 8);
225+
altindent++;
209226
break;
210227
default:
211228
// A normal space char.
212-
count++;
229+
indent++;
230+
altindent++;
213231
}
214232
}
215233

216-
return count;
234+
return new Indent(indent, altindent);
217235
}
218236

219237
boolean atStartOfInput() {
@@ -1769,31 +1787,40 @@ NEWLINE
17691787
}
17701788
else {
17711789
emit(commonToken(NEWLINE, "\n"));
1772-
int indent;
1790+
Indent indent;
17731791
if (next == EOF) {
17741792
// don't add indents if we're going to finish
1775-
indent = 0;
1793+
indent = Indent.EMPTY;
17761794
} else {
17771795
indent = getIndentationCount(getText());
17781796
}
1779-
int previous = indents.isEmpty() ? 0 : indents.peek();
1780-
if (indent == previous) {
1797+
Indent previous = indents.isEmpty() ? Indent.EMPTY : indents.peek();
1798+
if (indent.indent == previous.indent) {
1799+
if (indent.altindent != previous.altindent) {
1800+
this.emit(createIndentError(Python3Parser.TAB_ERROR));
1801+
}
17811802
// skip indents of the same size as the present indent-size
17821803
skip();
17831804
}
1784-
else if (indent > previous) {
1805+
else if (indent.indent > previous.indent) {
1806+
if (indent.altindent <= previous.altindent) {
1807+
this.emit(createIndentError(Python3Parser.TAB_ERROR));
1808+
}
17851809
indents.push(indent);
17861810
emit(commonToken(Python3Parser.INDENT, getText()));
17871811
}
17881812
else {
17891813
// Possibly emit more than 1 DEDENT token.
1790-
while (!indents.isEmpty() && indents.peek() > indent) {
1814+
while (!indents.isEmpty() && indents.peek().indent > indent.indent) {
17911815
this.emit(createDedent());
17921816
indents.pop();
17931817
}
1794-
int expectedIndent = indents.empty() ? 0 : indents.peek();
1795-
if (expectedIndent != indent) {
1796-
this.emit(createIndentError());
1818+
Indent expectedIndent = indents.isEmpty() ? Indent.EMPTY : indents.peek();
1819+
if (expectedIndent.indent != indent.indent) {
1820+
this.emit(createIndentError(Python3Parser.INDENT_ERROR));
1821+
}
1822+
if (expectedIndent.altindent != indent.altindent) {
1823+
this.emit(createIndentError(Python3Parser.TAB_ERROR));
17971824
}
17981825
}
17991826
}

0 commit comments

Comments
 (0)