Skip to content

Commit 3944116

Browse files
committed
[GR-23226] Add IndentationError and TabError to the parser
PullRequest: graalpython/1000
2 parents d55a433 + a798ae4 commit 3944116

File tree

15 files changed

+1722
-1465
lines changed

15 files changed

+1722
-1465
lines changed

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/parser/BasicTests.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ public void moduleWithLincense() throws Exception {
8484
"print(\"module A\")");
8585
}
8686

87+
// Source whose first statement is preceded by whitespace must be rejected
// with the message "IndentationError: unexpected indent".
@Test
88+
public void leadingIndent1() throws Exception {
89+
checkSyntaxErrorMessage(" 1", "IndentationError: unexpected indent");
90+
}
91+
92+
// An indented first line that holds only a comment is not an indentation
// error: the source is expected to parse (presumably checkTreeResult compares
// the resulting tree with a stored expected tree -- confirm against the helper).
@Test
93+
public void leadingIndent2() throws Exception {
94+
checkTreeResult(" # foo\npass");
95+
}
96+
8797
@Test
8898
public void annAssign01() throws Exception {
8999
checkTreeResult("a: int = 1");

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/parser/SSTNodeWithScopeFinderTest.java

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,17 @@ public void tryTest() throws Exception {
8282

8383
@Test
8484
public void ifTest() throws Exception {
85-
String code = " if True:\n" +
86-
" a = 1\n" +
87-
" def fn1():\n" +
88-
" return 10\n" +
89-
" elif False:\n" +
90-
" b = 2\n" +
91-
" def fn2():\n" +
92-
" return 20\n" +
93-
" else:\n" +
94-
" def fn3():\n" +
95-
" return 30\n";
85+
String code = "if True:\n" +
86+
" a = 1\n" +
87+
" def fn1():\n" +
88+
" return 10\n" +
89+
"elif False:\n" +
90+
" b = 2\n" +
91+
" def fn2():\n" +
92+
" return 20\n" +
93+
"else:\n" +
94+
" def fn3():\n" +
95+
" return 30\n";
9696
checkFinder(code, code.indexOf("def fn1"), code.indexOf("elif"), false);
9797
checkFinder(code, code.indexOf("def fn2"), code.indexOf("else"), false);
9898
checkFinder(code, code.indexOf("def fn3"), code.indexOf("30") + 3, false);
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ModuleRootNode Name: <module 'leadingIndent2'> SourceSection: [0,11]` # foo↵pass`
2+
Signature: varArgs=False, varKeywordArgs=False, noArguments=True, positionalOnly=True, requiresKeywordArgs=False
3+
FreeVars: None
4+
NeedsCellFrame: False
5+
FrameDescriptor: Empty
6+
Documentation: None
7+
InnerRootNode SourceSection: [0,11]` # foo↵pass`
8+
EmptyNode SourceSection: [6,11]`↵pass`

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/Python3Core.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
*/
2626
package com.oracle.graal.python.builtins;
2727

28+
import static com.oracle.graal.python.builtins.PythonBuiltinClassType.IndentationError;
29+
import static com.oracle.graal.python.builtins.PythonBuiltinClassType.TabError;
2830
import static com.oracle.graal.python.nodes.SpecialAttributeNames.__PACKAGE__;
2931
import static com.oracle.graal.python.runtime.exception.PythonErrorType.SyntaxError;
3032

@@ -168,6 +170,7 @@
168170
import com.oracle.graal.python.builtins.objects.thread.ThreadBuiltins;
169171
import com.oracle.graal.python.builtins.objects.traceback.TracebackBuiltins;
170172
import com.oracle.graal.python.builtins.objects.tuple.TupleBuiltins;
173+
import com.oracle.graal.python.builtins.objects.type.LazyPythonClass;
171174
import com.oracle.graal.python.builtins.objects.type.PythonBuiltinClass;
172175
import com.oracle.graal.python.builtins.objects.type.TypeBuiltins;
173176
import com.oracle.graal.python.builtins.objects.type.TypeNodes.GetNameNode;
@@ -725,22 +728,34 @@ public PFloat getNaN() {
725728
}
726729

727730
@Override
728-
public RuntimeException raiseInvalidSyntax(Source source, SourceSection section, String message, Object... arguments) {
731+
public RuntimeException raiseInvalidSyntax(PythonParser.ErrorType type, Source source, SourceSection section, String message, Object... arguments) {
729732
CompilerDirectives.transferToInterpreter();
730733
Node location = new Node() {
731734
@Override
732735
public SourceSection getSourceSection() {
733736
return section;
734737
}
735738
};
736-
throw raiseInvalidSyntax(location, message, arguments);
739+
throw raiseInvalidSyntax(type, location, message, arguments);
737740
}
738741

739742
@Override
740743
@TruffleBoundary
741-
public RuntimeException raiseInvalidSyntax(Node location, String message, Object... arguments) {
744+
public RuntimeException raiseInvalidSyntax(PythonParser.ErrorType type, Node location, String message, Object... arguments) {
742745
PBaseException instance;
743-
instance = factory().createBaseException(SyntaxError, message, arguments);
746+
LazyPythonClass cls;
747+
switch (type) {
748+
case Indentation:
749+
cls = IndentationError;
750+
break;
751+
case Tab:
752+
cls = TabError;
753+
break;
754+
default:
755+
cls = SyntaxError;
756+
break;
757+
}
758+
instance = factory().createBaseException(cls, message, arguments);
744759
SourceSection section = location.getSourceSection();
745760
Source source = section.getSource();
746761
String path = source.getPath();

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ public PTuple doObject(VirtualFrame frame, Object a, Object b,
533533
public abstract static class EvalNode extends PythonBuiltinNode {
534534
protected final String funcname = "eval";
535535
private final BranchProfile hasFreeVarsBranch = BranchProfile.create();
536-
@Child protected CompileNode compileNode = CompileNode.create(false);
536+
@Child protected CompileNode compileNode;
537537
@Child private GenericInvokeNode invokeNode = GenericInvokeNode.create();
538538
@Child private HasInheritedAttributeNode hasGetItemNode;
539539

@@ -571,7 +571,7 @@ protected boolean isAnyNone(Object object) {
571571
}
572572

573573
protected PCode createAndCheckCode(VirtualFrame frame, Object source) {
574-
PCode code = compileNode.execute(frame, source, "<string>", getMode(), 0, false, -1);
574+
PCode code = getCompileNode().execute(frame, source, "<string>", getMode(), 0, false, -1);
575575
assertNoFreeVars(code);
576576
return code;
577577
}
@@ -680,6 +680,18 @@ PNone badGlobals(@SuppressWarnings("unused") Object source, Object globals, @Sup
680680
PNone badLocals(@SuppressWarnings("unused") Object source, @SuppressWarnings("unused") PDict globals, Object locals) {
681681
throw raise(TypeError, ErrorMessages.LOCALS_MUST_BE_MAPPING, funcname, locals);
682682
}
683+
684+
/**
 * Returns the child {@code CompileNode}, creating and inserting it on first use.
 * The node is created with {@code mayBeFromFile == false} and with the
 * leading-whitespace flag taken from {@link #shouldStripLeadingWhitespace()}.
 * NOTE(review): creation is presumably deferred (rather than done in a field
 * initializer) so that the subclass override of the hook is honoured -- confirm.
 */
private CompileNode getCompileNode() {
685+
if (compileNode == null) {
686+
CompilerDirectives.transferToInterpreterAndInvalidate();
687+
compileNode = insert(CompileNode.create(false, shouldStripLeadingWhitespace()));
688+
}
689+
return compileNode;
690+
}
691+
692+
/**
 * Hook consulted when the compile node is created: the returned value is passed
 * as the {@code lstrip} argument of {@code CompileNode.create}. This default
 * implementation returns {@code true}; subclasses may override it.
 */
protected boolean shouldStripLeadingWhitespace() {
693+
return true;
694+
}
683695
}
684696

685697
@Builtin(name = EXEC, minNumOfPositionalArgs = 1, parameterNames = {"source", "globals", "locals"})
@@ -697,6 +709,11 @@ public final Object execute(VirtualFrame frame) {
697709
executeInternal(frame);
698710
return PNone.NONE;
699711
}
712+
713+
// Overrides the hook to return false, so the compile node created for this
// node does not strip leading whitespace (presumably this is the exec builtin,
// where leading indentation must still be reported -- confirm against the class header).
@Override
714+
protected boolean shouldStripLeadingWhitespace() {
715+
return false;
716+
}
700717
}
701718

702719
// compile(source, filename, mode, flags=0, dont_inherit=False, optimize=-1)
@@ -709,13 +726,16 @@ public abstract static class CompileNode extends PythonBuiltinNode {
709726
* Truffle tooling
710727
*/
711728
private final boolean mayBeFromFile;
729+
private final boolean lstrip;
712730

713-
public CompileNode(boolean mayBeFromFile) {
731+
public CompileNode(boolean mayBeFromFile, boolean lstrip) {
714732
this.mayBeFromFile = mayBeFromFile;
733+
this.lstrip = lstrip;
715734
}
716735

717736
public CompileNode() {
718737
this.mayBeFromFile = true;
738+
this.lstrip = false;
719739
}
720740

721741
public abstract PCode execute(VirtualFrame frame, Object source, String filename, String mode, Object kwFlags, Object kwDontInherit, Object kwOptimize);
@@ -761,6 +781,9 @@ PCode compile(String expression, String filename, String mode, Object kwFlags, O
761781
} else {
762782
throw raise(ValueError, ErrorMessages.COMPILE_MUST_BE);
763783
}
784+
if (lstrip) {
785+
// Strip the whole run of leading spaces/tabs, not just the first character:
// CPython's eval() skips all of them, and any whitespace left over would be
// rejected later by the parser's leading-indent check ("unexpected indent").
code = code.replaceFirst("^[ \t]+", "");
786+
}
764787
final String codeToCompile = code;
765788
Supplier<CallTarget> createCode = () -> {
766789
Source source = PythonLanguage.newSource(context, codeToCompile, filename, mayBeFromFile);
@@ -788,7 +811,11 @@ private static String createString(byte[] bytes, Charset charset) {
788811
}
789812

790813
public static CompileNode create(boolean mapFilenameToUri) {
791-
return BuiltinFunctionsFactory.CompileNodeFactory.create(mapFilenameToUri, new ReadArgumentNode[]{});
814+
return BuiltinFunctionsFactory.CompileNodeFactory.create(mapFilenameToUri, false, new ReadArgumentNode[]{});
815+
}
816+
817+
/**
 * Creates a {@code CompileNode} through the generated factory, with no argument-reading children.
 *
 * @param mapFilenameToUri forwarded as the node's first constructor argument
 * @param lstrip forwarded as the node's {@code lstrip} flag; when set, leading
 *            space/tab is removed from the code before it is compiled
 */
public static CompileNode create(boolean mapFilenameToUri, boolean lstrip) {
818+
return BuiltinFunctionsFactory.CompileNodeFactory.create(mapFilenameToUri, lstrip, new ReadArgumentNode[]{});
792819
}
793820
}
794821

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/PythonErrorStrategy.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -49,6 +49,8 @@
4949
import org.antlr.v4.runtime.TokenStream;
5050
import org.antlr.v4.runtime.misc.Interval;
5151

52+
import com.oracle.graal.python.parser.antlr.DescriptiveBailErrorListener;
53+
import com.oracle.graal.python.runtime.PythonParser.ErrorType;
5254
import com.oracle.truffle.api.source.Source;
5355
import com.oracle.truffle.api.source.SourceSection;
5456

@@ -76,6 +78,13 @@ static SourceSection getPosition(Source source, Exception e) {
7678
return source.createSection(token.getStartIndex(), Math.max(0, token.getStopIndex() - token.getStartIndex()));
7779
}
7880

81+
/**
 * Determines which kind of syntax error an exception raised during parsing represents.
 *
 * @param e the exception to classify
 * @return the error type carried by {@code e} when it is a
 *         {@code DescriptiveBailErrorListener.EmptyRecognitionException};
 *         otherwise {@code ErrorType.Generic}
 */
static ErrorType getErrorType(Exception e) {
82+
if (e instanceof DescriptiveBailErrorListener.EmptyRecognitionException) {
83+
return ((DescriptiveBailErrorListener.EmptyRecognitionException) e).getErrorType();
84+
}
85+
return ErrorType.Generic;
86+
}
87+
7988
private static String getTokeLineText(Parser recognizer, Token token) {
8089
TokenStream tokenStream = recognizer.getTokenStream();
8190
int index = token.getTokenIndex();

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/PythonParserImpl.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import java.io.DataInputStream;
3131
import java.io.DataOutputStream;
3232
import java.io.IOException;
33+
import java.util.regex.Matcher;
34+
import java.util.regex.Pattern;
3335

3436
import org.antlr.v4.runtime.CharStreams;
3537
import org.antlr.v4.runtime.CommonTokenStream;
@@ -76,6 +78,8 @@ public final class PythonParserImpl implements PythonParser, PythonCodeSerialize
7678
private long numberOfFiles = 0;
7779
private static final boolean IN_IMAGE_BUILD_TIME = ImageInfo.inImageBuildtimeCode();
7880

81+
private static final Pattern START_INDENT_REGEX = Pattern.compile("^([ \t]+)[^#\r\n\f]");
82+
7983
public static final DescriptiveBailErrorListener ERROR_LISTENER = new DescriptiveBailErrorListener();
8084

8185
public PythonParserImpl(Env env) {
@@ -274,6 +278,12 @@ private CacheItem parseWithANTLR(ParserMode mode, ParserErrorCallback errors, Py
274278
throw errors.raiseInvalidSyntax(source, source.createUnavailableSection(), "encoding problem: %s", e.getEncodingName());
275279
}
276280
}
281+
// We need to reject inputs starting with indent, but doing it in ANTLR is expensive, so we
282+
// do it here manually
283+
Matcher matcher = START_INDENT_REGEX.matcher(sourceText);
284+
if (matcher.find()) {
285+
throw errors.raiseInvalidSyntax(ErrorType.Indentation, source, source.createSection(0, matcher.end(1)), "unexpected indent");
286+
}
277287
// ANTLR parsing
278288
Python3Parser parser = getPython3Parser(source, sourceText, errors);
279289
parser.setFactory(sstFactory);
@@ -370,6 +380,7 @@ private static PException handleParserError(ParserErrorCallback errors, Source s
370380
SourceSection section = PythonErrorStrategy.getPosition(source, e);
371381
// from parser we are getting RuntimeExceptions
372382
String message = e instanceof RuntimeException && e.getMessage() != null ? e.getMessage() : "invalid syntax";
373-
throw errors.raiseInvalidSyntax(source, section, message);
383+
ErrorType errorType = PythonErrorStrategy.getErrorType(e);
384+
throw errors.raiseInvalidSyntax(errorType, source, section, message);
374385
}
375386
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/antlr/DescriptiveBailErrorListener.java

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import org.antlr.v4.runtime.misc.IntervalSet;
5151

5252
import com.oracle.graal.python.parser.antlr.Python3Parser.Single_inputContext;
53+
import com.oracle.graal.python.runtime.PythonParser.ErrorType;
5354
import com.oracle.graal.python.runtime.PythonParser.PIncompleteSourceException;
5455
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
5556

@@ -67,25 +68,53 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol,
6768

6869
String entireMessage = e == null || e.getMessage() == null ? "invalid syntax" : e.getMessage();
6970

71+
IntervalSet expectedTokens = null;
7072
if (e != null) {
71-
PIncompleteSourceException handleRecognitionException = handleRecognitionException(e.getExpectedTokens(), entireMessage, e, line);
72-
if (handleRecognitionException != null) {
73-
throw handleRecognitionException;
74-
}
73+
expectedTokens = e.getExpectedTokens();
7574
} else if (recognizer instanceof Python3Parser) {
76-
PIncompleteSourceException handleRecognitionException = handleRecognitionException(((Python3Parser) recognizer).getExpectedTokens(), entireMessage, null, line);
77-
if (handleRecognitionException != null) {
78-
throw handleRecognitionException;
79-
}
75+
expectedTokens = ((Python3Parser) recognizer).getExpectedTokens();
8076
}
77+
8178
if (isInteractive(recognizer)) {
82-
PIncompleteSourceException handleRecognitionException = handleInteractiveException(recognizer, offendingSymbol);
83-
if (handleRecognitionException != null) {
84-
throw handleRecognitionException;
79+
PIncompleteSourceException incompleteSourceException = null;
80+
if (expectedTokens != null) {
81+
incompleteSourceException = handleRecognitionException(expectedTokens, entireMessage, e, line);
82+
}
83+
if (incompleteSourceException == null) {
84+
incompleteSourceException = handleInteractiveException(recognizer, offendingSymbol);
85+
}
86+
if (incompleteSourceException != null) {
87+
throw incompleteSourceException;
8588
}
8689
}
90+
8791
if (offendingSymbol instanceof Token) {
88-
throw new RuntimeException(entireMessage, new EmptyRecognitionException(entireMessage, recognizer, (Token) offendingSymbol));
92+
Token token = (Token) offendingSymbol;
93+
ErrorType errorType = ErrorType.Generic;
94+
switch (token.getType()) {
95+
case Python3Parser.INDENT_ERROR:
96+
entireMessage = "unindent does not match any outer indentation level";
97+
errorType = ErrorType.Indentation;
98+
break;
99+
case Python3Parser.TAB_ERROR:
100+
entireMessage = "inconsistent use of tabs and spaces in indentation";
101+
errorType = ErrorType.Tab;
102+
break;
103+
case Python3Parser.INDENT:
104+
entireMessage = "unexpected indent";
105+
errorType = ErrorType.Indentation;
106+
break;
107+
case Python3Parser.DEDENT:
108+
entireMessage = "unexpected unindent";
109+
errorType = ErrorType.Indentation;
110+
break;
111+
default:
112+
if (expectedTokens != null && expectedTokens.contains(Python3Parser.INDENT)) {
113+
entireMessage = "expected an indented block";
114+
errorType = ErrorType.Indentation;
115+
}
116+
}
117+
throw new EmptyRecognitionException(errorType, entireMessage, recognizer, token);
89118
}
90119
throw new RuntimeException(entireMessage, e);
91120
}
@@ -143,18 +172,24 @@ private static boolean isBackslash(Object offendingSymbol) {
143172
return false;
144173
}
145174

146-
private static class EmptyRecognitionException extends RecognitionException {
175+
public static class EmptyRecognitionException extends RecognitionException {
147176
private static final long serialVersionUID = 1L;
148177
private Token offendingToken;
178+
private ErrorType errorType;
149179

150-
public EmptyRecognitionException(String message, Recognizer<?, ?> recognizer, Token offendingToken) {
180+
public EmptyRecognitionException(ErrorType errorType, String message, Recognizer<?, ?> recognizer, Token offendingToken) {
151181
super(message, recognizer, offendingToken.getInputStream(), null);
182+
this.errorType = errorType;
152183
this.offendingToken = offendingToken;
153184
}
154185

155186
@Override
156187
public Token getOffendingToken() {
157188
return offendingToken;
158189
}
190+
191+
public ErrorType getErrorType() {
192+
return errorType;
193+
}
159194
}
160195
}

0 commit comments

Comments
 (0)