Skip to content

Commit 7f9cc93

Browse files
committed
Move checking for beginning-of-file indent to a preprocessing step
1 parent 4b41b43 commit 7f9cc93

File tree

5 files changed

+31
-3
lines changed

5 files changed

+31
-3
lines changed

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/parser/BasicTests.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ public void moduleWithLincense() throws Exception {
8484
"print(\"module A\")");
8585
}
8686

87+
@Test
88+
public void leadingIndent1() throws Exception {
89+
checkSyntaxErrorMessage(" 1", "IndentationError: unexpected indent");
90+
}
91+
92+
@Test
93+
public void leadingIndent2() throws Exception {
94+
checkTreeResult(" # foo\npass");
95+
}
96+
8797
@Test
8898
public void annAssign01() throws Exception {
8999
checkTreeResult("a: int = 1");
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ModuleRootNode Name: <module 'leadingIndent2'> SourceSection: [0,11]` # foo↵pass`
2+
Signature: varArgs=False, varKeywordArgs=False, noArguments=True, positionalOnly=True, requiresKeywordArgs=False
3+
FreeVars: None
4+
NeedsCellFrame: False
5+
FrameDescriptor: Empty
6+
Documentation: None
7+
InnerRootNode SourceSection: [0,11]` # foo↵pass`
8+
EmptyNode SourceSection: [6,11]`↵pass`

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/PythonErrorStrategy.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/PythonParserImpl.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import java.io.DataInputStream;
3131
import java.io.DataOutputStream;
3232
import java.io.IOException;
33+
import java.util.regex.Matcher;
34+
import java.util.regex.Pattern;
3335

3436
import org.antlr.v4.runtime.CharStreams;
3537
import org.antlr.v4.runtime.CommonTokenStream;
@@ -76,6 +78,8 @@ public final class PythonParserImpl implements PythonParser, PythonCodeSerialize
7678
private long numberOfFiles = 0;
7779
private static final boolean IN_IMAGE_BUILD_TIME = ImageInfo.inImageBuildtimeCode();
7880

81+
private static final Pattern START_INDENT_REGEX = Pattern.compile("^([ \t]+)[^#\r\n\f]");
82+
7983
public static final DescriptiveBailErrorListener ERROR_LISTENER = new DescriptiveBailErrorListener();
8084

8185
public PythonParserImpl(Env env) {
@@ -274,6 +278,12 @@ private CacheItem parseWithANTLR(ParserMode mode, ParserErrorCallback errors, Py
274278
throw errors.raiseInvalidSyntax(source, source.createUnavailableSection(), "encoding problem: %s", e.getEncodingName());
275279
}
276280
}
281+
// We need to reject inputs starting with indent, but doing it in ANTLR is expensive, so we
282+
// do it here manually
283+
Matcher matcher = START_INDENT_REGEX.matcher(sourceText);
284+
if (matcher.find()) {
285+
throw errors.raiseInvalidSyntax(ErrorType.Indentation, source, source.createSection(0, matcher.end(1)), "unexpected indent");
286+
}
277287
// ANTLR parsing
278288
Python3Parser parser = getPython3Parser(source, sourceText, errors);
279289
parser.setFactory(sstFactory);

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/antlr/Python3.g4

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,8 +1774,8 @@ ASYNC : 'async';
17741774
AWAIT : 'await';
17751775

17761776
NEWLINE
1777-
: (
1778-
{atStartOfInput()}? SPACES |
1777+
: ( // For performance reasons, rejecting input starting with indent is handled by a preprocessing step in PythonParserImpl
1778+
// {atStartOfInput()}? SPACES |
17791779
( '\r'? '\n' | '\r' | '\f' ) SPACES?
17801780
)
17811781
{

0 commit comments

Comments
 (0)