Skip to content

Commit 04953a8

Browse files
committed
[GR-29495] Don't use parser for str.isidentifier.
PullRequest: graalpython/1626
2 parents d6b8415 + ca77a59 commit 04953a8

File tree

6 files changed

+79
-64
lines changed

6 files changed

+79
-64
lines changed

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/parser/BasicTests.java

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import org.junit.Assert;
4747
import org.junit.Test;
4848

49-
import com.oracle.graal.python.runtime.PythonCore;
49+
import com.oracle.graal.python.builtins.objects.str.StringUtils;
5050
import com.oracle.graal.python.runtime.PythonParser;
5151
import com.oracle.truffle.api.Truffle;
5252
import com.oracle.truffle.api.frame.Frame;
@@ -925,25 +925,29 @@ public void spaceEnd() throws Exception {
925925

926926
@Test
927927
public void isIdentifier() throws Exception {
928-
PythonCore core = context.getCore();
929-
PythonParser parser = core.getParser();
930-
Assert.assertTrue(parser.isIdentifier(core, "hello"));
931-
Assert.assertTrue(parser.isIdentifier(core, "_"));
932-
Assert.assertTrue(parser.isIdentifier(core, "b0"));
933-
Assert.assertTrue(parser.isIdentifier(core, "bc"));
934-
Assert.assertTrue(parser.isIdentifier(core, "b_"));
935-
Assert.assertTrue(parser.isIdentifier(core, "µ"));
936-
937-
Assert.assertFalse(parser.isIdentifier(core, " hello"));
938-
Assert.assertFalse(parser.isIdentifier(core, "hello "));
939-
Assert.assertFalse(parser.isIdentifier(core, "hel lo"));
940-
Assert.assertFalse(parser.isIdentifier(core, "hel?o"));
941-
Assert.assertFalse(parser.isIdentifier(core, "hel!o"));
942-
943-
Assert.assertFalse(parser.isIdentifier(core, " "));
944-
Assert.assertFalse(parser.isIdentifier(core, "["));
945-
Assert.assertFalse(parser.isIdentifier(core, "©"));
946-
Assert.assertFalse(parser.isIdentifier(core, "0"));
928+
Assert.assertTrue(StringUtils.isIdentifier("hello"));
929+
Assert.assertTrue(StringUtils.isIdentifier("_"));
930+
Assert.assertTrue(StringUtils.isIdentifier("b0"));
931+
Assert.assertTrue(StringUtils.isIdentifier("bc"));
932+
Assert.assertTrue(StringUtils.isIdentifier("b_"));
933+
Assert.assertTrue(StringUtils.isIdentifier("µ"));
934+
935+
Assert.assertTrue(StringUtils.isIdentifier("for"));
936+
Assert.assertTrue(StringUtils.isIdentifier("break"));
937+
Assert.assertTrue(StringUtils.isIdentifier("while"));
938+
Assert.assertTrue(StringUtils.isIdentifier("return"));
939+
Assert.assertTrue(StringUtils.isIdentifier("def"));
940+
941+
Assert.assertFalse(StringUtils.isIdentifier(" hello"));
942+
Assert.assertFalse(StringUtils.isIdentifier("hello "));
943+
Assert.assertFalse(StringUtils.isIdentifier("hel lo"));
944+
Assert.assertFalse(StringUtils.isIdentifier("hel?o"));
945+
Assert.assertFalse(StringUtils.isIdentifier("hel!o"));
946+
947+
Assert.assertFalse(StringUtils.isIdentifier(" "));
948+
Assert.assertFalse(StringUtils.isIdentifier("["));
949+
Assert.assertFalse(StringUtils.isIdentifier("©"));
950+
Assert.assertFalse(StringUtils.isIdentifier("0"));
947951
}
948952

949953
@Test

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/PCode.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
5252
import com.oracle.graal.python.builtins.objects.function.Signature;
5353
import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject;
54+
import com.oracle.graal.python.builtins.objects.str.StringUtils;
5455
import com.oracle.graal.python.builtins.objects.tuple.PTuple;
5556
import com.oracle.graal.python.nodes.ModuleRootNode;
5657
import com.oracle.graal.python.nodes.PClosureFunctionRootNode;
@@ -264,7 +265,7 @@ private static Object[] extractVarnames(RootNode rootNode, String[] parameterIds
264265
if (FrameSlotIDs.RETURN_SLOT_ID.equals(varName) || varName.startsWith(FrameSlotIDs.TEMP_LOCAL_PREFIX)) {
265266
// pass
266267
} else if (!varNameList.contains(varName)) {
267-
if (PythonLanguage.getCore().getParser().isIdentifier(PythonLanguage.getCore(), varName)) {
268+
if (StringUtils.isIdentifier(varName)) {
268269
if (!freeVarsSet.contains(varName) && !cellVarsSet.contains(varName)) {
269270
varNameList.add(varName);
270271
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1950,7 +1950,7 @@ protected String getName() {
19501950
public abstract static class IsIdentifierNode extends PythonUnaryBuiltinNode {
19511951
@Specialization
19521952
boolean doString(String self) {
1953-
return getCore().getParser().isIdentifier(getCore(), self);
1953+
return StringUtils.isIdentifier(self);
19541954
}
19551955

19561956
@Specialization(replaces = "doString")

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,4 +319,56 @@ public static int compareToUnicodeAware(String a, String b) {
319319
}
320320
return len1 - len2;
321321
}
322+
323+
/**
324+
* Python identifiers are defined to start with an XID_Start or '_' character, followed by any
325+
* number of XID_Continue characters. Python keywords are not treated in a special way, so they
326+
* are identifiers as well.
327+
*/
328+
@TruffleBoundary
329+
public static boolean isIdentifier(String value) {
330+
int pos = 0;
331+
if (value.isEmpty()) {
332+
return false;
333+
}
334+
int c = value.codePointAt(pos);
335+
int type = Character.getType(c);
336+
if (c != '_') {
337+
// Unicode XID_Start
338+
switch (type) {
339+
case Character.UPPERCASE_LETTER:
340+
case Character.LOWERCASE_LETTER:
341+
case Character.TITLECASE_LETTER:
342+
case Character.MODIFIER_LETTER:
343+
case Character.OTHER_LETTER:
344+
case Character.LETTER_NUMBER:
345+
break;
346+
default:
347+
return false;
348+
}
349+
}
350+
pos += Character.charCount(c);
351+
while (pos < value.length()) {
352+
c = value.codePointAt(pos);
353+
type = Character.getType(c);
354+
// Unicode XID_Continue
355+
switch (type) {
356+
case Character.UPPERCASE_LETTER:
357+
case Character.LOWERCASE_LETTER:
358+
case Character.TITLECASE_LETTER:
359+
case Character.MODIFIER_LETTER:
360+
case Character.OTHER_LETTER:
361+
case Character.LETTER_NUMBER:
362+
case Character.NON_SPACING_MARK:
363+
case Character.COMBINING_SPACING_MARK:
364+
case Character.DECIMAL_DIGIT_NUMBER:
365+
case Character.CONNECTOR_PUNCTUATION:
366+
break;
367+
default:
368+
return false;
369+
}
370+
pos += Character.charCount(c);
371+
}
372+
return true;
373+
}
322374
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/PythonParserImpl.java

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535

3636
import org.antlr.v4.runtime.CharStreams;
3737
import org.antlr.v4.runtime.CommonTokenStream;
38-
import org.antlr.v4.runtime.Token;
3938
import org.graalvm.nativeimage.ImageInfo;
4039

4140
import com.oracle.graal.python.PythonFileDetector;
@@ -57,7 +56,6 @@
5756
import com.oracle.graal.python.parser.sst.SSTNodeWithScopeFinder;
5857
import com.oracle.graal.python.parser.sst.SSTSerializerVisitor;
5958
import com.oracle.graal.python.parser.sst.SerializationUtils;
60-
import com.oracle.graal.python.parser.sst.StringUtils;
6159
import com.oracle.graal.python.runtime.PythonCodeSerializer;
6260
import com.oracle.graal.python.runtime.PythonCore;
6361
import com.oracle.graal.python.runtime.PythonOptions;
@@ -387,36 +385,6 @@ public Node parseN(ParserMode mode, int optimizeLevel, ParserErrorCallback error
387385
}
388386
}
389387

390-
@Override
391-
@TruffleBoundary
392-
public boolean isIdentifier(PythonCore core, String snippet) {
393-
if (snippet.length() != snippet.trim().length()) {
394-
// identifier cannot start or end with any whitspace
395-
return false;
396-
}
397-
Python3Lexer lexer = new Python3Lexer(CharStreams.fromString(snippet));
398-
Token t = lexer.nextToken();
399-
if (t.getType() == Python3Lexer.NAME) {
400-
// the first token is identifier
401-
t = lexer.nextToken();
402-
if (t.getType() == Python3Lexer.NEWLINE) {
403-
// lexer alwayes add new line at the end
404-
t = lexer.nextToken();
405-
if (t.getType() == Python3Lexer.EOF) {
406-
// now we are sure that this is identifer
407-
return true;
408-
}
409-
}
410-
}
411-
return false;
412-
}
413-
414-
@Override
415-
@TruffleBoundary
416-
public String unescapeJavaString(PythonCore core, String str) {
417-
return StringUtils.unescapeJavaString(core, str);
418-
}
419-
420388
private static PException handleParserError(ParserErrorCallback errors, Source source, Exception e) {
421389
try {
422390
if (e instanceof PException && InteropLibrary.getUncached().getExceptionType(e) == ExceptionType.PARSE_ERROR) {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonParser.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,16 +133,6 @@ default RuntimeException raiseInvalidSyntax(Source source, SourceSection section
133133
*/
134134
Node parse(ParserMode mode, int optimizeLevel, ParserErrorCallback errors, Source source, Frame currentFrame, String[] arguments);
135135

136-
/**
137-
* Check if an expression can be parsed as an identifier
138-
*/
139-
boolean isIdentifier(PythonCore core, String snippet);
140-
141-
/**
142-
* Unescape Python escapes from a Java string
143-
*/
144-
public abstract String unescapeJavaString(PythonCore core, String str);
145-
146136
/**
147137
* Runtime exception used to indicate incomplete source code during parsing.
148138
*/

0 commit comments

Comments
 (0)