Skip to content

Commit f793ee3

Browse files
committed
Invalid escape sequences in (f-)string literals should produce a deprecation warning
1 parent 37d882a commit f793ee3

File tree

11 files changed

+109
-28
lines changed

11 files changed

+109
-28
lines changed

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/nodes/literal/FormatStringTests.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -408,6 +408,11 @@ public RuntimeException raiseInvalidSyntax(ErrorType type, Node location, String
408408
throw new RuntimeException("SyntaxError: " + String.format(message, arguments));
409409
}
410410

411+
/**
 * Test stub for parser warnings: instead of emitting a warning it throws a
 * RuntimeException whose message is "Warning: " followed by the formatted text,
 * so any warning raised while parsing surfaces to the caller as an exception.
 * The {@code type} argument is not used.
 */
@Override
412+
public void warn(Object type, String format, Object... args) {
413+
throw new RuntimeException("Warning: " + String.format(format, args));
414+
}
415+
411416
@Override
412417
public PythonLanguage getLanguage() {
413418
return null;

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/parser/StringUtilsTests.java

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,35 +40,73 @@
4040
*/
4141
package com.oracle.graal.python.test.parser;
4242

43-
import com.oracle.graal.python.parser.sst.StringUtils;
4443
import org.junit.Assert;
4544
import org.junit.Test;
4645

46+
import com.oracle.graal.python.PythonLanguage;
47+
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
48+
import com.oracle.graal.python.parser.sst.StringUtils;
49+
import com.oracle.graal.python.runtime.PythonParser.ErrorType;
50+
import com.oracle.graal.python.runtime.PythonParser.ParserErrorCallback;
51+
import com.oracle.truffle.api.nodes.Node;
52+
import com.oracle.truffle.api.source.Source;
53+
import com.oracle.truffle.api.source.SourceSection;
54+
4755
public class StringUtilsTests extends ParserTestBase {
56+
// Strict parser callback shared by the tests in this class: every raise,
// raiseInvalidSyntax and warn call fails the running test through Assert.fail,
// so a test that passes this callback to StringUtils.unescapeJavaString only
// succeeds when unescaping reports neither an error nor a warning.
private static final ParserErrorCallback errorCallback = new ParserErrorCallback() {
57+
@Override
58+
public RuntimeException raise(PythonBuiltinClassType type, String message, Object... args) {
59+
Assert.fail("Unexpected error: " + String.format(message, args));
60+
// Only satisfies the declared return type; Assert.fail above throws first.
return null;
61+
}
62+
63+
@Override
64+
public RuntimeException raiseInvalidSyntax(ErrorType type, Source source, SourceSection section, String message, Object... arguments) {
65+
Assert.fail("Unexpected error: " + String.format(message, arguments));
66+
// Only satisfies the declared return type; Assert.fail above throws first.
return null;
67+
}
68+
69+
@Override
70+
public RuntimeException raiseInvalidSyntax(ErrorType type, Node location, String message, Object... arguments) {
71+
Assert.fail("Unexpected error: " + String.format(message, arguments));
72+
// Only satisfies the declared return type; Assert.fail above throws first.
return null;
73+
}
74+
75+
// Any warning (e.g. for an invalid escape sequence) is treated as a test failure.
@Override
76+
public void warn(Object type, String format, Object... args) {
77+
Assert.fail("Unexpected warning: " + String.format(format, args));
78+
}
79+
80+
// No language instance is provided by this callback.
@Override
81+
public PythonLanguage getLanguage() {
82+
return null;
83+
}
84+
};
85+
4886
@Test
4987
public void unicodeCharNameBasic() throws Exception {
50-
Assert.assertEquals("Δ", StringUtils.unescapeJavaString("\\N{GREEK CAPITAL LETTER DELTA}"));
51-
Assert.assertEquals("A", StringUtils.unescapeJavaString("\\N{LATIN CAPITAL LETTER A}"));
52-
Assert.assertEquals("A", StringUtils.unescapeJavaString("\\N{LATIN CAPITAL LETTER a}"));
53-
Assert.assertEquals("A", StringUtils.unescapeJavaString("\\N{LATIN CAPITAL LETTEr a}"));
54-
Assert.assertEquals("A", StringUtils.unescapeJavaString("\\N{latin capital letter a}"));
55-
Assert.assertEquals("AHOJ", StringUtils.unescapeJavaString("A\\N{LATIN CAPITAL LETTER H}OJ"));
56-
Assert.assertEquals("AHOJ", StringUtils.unescapeJavaString("\\N{LATIN CAPITAL LETTER A}\\N{LATIN CAPITAL LETTER H}\\N{LATIN CAPITAL LETTER O}\\N{LATIN CAPITAL LETTER J}"));
88+
Assert.assertEquals("Δ", StringUtils.unescapeJavaString(errorCallback, "\\N{GREEK CAPITAL LETTER DELTA}"));
89+
Assert.assertEquals("A", StringUtils.unescapeJavaString(errorCallback, "\\N{LATIN CAPITAL LETTER A}"));
90+
Assert.assertEquals("A", StringUtils.unescapeJavaString(errorCallback, "\\N{LATIN CAPITAL LETTER a}"));
91+
Assert.assertEquals("A", StringUtils.unescapeJavaString(errorCallback, "\\N{LATIN CAPITAL LETTEr a}"));
92+
Assert.assertEquals("A", StringUtils.unescapeJavaString(errorCallback, "\\N{latin capital letter a}"));
93+
Assert.assertEquals("AHOJ", StringUtils.unescapeJavaString(errorCallback, "A\\N{LATIN CAPITAL LETTER H}OJ"));
94+
Assert.assertEquals("AHOJ", StringUtils.unescapeJavaString(errorCallback, "\\N{LATIN CAPITAL LETTER A}\\N{LATIN CAPITAL LETTER H}\\N{LATIN CAPITAL LETTER O}\\N{LATIN CAPITAL LETTER J}"));
5795
checkUnknownChar("ahoj");
5896
}
5997

6098
@Test
6199
public void blockHangulSyllables() throws Exception {
62-
Assert.assertEquals("가", StringUtils.unescapeJavaString("\\N{HANGUL SYLLABLE GA}"));
63-
Assert.assertEquals("돐", StringUtils.unescapeJavaString("\\N{HANGUL SYLLABLE DOLS}"));
64-
Assert.assertEquals("똜", StringUtils.unescapeJavaString("\\N{HANGUL SYLLABLE DDOLS}"));
100+
Assert.assertEquals("가", StringUtils.unescapeJavaString(errorCallback, "\\N{HANGUL SYLLABLE GA}"));
101+
Assert.assertEquals("돐", StringUtils.unescapeJavaString(errorCallback, "\\N{HANGUL SYLLABLE DOLS}"));
102+
Assert.assertEquals("똜", StringUtils.unescapeJavaString(errorCallback, "\\N{HANGUL SYLLABLE DDOLS}"));
65103
}
66104

67105
@Test
68106
public void blockCjkUnifiedIdeograph() throws Exception {
69-
Assert.assertEquals("㐀", StringUtils.unescapeJavaString("\\N{CJK Unified Ideograph-3400}"));
70-
Assert.assertEquals("𫝜", StringUtils.unescapeJavaString("\\N{CJK Unified Ideograph-2B75C}"));
71-
Assert.assertEquals("丳", StringUtils.unescapeJavaString("\\N{CJK Unified Ideograph-4E33}"));
107+
Assert.assertEquals("㐀", StringUtils.unescapeJavaString(errorCallback, "\\N{CJK Unified Ideograph-3400}"));
108+
Assert.assertEquals("𫝜", StringUtils.unescapeJavaString(errorCallback, "\\N{CJK Unified Ideograph-2B75C}"));
109+
Assert.assertEquals("丳", StringUtils.unescapeJavaString(errorCallback, "\\N{CJK Unified Ideograph-4E33}"));
72110
}
73111

74112
@Test

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/Python3Core.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@
174174
import com.oracle.graal.python.builtins.objects.type.TypeNodes.GetNameNode;
175175
import com.oracle.graal.python.builtins.objects.zipimporter.ZipImporterBuiltins;
176176
import com.oracle.graal.python.nodes.BuiltinNames;
177+
import com.oracle.graal.python.nodes.attributes.ReadAttributeFromDynamicObjectNode;
178+
import com.oracle.graal.python.nodes.call.CallNode;
177179
import com.oracle.graal.python.nodes.call.GenericInvokeNode;
178180
import com.oracle.graal.python.runtime.PythonCodeSerializer;
179181
import com.oracle.graal.python.runtime.PythonContext;
@@ -210,7 +212,7 @@ public final class Python3Core implements PythonCore {
210212

211213
public static final Pattern MISSING_PARENTHESES_PATTERN = Pattern.compile("^(print|exec) +([^(][^;]*).*");
212214

213-
private static final String[] initializeCoreFiles() {
215+
private static String[] initializeCoreFiles() {
214216
// Order matters!
215217
List<String> coreFiles = new ArrayList<>(Arrays.asList(
216218
"_descriptor",
@@ -305,7 +307,7 @@ private static final String[] initializeCoreFiles() {
305307
c = null;
306308
}
307309

308-
private static final PythonBuiltins[] initializeBuiltins() {
310+
private static PythonBuiltins[] initializeBuiltins() {
309311
List<PythonBuiltins> builtins = new ArrayList<>(Arrays.asList(
310312
new BuiltinConstructors(),
311313
new BuiltinFunctions(),
@@ -490,6 +492,7 @@ public boolean isInitialized() {
490492
return initialized;
491493
}
492494

495+
@Override
493496
public void initialize(PythonContext context) {
494497
singletonContext = context;
495498
initializeJavaCore();
@@ -525,16 +528,19 @@ public void postInitialize() {
525528
}
526529
}
527530

531+
@Override
528532
@TruffleBoundary
529533
public PythonModule lookupBuiltinModule(String name) {
530534
return builtinModules.get(name);
531535
}
532536

537+
@Override
533538
public PythonBuiltinClass lookupType(PythonBuiltinClassType type) {
534539
assert builtinTypes[type.ordinal()] != null;
535540
return builtinTypes[type.ordinal()];
536541
}
537542

543+
@Override
538544
@TruffleBoundary
539545
public String[] builtinModuleNames() {
540546
return builtinModules.keySet().toArray(new String[0]);
@@ -557,6 +563,14 @@ public PException raise(PythonBuiltinClassType type, String format, Object... ar
557563
throw PException.fromObject(instance, null, PythonOptions.isPExceptionWithJavaStacktrace(getLanguage()));
558564
}
559565

566+
/**
 * Issues a warning by calling the {@code warn} attribute of the builtin
 * {@code _warnings} module with the formatted message as first argument and
 * {@code type} as second argument.
 *
 * @param type warning category; forwarded to the call unchanged
 * @param format {@link String#format} pattern for the warning message
 * @param args arguments for {@code format}
 */
// NOTE(review): warningsModule is dereferenced without a null check; presumably
// "_warnings" is always registered before this can be called - confirm.
// NOTE(review): type is passed through as-is, so callers decide what object the
// warn function receives as category - confirm that it accepts what they pass.
@Override
567+
@TruffleBoundary
568+
public void warn(Object type, String format, Object... args) {
569+
PythonModule warningsModule = lookupBuiltinModule("_warnings");
570+
Object warn = ReadAttributeFromDynamicObjectNode.getUncached().execute(warningsModule.getStorage(), "warn");
571+
CallNode.getUncached().execute(warn, String.format(format, args), type);
572+
}
573+
560574
private void publishBuiltinModules() {
561575
PythonModule sysModule = builtinModules.get("sys");
562576
PDict sysModules = (PDict) sysModule.getAttribute("modules");
@@ -704,23 +718,28 @@ private void loadFile(String s, String prefix) {
704718
GenericInvokeNode.getUncached().execute(callTarget, PArguments.withGlobals(mod));
705719
}
706720

721+
@Override
707722
public PythonObjectFactory factory() {
708723
return objectFactory;
709724
}
710725

726+
@Override
711727
public void setContext(PythonContext context) {
712728
assert singletonContext == null;
713729
singletonContext = context;
714730
}
715731

732+
@Override
716733
public PInt getTrue() {
717734
return pyTrue;
718735
}
719736

737+
@Override
720738
public PInt getFalse() {
721739
return pyFalse;
722740
}
723741

742+
@Override
724743
public PFloat getNaN() {
725744
return pyNaN;
726745
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,12 @@
8484
import com.oracle.truffle.api.CompilerDirectives;
8585
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
8686
import com.oracle.truffle.api.dsl.Cached;
87+
import com.oracle.truffle.api.dsl.Cached.Shared;
8788
import com.oracle.truffle.api.dsl.Fallback;
8889
import com.oracle.truffle.api.dsl.GenerateNodeFactory;
8990
import com.oracle.truffle.api.dsl.NodeFactory;
9091
import com.oracle.truffle.api.dsl.Specialization;
9192
import com.oracle.truffle.api.dsl.TypeSystemReference;
92-
import com.oracle.truffle.api.dsl.Cached.Shared;
9393
import com.oracle.truffle.api.frame.VirtualFrame;
9494
import com.oracle.truffle.api.library.CachedLibrary;
9595

@@ -159,7 +159,7 @@ Object encode(VirtualFrame frame, Object bytes, @SuppressWarnings("unused") Stri
159159
PythonCore core = getCore();
160160
byte[] byteArray = toBytes.execute(frame, bytes);
161161
String string = strFromBytes(byteArray);
162-
String unescapedString = core.getParser().unescapeJavaString(string);
162+
String unescapedString = core.getParser().unescapeJavaString(core, string);
163163
return factory().createTuple(new Object[]{unescapedString, byteArray.length});
164164
}
165165

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/PythonParserImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,8 +389,8 @@ public boolean isIdentifier(PythonCore core, String snippet) {
389389

390390
@Override
391391
@TruffleBoundary
392-
public String unescapeJavaString(String str) {
393-
return StringUtils.unescapeJavaString(str);
392+
public String unescapeJavaString(PythonCore core, String str) {
393+
return StringUtils.unescapeJavaString(core, str);
394394
}
395395

396396
private static PException handleParserError(ParserErrorCallback errors, Source source, Exception e) {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/sst/FormatStringParser.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,8 @@ public static int createTokens(ArrayList<Token> tokens, ParserErrorCallback erro
278278
// double "\\" is skipped, note that "\\\N{...}" should still be
279279
// treated as \N escape sequence
280280
index++;
281+
} else if (lookahead(text, index, len, '{')) {
282+
StringUtils.warnInvalidEscapeSequence(errorCallback, text.charAt(index + 1));
281283
} else if (lookahead(text, index, len, 'N', '{')) {
282284
// skip escape sequence \N{...}, it should not be treated as an
283285
// expression inside f-string, but \\N{...} should be left intact

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/sst/StringLiteralSSTNode.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ public static StringLiteralSSTNode create(String[] values, int startOffset, int
197197
}
198198
if (!isRaw && !isFormat) {
199199
try {
200-
text = StringUtils.unescapeJavaString(text);
200+
text = StringUtils.unescapeJavaString(errors, text);
201201
} catch (PException e) {
202202
e.expect(PythonBuiltinClassType.UnicodeDecodeError, IsBuiltinClassProfile.getUncached());
203203
String message = e.getMessage();
@@ -224,7 +224,7 @@ public static StringLiteralSSTNode create(String[] values, int startOffset, int
224224
formatStringLiterals.ensureCapacity(formatStringLiterals.size() + literals.length);
225225
for (int i = 0; i < literals.length; i++) {
226226
if (literals[i] != null && !isRaw) {
227-
literals[i] = StringUtils.unescapeJavaString(literals[i]);
227+
literals[i] = StringUtils.unescapeJavaString(errors, literals[i]);
228228
}
229229
formatStringLiterals.add(literals[i]);
230230
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/sst/StringUtils.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import com.oracle.graal.python.nodes.expression.ExpressionNode;
4949
import com.oracle.graal.python.nodes.literal.StringLiteralNode;
5050
import com.oracle.graal.python.nodes.statement.StatementNode;
51+
import com.oracle.graal.python.runtime.PythonParser.ParserErrorCallback;
5152
import com.oracle.truffle.api.CompilerDirectives;
5253

5354
public class StringUtils {
@@ -79,11 +80,12 @@ public static StringLiteralNode extractDoc(ExpressionNode node) {
7980
return null;
8081
}
8182

82-
public static String unescapeJavaString(String st) {
83+
public static String unescapeJavaString(ParserErrorCallback errorCallback, String st) {
8384
if (st.indexOf("\\") == -1) {
8485
return st;
8586
}
8687
StringBuilder sb = new StringBuilder(st.length());
88+
boolean wasDeprecationWarning = false;
8789
for (int i = 0; i < st.length(); i++) {
8890
char ch = st.charAt(i);
8991
if (ch == '\\') {
@@ -180,6 +182,10 @@ public static String unescapeJavaString(String st) {
180182
i = doCharacterName(st, sb, i + 2);
181183
continue;
182184
default:
185+
if (!wasDeprecationWarning) {
186+
wasDeprecationWarning = true;
187+
warnInvalidEscapeSequence(errorCallback, nextChar);
188+
}
183189
sb.append(ch);
184190
sb.append(nextChar);
185191
i++;
@@ -192,6 +198,10 @@ public static String unescapeJavaString(String st) {
192198
return sb.toString();
193199
}
194200

201+
/**
 * Reports the invalid escape sequence formed by a backslash followed by
 * {@code nextChar} through {@code errorCallback.warn}, using the message
 * "invalid escape sequence '\<nextChar>'".
 *
 * @param errorCallback callback that receives the warning
 * @param nextChar the character that followed the backslash
 */
// NOTE(review): the category is passed as the String "DeprecationWarning", not
// as a class object. The Python3Core.warn implementation forwards this value
// unchanged as the second argument of _warnings.warn - confirm that a str
// category is accepted there (CPython's warnings.warn expects a Warning subclass).
public static void warnInvalidEscapeSequence(ParserErrorCallback errorCallback, char nextChar) {
202+
errorCallback.warn("DeprecationWarning", "invalid escape sequence '\\%c'", nextChar);
203+
}
204+
195205
private static final String UNICODE_ERROR = "'unicodeescape' codec can't decode bytes in position %d-%d:";
196206
private static final String MALFORMED_ERROR = " malformed \\N character escape";
197207
private static final String UNKNOWN_UNICODE_ERROR = " unknown Unicode character name";

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonCore.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
*/
2626
package com.oracle.graal.python.runtime;
2727

28-
import com.oracle.graal.python.util.Supplier;
29-
3028
import com.oracle.graal.python.PythonLanguage;
3129
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
3230
import com.oracle.graal.python.builtins.objects.floats.PFloat;
@@ -36,6 +34,7 @@
3634
import com.oracle.graal.python.runtime.PythonParser.ParserErrorCallback;
3735
import com.oracle.graal.python.runtime.exception.PException;
3836
import com.oracle.graal.python.runtime.object.PythonObjectFactory;
37+
import com.oracle.graal.python.util.Supplier;
3938

4039
/**
4140
* Storage for initialized Python built-in modules and types.
@@ -67,9 +66,14 @@ public interface PythonCore extends ParserErrorCallback {
6766
public String[] builtinModuleNames();
6867

6968
// Error throwing functions
69+
@Override
7070
public PException raise(PythonBuiltinClassType type, String format, Object... args);
7171

72+
@Override
73+
void warn(Object type, String format, Object... args);
74+
7275
// Accessors
76+
@Override
7377
public PythonLanguage getLanguage();
7478

7579
public PythonParser getParser();

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonParser.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ default RuntimeException raiseInvalidSyntax(Source source, SourceSection section
109109
return raiseInvalidSyntax(source, section, ErrorMessages.INVALID_SYNTAX, new Object[0]);
110110
}
111111

112+
void warn(Object type, String format, Object... args);
113+
112114
PythonLanguage getLanguage();
113115
}
114116

@@ -129,7 +131,7 @@ default RuntimeException raiseInvalidSyntax(Source source, SourceSection section
129131
/**
130132
* Unescape Python escapes from a Java string
131133
*/
132-
public abstract String unescapeJavaString(String str);
134+
public abstract String unescapeJavaString(PythonCore core, String str);
133135

134136
/**
135137
* Runtime exception used to indicate incomplete source code during parsing.

0 commit comments

Comments
 (0)