Skip to content

Commit 708cc18

Browse files
committed
Fix off-by-one error in the format string parser and adjust UnicodeDecodeErrorBuiltins to match the new offsets
1 parent 696a0f7 commit 708cc18

File tree

4 files changed

+27
-25
lines changed

4 files changed

+27
-25
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeDecodeErrorBuiltins.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@
4646
import static com.oracle.graal.python.builtins.objects.exception.UnicodeErrorBuiltins.IDX_REASON;
4747
import static com.oracle.graal.python.builtins.objects.exception.UnicodeErrorBuiltins.IDX_START;
4848
import static com.oracle.graal.python.builtins.objects.exception.UnicodeErrorBuiltins.UNICODE_ERROR_ATTR_FACTORY;
49+
import static com.oracle.graal.python.builtins.objects.exception.UnicodeErrorBuiltins.getArgAsBytes;
4950
import static com.oracle.graal.python.builtins.objects.exception.UnicodeErrorBuiltins.getArgAsInt;
50-
import static com.oracle.graal.python.builtins.objects.exception.UnicodeErrorBuiltins.getArgAsObject;
5151
import static com.oracle.graal.python.builtins.objects.exception.UnicodeErrorBuiltins.getArgAsString;
5252
import static com.oracle.graal.python.nodes.SpecialMethodNames.__INIT__;
5353
import static com.oracle.graal.python.nodes.SpecialMethodNames.__STR__;
@@ -59,8 +59,8 @@
5959
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
6060
import com.oracle.graal.python.builtins.PythonBuiltins;
6161
import com.oracle.graal.python.builtins.objects.PNone;
62-
import com.oracle.graal.python.lib.PyObjectGetItem;
63-
import com.oracle.graal.python.lib.PyObjectSizeNode;
62+
import com.oracle.graal.python.builtins.objects.bytes.PBytesLike;
63+
import com.oracle.graal.python.builtins.objects.common.SequenceStorageNodes;
6464
import com.oracle.graal.python.lib.PyObjectStrAsJavaStringNode;
6565
import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode;
6666
import com.oracle.graal.python.nodes.function.PythonBuiltinNode;
@@ -88,15 +88,16 @@ public abstract static class UnicodeDecodeErrorInitNode extends PythonBuiltinNod
8888
public abstract Object execute(VirtualFrame frame, PBaseException self, Object[] args);
8989

9090
@Specialization
91-
Object initNoArgs(PBaseException self, Object[] args,
91+
Object initNoArgs(VirtualFrame frame, PBaseException self, Object[] args,
92+
@Cached UnicodeErrorBuiltins.GetArgAsBytesNode getArgAsBytesNode,
9293
@Cached CastToJavaStringNode toJavaStringNode,
9394
@Cached CastToJavaIntExactNode toJavaIntExactNode,
9495
@Cached BaseExceptionBuiltins.BaseExceptionInitNode baseInitNode) {
9596
baseInitNode.execute(self, args);
9697
// PyArg_ParseTuple(args, "UOnnU"), TODO: add proper error messages
9798
self.setExceptionAttributes(new Object[]{
9899
getArgAsString(args, 0, this, toJavaStringNode),
99-
getArgAsObject(args, 1, this),
100+
getArgAsBytes(frame, args, 1, this, getArgAsBytesNode),
100101
getArgAsInt(args, 2, this, toJavaIntExactNode),
101102
getArgAsInt(args, 3, this, toJavaIntExactNode),
102103
getArgAsString(args, 4, this, toJavaStringNode)
@@ -111,8 +112,8 @@ public abstract static class UnicodeEncodeErrorStrNode extends PythonUnaryBuilti
111112
@Specialization
112113
Object str(VirtualFrame frame, PBaseException self,
113114
@Cached BaseExceptionAttrNode attrNode,
114-
@Cached PyObjectGetItem getItem,
115-
@Cached PyObjectSizeNode sizeNode,
115+
@Cached SequenceStorageNodes.GetItemNode getitemNode,
116+
@Cached SequenceStorageNodes.LenNode lenNode,
116117
@Cached PyObjectStrAsJavaStringNode strNode) {
117118
if (self.getExceptionAttributes() == null) {
118119
// Not properly initialized.
@@ -121,13 +122,13 @@ Object str(VirtualFrame frame, PBaseException self,
121122

122123
// Get reason and encoding as strings, which they might not be if they've been
123124
// modified after we were constructed.
124-
Object object = attrNode.get(self, IDX_OBJECT, UNICODE_ERROR_ATTR_FACTORY);
125+
PBytesLike object = (PBytesLike) attrNode.get(self, IDX_OBJECT, UNICODE_ERROR_ATTR_FACTORY);
125126
final int start = attrNode.getInt(self, IDX_START, UNICODE_ERROR_ATTR_FACTORY);
126127
final int end = attrNode.getInt(self, IDX_END, UNICODE_ERROR_ATTR_FACTORY);
127128
final String encoding = strNode.execute(frame, attrNode.get(self, IDX_ENCODING, UNICODE_ERROR_ATTR_FACTORY));
128129
final String reason = strNode.execute(frame, attrNode.get(self, IDX_REASON, UNICODE_ERROR_ATTR_FACTORY));
129-
if (start < sizeNode.execute(frame, object) && end == start + 1) {
130-
final int b = (int) getItem.execute(frame, object, 0);
130+
if (start < lenNode.execute(object.getSequenceStorage()) && end == start + 1) {
131+
final int b = (int) getitemNode.execute(frame, object.getSequenceStorage(), 0);
131132
return PythonUtils.format("'%s' codec can't decode byte 0x%02x in position %d: %s", encoding, b, start, reason);
132133
} else {
133134
return PythonUtils.format("'%s' codec can't decode bytes in position %d-%d: %s", encoding, start, end - 1, reason);

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeErrorBuiltins.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import com.oracle.graal.python.builtins.CoreFunctions;
4949
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
5050
import com.oracle.graal.python.builtins.PythonBuiltins;
51-
import com.oracle.graal.python.builtins.modules.CodecsModuleBuiltins;
5251
import com.oracle.graal.python.builtins.objects.PNone;
5352
import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAccessLibrary;
5453
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
@@ -61,6 +60,7 @@
6160
import com.oracle.graal.python.nodes.util.CastToJavaIntExactNode;
6261
import com.oracle.graal.python.nodes.util.CastToJavaStringNode;
6362
import com.oracle.graal.python.runtime.object.PythonObjectFactory;
63+
import com.oracle.truffle.api.CompilerDirectives;
6464
import com.oracle.truffle.api.dsl.Cached;
6565
import com.oracle.truffle.api.dsl.GenerateNodeFactory;
6666
import com.oracle.truffle.api.dsl.NodeFactory;
@@ -109,22 +109,25 @@ public static int getArgAsInt(Object[] args, int index, PNodeWithRaise raiseNode
109109
}
110110

111111
public abstract static class GetArgAsBytesNode extends PNodeWithRaiseAndIndirectCall {
112-
abstract PBytes execute(VirtualFrame frame, Object val, String encoding);
112+
abstract PBytes execute(VirtualFrame frame, Object val);
113113

114114
@Specialization
115-
PBytes doString(String value, String encoding,
116-
@Cached CodecsModuleBuiltins.CodecsEncodeToJavaBytesNode encode,
115+
@CompilerDirectives.TruffleBoundary
116+
PBytes doString(String value,
117117
@Cached PythonObjectFactory factory) {
118-
return factory.createBytes(encode.execute(value, encoding, "ignore"));
118+
// TODO: cbasca cPython works directly with bytes while we have Java strings which are
119+
// encoded, here we encode them to bytes using the platform default charset but this might not be the correct /
120+
// ideal case
121+
return factory.createBytes(value.getBytes());
119122
}
120123

121124
@Specialization
122-
PBytes doBytes(PBytes value, @SuppressWarnings("unused") String encoding) {
125+
PBytes doBytes(PBytes value) {
123126
return value;
124127
}
125128

126129
@Specialization(guards = {"!isPBytes(value)", "!isString(value)"})
127-
PBytes doOther(VirtualFrame frame, Object value, @SuppressWarnings("unused") String encoding,
130+
PBytes doOther(VirtualFrame frame, Object value,
128131
@CachedLibrary(limit = "getCallSiteInlineCacheMaxDepth()") PythonBufferAccessLibrary bufferLib,
129132
@Cached PythonObjectFactory factory) {
130133
try {
@@ -141,9 +144,7 @@ public static Object getArgAsBytes(VirtualFrame frame, Object[] args, int index,
141144
if (args.length < index + 1) {
142145
throw raiseNode.raise(PythonBuiltinClassType.TypeError);
143146
} else {
144-
// the encoding must have been already set during init
145-
assert args[IDX_ENCODING] instanceof String;
146-
return getArgAsBytesNode.execute(frame, args[index], (String) args[IDX_ENCODING]);
147+
return getArgAsBytesNode.execute(frame, args[index]);
147148
}
148149
}
149150

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/sst/FormatStringParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ public static int createTokens(ArrayList<Token> tokens, ParserErrorCallback erro
264264
// Missing the closing brace. The escape sequence is malformed,
265265
// which will be reported by the String escaping code later,
266266
// here we just end the parsing
267-
index = len - 1;
267+
index = len;
268268
break parserLoop;
269269
}
270270
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/sst/StringUtils.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ public static void warnInvalidEscapeSequence(ParserErrorCallback errorCallback,
213213
}
214214

215215
private static final String UNICODE_ERROR = "'unicodeescape' codec can't decode bytes in position %d-%d:";
216-
private static final String MALFORMED_ERROR = " malformed \\N character escape";
216+
private static final String MALFORMED_ERROR = "malformed \\N character escape";
217217
private static final String TRUNCATED_XXX_ERROR = "truncated \\xXX escape";
218218
private static final String TRUNCATED_UXXXX_ERROR = "truncated \\uXXXX escape";
219219
private static final String TRUNCATED_UXXXXXXXX_ERROR = "truncated \\UXXXXXXXX escape";
@@ -266,15 +266,15 @@ private static PException createTruncatedError(String text, int startIndex, int
266266
@CompilerDirectives.TruffleBoundary
267267
private static int doCharacterName(String text, StringBuilder sb, int offset) {
268268
if (offset >= text.length()) {
269-
throw PConstructAndRaiseNode.raiseUncachedUnicodeDecodeError("unicodeescape", text, offset - 2, offset - 1, MALFORMED_ERROR);
269+
throw PConstructAndRaiseNode.raiseUncachedUnicodeDecodeError("unicodeescape", text, offset - 2, offset, MALFORMED_ERROR);
270270
}
271271
char ch = text.charAt(offset);
272272
if (ch != '{') {
273-
throw PConstructAndRaiseNode.raiseUncachedUnicodeDecodeError("unicodeescape", text, offset - 2, offset - 1, MALFORMED_ERROR);
273+
throw PConstructAndRaiseNode.raiseUncachedUnicodeDecodeError("unicodeescape", text, offset - 2, offset, MALFORMED_ERROR);
274274
}
275275
int closeIndex = text.indexOf("}", offset + 1);
276276
if (closeIndex == -1) {
277-
throw PConstructAndRaiseNode.raiseUncachedUnicodeDecodeError("unicodeescape", text, offset - 2, text.length() - 1, MALFORMED_ERROR);
277+
throw PConstructAndRaiseNode.raiseUncachedUnicodeDecodeError("unicodeescape", text, offset - 2, text.length(), MALFORMED_ERROR);
278278
}
279279
String charName = text.substring(offset + 1, closeIndex).toUpperCase();
280280
// When JDK 1.8 will not be supported, we can replace with Character.codePointOf(String

0 commit comments

Comments
 (0)