|
50 | 50 | import java.nio.charset.CharacterCodingException;
|
51 | 51 | import java.nio.charset.Charset;
|
52 | 52 | import java.nio.charset.CodingErrorAction;
|
| 53 | +import java.util.Arrays; |
53 | 54 | import java.util.HashMap;
|
54 | 55 | import java.util.List;
|
55 | 56 | import java.util.Map;
|
|
58 | 59 | import com.oracle.graal.python.builtins.CoreFunctions;
|
59 | 60 | import com.oracle.graal.python.builtins.PythonBuiltins;
|
60 | 61 | import com.oracle.graal.python.builtins.objects.PNone;
|
| 62 | +import com.oracle.graal.python.builtins.objects.bytes.BytesNodes; |
61 | 63 | import com.oracle.graal.python.builtins.objects.bytes.PBytes;
|
62 | 64 | import com.oracle.graal.python.builtins.objects.bytes.PIBytesLike;
|
63 | 65 | import com.oracle.graal.python.builtins.objects.common.SequenceStorageNodes;
|
64 | 66 | import com.oracle.graal.python.builtins.objects.tuple.PTuple;
|
65 | 67 | import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode;
|
66 | 68 | import com.oracle.graal.python.nodes.function.PythonBuiltinNode;
|
| 69 | +import com.oracle.graal.python.nodes.function.builtins.PythonBinaryBuiltinNode; |
| 70 | +import com.oracle.graal.python.nodes.truffle.PythonArithmeticTypes; |
| 71 | +import com.oracle.graal.python.runtime.PythonCore; |
67 | 72 | import com.oracle.truffle.api.CompilerDirectives;
|
68 | 73 | import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
|
69 | 74 | import com.oracle.truffle.api.dsl.Cached;
|
70 | 75 | import com.oracle.truffle.api.dsl.Fallback;
|
71 | 76 | import com.oracle.truffle.api.dsl.GenerateNodeFactory;
|
| 77 | +import com.oracle.truffle.api.dsl.ImportStatic; |
72 | 78 | import com.oracle.truffle.api.dsl.NodeFactory;
|
73 | 79 | import com.oracle.truffle.api.dsl.Specialization;
|
74 | 80 | import com.oracle.truffle.api.profiles.ValueProfile;
|
@@ -251,6 +257,102 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
|
251 | 257 | }
|
252 | 258 | }
|
253 | 259 |
|
| 260 | + @Builtin(name = "unicode_escape_encode", fixedNumOfPositionalArgs = 1, keywordArguments = {"errors"}) |
| 261 | + @GenerateNodeFactory |
| 262 | + @ImportStatic(PythonArithmeticTypes.class) |
| 263 | + abstract static class UnicodeEscapeEncode extends PythonBinaryBuiltinNode { |
| 264 | + static final byte[] hexdigits = "0123456789abcdef".getBytes(); |
| 265 | + |
| 266 | + @Specialization |
| 267 | + @TruffleBoundary |
| 268 | + Object encode(String str, @SuppressWarnings("unused") Object errors) { |
| 269 | + // Initial allocation of bytes for UCS4 strings needs 10 bytes per source character |
| 270 | + // ('\U00xxxxxx') |
| 271 | + byte[] bytes = new byte[str.length() * 10]; |
| 272 | + int j = 0; |
| 273 | + for (int i = 0; i < str.length(); i++) { |
| 274 | + int ch = str.codePointAt(i); |
| 275 | + /* U+0000-U+00ff range */ |
| 276 | + if (ch < 0x100) { |
| 277 | + if (ch >= ' ' && ch < 127) { |
| 278 | + if (ch != '\\') { |
| 279 | + /* Copy printable US ASCII as-is */ |
| 280 | + bytes[j++] = (byte) ch; |
| 281 | + } else { |
| 282 | + /* Escape backslashes */ |
| 283 | + bytes[j++] = '\\'; |
| 284 | + bytes[j++] = '\\'; |
| 285 | + } |
| 286 | + } else if (ch == '\t') { |
| 287 | + /* Map special whitespace to '\t', \n', '\r' */ |
| 288 | + bytes[j++] = '\\'; |
| 289 | + bytes[j++] = 't'; |
| 290 | + } else if (ch == '\n') { |
| 291 | + bytes[j++] = '\\'; |
| 292 | + bytes[j++] = 'n'; |
| 293 | + } else if (ch == '\r') { |
| 294 | + bytes[j++] = '\\'; |
| 295 | + bytes[j++] = 'r'; |
| 296 | + } else { |
| 297 | + /* Map non-printable US ASCII and 8-bit characters to '\xHH' */ |
| 298 | + bytes[j++] = '\\'; |
| 299 | + bytes[j++] = 'x'; |
| 300 | + bytes[j++] = hexdigits[(ch >> 4) & 0x000F]; |
| 301 | + bytes[j++] = hexdigits[ch & 0x000F]; |
| 302 | + } |
| 303 | + } else if (ch < 0x10000) { |
| 304 | + /* U+0100-U+ffff range: Map 16-bit characters to '\\uHHHH' */ |
| 305 | + bytes[j++] = '\\'; |
| 306 | + bytes[j++] = 'u'; |
| 307 | + bytes[j++] = hexdigits[(ch >> 12) & 0x000F]; |
| 308 | + bytes[j++] = hexdigits[(ch >> 8) & 0x000F]; |
| 309 | + bytes[j++] = hexdigits[(ch >> 4) & 0x000F]; |
| 310 | + bytes[j++] = hexdigits[ch & 0x000F]; |
| 311 | + } else { |
| 312 | + /* U+010000-U+10ffff range: Map 21-bit characters to '\U00HHHHHH' */ |
| 313 | + /* Make sure that the first two digits are zero */ |
| 314 | + bytes[j++] = '\\'; |
| 315 | + bytes[j++] = 'U'; |
| 316 | + bytes[j++] = '0'; |
| 317 | + bytes[j++] = '0'; |
| 318 | + bytes[j++] = hexdigits[(ch >> 20) & 0x0000000F]; |
| 319 | + bytes[j++] = hexdigits[(ch >> 16) & 0x0000000F]; |
| 320 | + bytes[j++] = hexdigits[(ch >> 12) & 0x0000000F]; |
| 321 | + bytes[j++] = hexdigits[(ch >> 8) & 0x0000000F]; |
| 322 | + bytes[j++] = hexdigits[(ch >> 4) & 0x0000000F]; |
| 323 | + bytes[j++] = hexdigits[ch & 0x0000000F]; |
| 324 | + } |
| 325 | + } |
| 326 | + bytes = Arrays.copyOf(bytes, j); |
| 327 | + return factory().createTuple(new Object[]{factory().createBytes(bytes), str.length()}); |
| 328 | + } |
| 329 | + |
| 330 | + @Fallback |
| 331 | + Object encode(Object str, @SuppressWarnings("unused") Object errors) { |
| 332 | + throw raise(TypeError, "unicode_escape_encode() argument 1 must be str, not %p", str); |
| 333 | + } |
| 334 | + } |
| 335 | + |
| 336 | + @Builtin(name = "unicode_escape_decode", fixedNumOfPositionalArgs = 1, keywordArguments = {"errors"}) |
| 337 | + @GenerateNodeFactory |
| 338 | + abstract static class UnicodeEscapeDecode extends PythonBinaryBuiltinNode { |
| 339 | + @Specialization(guards = "isBytes(bytes)") |
| 340 | + Object encode(Object bytes, @SuppressWarnings("unused") PNone errors, |
| 341 | + @Cached("create()") BytesNodes.ToBytesNode toBytes) { |
| 342 | + // for now we'll just parse this as a String, ignoring any error strategies |
| 343 | + PythonCore core = getCore(); |
| 344 | + byte[] byteArray = toBytes.execute(bytes); |
| 345 | + String string = strFromBytes(byteArray); |
| 346 | + String unescapedString = core.getParser().unescapeJavaString(string); |
| 347 | + return factory().createTuple(new Object[]{unescapedString, byteArray.length}); |
| 348 | + } |
| 349 | + |
| 350 | + @TruffleBoundary |
| 351 | + private static String strFromBytes(byte[] execute) { |
| 352 | + return new String(execute); |
| 353 | + } |
| 354 | + } |
| 355 | + |
254 | 356 | // _codecs.encode(obj, encoding='utf-8', errors='strict')
|
255 | 357 | @Builtin(name = "__truffle_encode", fixedNumOfPositionalArgs = 1, keywordArguments = {"encoding", "errors"})
|
256 | 358 | @GenerateNodeFactory
|
|
0 commit comments