|
50 | 50 | import java.nio.charset.CharacterCodingException;
|
51 | 51 | import java.nio.charset.Charset;
|
52 | 52 | import java.nio.charset.CodingErrorAction;
|
| 53 | +import java.util.Arrays; |
53 | 54 | import java.util.HashMap;
|
54 | 55 | import java.util.List;
|
55 | 56 | import java.util.Map;
|
|
65 | 66 | import com.oracle.graal.python.builtins.objects.tuple.PTuple;
|
66 | 67 | import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode;
|
67 | 68 | import com.oracle.graal.python.nodes.function.PythonBuiltinNode;
|
| 69 | +import com.oracle.graal.python.nodes.function.builtins.PythonBinaryBuiltinNode; |
| 70 | +import com.oracle.graal.python.nodes.truffle.PythonArithmeticTypes; |
68 | 71 | import com.oracle.graal.python.runtime.PythonCore;
|
69 | 72 | import com.oracle.truffle.api.CompilerDirectives;
|
70 | 73 | import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
|
71 | 74 | import com.oracle.truffle.api.dsl.Cached;
|
72 | 75 | import com.oracle.truffle.api.dsl.Fallback;
|
73 | 76 | import com.oracle.truffle.api.dsl.GenerateNodeFactory;
|
| 77 | +import com.oracle.truffle.api.dsl.ImportStatic; |
74 | 78 | import com.oracle.truffle.api.dsl.NodeFactory;
|
75 | 79 | import com.oracle.truffle.api.dsl.Specialization;
|
76 | 80 | import com.oracle.truffle.api.profiles.ValueProfile;
|
@@ -253,13 +257,89 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
|
253 | 257 | }
|
254 | 258 | }
|
255 | 259 |
|
| 260 | + @Builtin(name = "unicode_escape_encode", fixedNumOfPositionalArgs = 1, keywordArguments = {"errors"}) |
| 261 | + @GenerateNodeFactory |
| 262 | + @ImportStatic(PythonArithmeticTypes.class) |
| 263 | + abstract static class UnicodeEscapeEncode extends PythonBinaryBuiltinNode { |
| 264 | + static final byte[] hexdigits = "0123456789abcdef".getBytes(); |
| 265 | + |
| 266 | + @Specialization |
| 267 | + @TruffleBoundary |
| 268 | + Object encode(String str, @SuppressWarnings("unused") Object errors) { |
| 269 | + // Initial allocation of bytes for UCS4 strings needs 10 bytes per source character |
| 270 | + // ('\U00xxxxxx') |
| 271 | + byte[] bytes = new byte[str.length() * 10]; |
| 272 | + int j = 0; |
| 273 | + for (int i = 0; i < str.length(); i++) { |
| 274 | + int ch = str.codePointAt(i); |
| 275 | + /* U+0000-U+00ff range */ |
| 276 | + if (ch < 0x100) { |
| 277 | + if (ch >= ' ' && ch < 127) { |
| 278 | + if (ch != '\\') { |
| 279 | + /* Copy printable US ASCII as-is */ |
| 280 | + bytes[j++] = (byte) ch; |
| 281 | + } else { |
| 282 | + /* Escape backslashes */ |
| 283 | + bytes[j++] = '\\'; |
| 284 | + bytes[j++] = '\\'; |
| 285 | + } |
| 286 | + } else if (ch == '\t') { |
| 287 | + /* Map special whitespace to '\t', \n', '\r' */ |
| 288 | + bytes[j++] = '\\'; |
| 289 | + bytes[j++] = 't'; |
| 290 | + } else if (ch == '\n') { |
| 291 | + bytes[j++] = '\\'; |
| 292 | + bytes[j++] = 'n'; |
| 293 | + } else if (ch == '\r') { |
| 294 | + bytes[j++] = '\\'; |
| 295 | + bytes[j++] = 'r'; |
| 296 | + } else { |
| 297 | + /* Map non-printable US ASCII and 8-bit characters to '\xHH' */ |
| 298 | + bytes[j++] = '\\'; |
| 299 | + bytes[j++] = 'x'; |
| 300 | + bytes[j++] = hexdigits[(ch >> 4) & 0x000F]; |
| 301 | + bytes[j++] = hexdigits[ch & 0x000F]; |
| 302 | + } |
| 303 | + } else if (ch < 0x10000) { |
| 304 | + /* U+0100-U+ffff range: Map 16-bit characters to '\\uHHHH' */ |
| 305 | + bytes[j++] = '\\'; |
| 306 | + bytes[j++] = 'u'; |
| 307 | + bytes[j++] = hexdigits[(ch >> 12) & 0x000F]; |
| 308 | + bytes[j++] = hexdigits[(ch >> 8) & 0x000F]; |
| 309 | + bytes[j++] = hexdigits[(ch >> 4) & 0x000F]; |
| 310 | + bytes[j++] = hexdigits[ch & 0x000F]; |
| 311 | + } else { |
| 312 | + /* U+010000-U+10ffff range: Map 21-bit characters to '\U00HHHHHH' */ |
| 313 | + /* Make sure that the first two digits are zero */ |
| 314 | + bytes[j++] = '\\'; |
| 315 | + bytes[j++] = 'U'; |
| 316 | + bytes[j++] = '0'; |
| 317 | + bytes[j++] = '0'; |
| 318 | + bytes[j++] = hexdigits[(ch >> 20) & 0x0000000F]; |
| 319 | + bytes[j++] = hexdigits[(ch >> 16) & 0x0000000F]; |
| 320 | + bytes[j++] = hexdigits[(ch >> 12) & 0x0000000F]; |
| 321 | + bytes[j++] = hexdigits[(ch >> 8) & 0x0000000F]; |
| 322 | + bytes[j++] = hexdigits[(ch >> 4) & 0x0000000F]; |
| 323 | + bytes[j++] = hexdigits[ch & 0x0000000F]; |
| 324 | + } |
| 325 | + } |
| 326 | + bytes = Arrays.copyOf(bytes, j); |
| 327 | + return factory().createTuple(new Object[]{factory().createBytes(bytes), str.length()}); |
| 328 | + } |
| 329 | + |
| 330 | + @Fallback |
| 331 | + Object encode(Object str, @SuppressWarnings("unused") Object errors) { |
| 332 | + throw raise(TypeError, "unicode_escape_encode() argument 1 must be str, not %p", str); |
| 333 | + } |
| 334 | + } |
| 335 | + |
256 | 336 | @Builtin(name = "unicode_escape_decode", fixedNumOfPositionalArgs = 1, keywordArguments = {"errors"})
|
257 | 337 | @GenerateNodeFactory
|
258 |
| - abstract static class UnicodeEscapeDecode extends PythonBuiltinNode { |
| 338 | + abstract static class UnicodeEscapeDecode extends PythonBinaryBuiltinNode { |
259 | 339 | @Specialization(guards = "isBytes(bytes)")
|
260 | 340 | Object encode(Object bytes, @SuppressWarnings("unused") PNone errors,
|
261 | 341 | @Cached("create()") BytesNodes.ToBytesNode toBytes) {
|
262 |
| - // this is basically just parsing as a String |
| 342 | + // for now we'll just parse this as a String, ignoring any error strategies |
263 | 343 | PythonCore core = getCore();
|
264 | 344 | byte[] byteArray = toBytes.execute(bytes);
|
265 | 345 | String string = strFromBytes(byteArray);
|
|
0 commit comments