Skip to content

Commit b3e7fa9

Browse files
committed
Implement 'codecs.raw_unicode_escape_encode'.
1 parent 9f5f697 commit b3e7fa9

File tree

2 files changed

+96
-40
lines changed

2 files changed

+96
-40
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 76 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
6262
import com.oracle.graal.python.builtins.objects.bytes.PIBytesLike;
6363
import com.oracle.graal.python.builtins.objects.common.SequenceStorageNodes;
64+
import com.oracle.graal.python.builtins.objects.tuple.PTuple;
6465
import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode;
6566
import com.oracle.graal.python.nodes.function.PythonBuiltinNode;
6667
import com.oracle.truffle.api.CompilerDirectives;
@@ -225,10 +226,35 @@ protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFa
225226
return CodecsModuleBuiltinsFactory.getFactories();
226227
}
227228

229+
abstract static class EncodeBaseNode extends PythonBuiltinNode {
230+
231+
protected static CodingErrorAction convertCodingErrorAction(String errors) {
232+
CodingErrorAction errorAction;
233+
switch (errors) {
234+
// TODO: see [GR-10256] to implement the correct handling mechanics
235+
case "ignore":
236+
case "surrogatepass":
237+
errorAction = CodingErrorAction.IGNORE;
238+
break;
239+
case "replace":
240+
case "surrogateescape":
241+
case "namereplace":
242+
case "backslashreplace":
243+
case "xmlcharrefreplace":
244+
errorAction = CodingErrorAction.REPLACE;
245+
break;
246+
default:
247+
errorAction = CodingErrorAction.REPORT;
248+
break;
249+
}
250+
return errorAction;
251+
}
252+
}
253+
228254
// _codecs.encode(obj, encoding='utf-8', errors='strict')
229255
@Builtin(name = "__truffle_encode", fixedNumOfPositionalArgs = 1, keywordArguments = {"encoding", "errors"})
230256
@GenerateNodeFactory
231-
public abstract static class CodecsEncodeNode extends PythonBuiltinNode {
257+
public abstract static class CodecsEncodeNode extends EncodeBaseNode {
232258
@Child private SequenceStorageNodes.LenNode lenNode;
233259

234260
@Specialization(guards = "isString(str)")
@@ -278,25 +304,7 @@ Object encode(Object str, @SuppressWarnings("unused") Object encoding, @Suppress
278304

279305
@TruffleBoundary
280306
private PBytes encodeString(String self, String encoding, String errors) {
281-
CodingErrorAction errorAction;
282-
switch (errors) {
283-
// TODO: see [GR-10256] to implement the correct handling mechanics
284-
case "ignore":
285-
case "surrogatepass":
286-
errorAction = CodingErrorAction.IGNORE;
287-
break;
288-
case "replace":
289-
case "surrogateescape":
290-
case "namereplace":
291-
case "backslashreplace":
292-
case "xmlcharrefreplace":
293-
errorAction = CodingErrorAction.REPLACE;
294-
break;
295-
default:
296-
errorAction = CodingErrorAction.REPORT;
297-
break;
298-
}
299-
307+
CodingErrorAction errorAction = convertCodingErrorAction(errors);
300308
try {
301309
Charset charset = getCharset(encoding);
302310
ByteBuffer encoded = charset.newEncoder().onMalformedInput(errorAction).onUnmappableCharacter(errorAction).encode(CharBuffer.wrap(self));
@@ -320,6 +328,54 @@ private int getLength(PBytes b) {
320328
}
321329
}
322330

331+
@Builtin(name = "__truffle_raw_encode", fixedNumOfPositionalArgs = 1, keywordArguments = {"errors"})
332+
@GenerateNodeFactory
333+
public abstract static class RawEncodeNode extends EncodeBaseNode {
334+
335+
@Specialization
336+
PTuple encode(String self, @SuppressWarnings("unused") PNone none) {
337+
return encodeString(self, "strict");
338+
}
339+
340+
@Specialization
341+
PTuple encode(String self, String errors) {
342+
return encodeString(self, errors);
343+
}
344+
345+
@TruffleBoundary
346+
private PTuple encodeString(String self, String errors) {
347+
CodingErrorAction errorAction = convertCodingErrorAction(errors);
348+
349+
try {
350+
Charset charset = getCharset("utf-32");
351+
ByteBuffer encoded = charset.newEncoder().onMalformedInput(errorAction).onUnmappableCharacter(errorAction).encode(CharBuffer.wrap(self));
352+
int n = encoded.remaining();
353+
ByteBuffer buf = ByteBuffer.allocate(n);
354+
assert n % Integer.BYTES == 0;
355+
while (encoded.hasRemaining()) {
356+
byte[] b = new byte[4];
357+
encoded.get(b);
358+
359+
boolean write = false;
360+
for (int i = 0; i < b.length; i++) {
361+
if (write || b[i] != 0) {
362+
buf.put(b[i]);
363+
write = true;
364+
}
365+
}
366+
}
367+
buf.flip();
368+
n = buf.remaining();
369+
byte[] data = new byte[n];
370+
buf.get(data);
371+
return factory().createTuple(new Object[]{factory().createBytes(data), self.length()});
372+
} catch (CharacterCodingException e) {
373+
throw raise(UnicodeEncodeError, "%s", e.getMessage());
374+
}
375+
}
376+
377+
}
378+
323379
// _codecs.decode(obj, encoding='utf-8', errors='strict')
324380
@Builtin(name = "__truffle_decode", fixedNumOfPositionalArgs = 1, keywordArguments = {"encoding", "errors"})
325381
@GenerateNodeFactory

graalpython/lib-graalpython/_codecs.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,12 @@ def __codec_registry_init__():
147147
# TODO implement the encode / decode methods
148148
@__builtin__
149149
def escape_encode(data, errors=None):
150-
raise NotImplementedError()
150+
raise NotImplementedError("escape_encode")
151151

152152

153153
@__builtin__
154154
def escape_decode(data, errors=None):
155-
raise NotImplementedError()
155+
raise NotImplementedError("escape_decode")
156156

157157

158158
@__builtin__
@@ -207,7 +207,7 @@ def utf_16_be_decode(string, errors=None, final=False):
207207

208208
@__builtin__
209209
def utf_16_ex_decode(data, errors=None, byteorder=0, final=False):
210-
raise NotImplementedError()
210+
raise NotImplementedError("utf_16_ex_decode")
211211

212212

213213
@__builtin__
@@ -242,37 +242,37 @@ def utf_32_be_decode(string, errors=None, final=False):
242242

243243
@__builtin__
244244
def utf_32_ex_decode(data, errors=None, byteorder=0, final=False):
245-
raise NotImplementedError()
245+
raise NotImplementedError("utf_32_ex_decode")
246246

247247

248248
@__builtin__
249249
def unicode_escape_encode(string, errors=None):
250-
raise NotImplementedError()
250+
raise NotImplementedError("unicode_escape_encode")
251251

252252

253253
@__builtin__
254254
def unicode_escape_decode(string, errors=None):
255-
raise NotImplementedError()
255+
raise NotImplementedError("unicode_escape_decode")
256256

257257

258258
@__builtin__
259259
def unicode_internal_encode(obj, errors=None):
260-
raise NotImplementedError()
260+
raise NotImplementedError("unicode_internal_encode")
261261

262262

263263
@__builtin__
264264
def unicode_internal_decode(obj, errors=None):
265-
raise NotImplementedError()
265+
raise NotImplementedError("unicode_internal_decode")
266266

267267

268268
@__builtin__
269269
def raw_unicode_escape_encode(string, errors=None):
270-
raise NotImplementedError()
270+
return __truffle_raw_encode(string, errors)
271271

272272

273273
@__builtin__
274274
def raw_unicode_escape_decode(string, errors=None):
275-
raise NotImplementedError()
275+
raise NotImplementedError("raw_unicode_escape_decode")
276276

277277

278278
@__builtin__
@@ -297,49 +297,49 @@ def ascii_decode(string, errors=None):
297297

298298
@__builtin__
299299
def charmap_encode(string, errors=None, mapping=None):
300-
raise NotImplementedError()
300+
raise NotImplementedError("charmap_encode")
301301

302302

303303
@__builtin__
304304
def charmap_decode(string, errors=None, mapping=None):
305-
raise NotImplementedError()
305+
raise NotImplementedError("charmap_decode")
306306

307307

308308
@__builtin__
309309
def charmap_build(mapping):
310-
raise NotImplementedError()
310+
raise NotImplementedError("charmap_build")
311311

312312

313313
@__builtin__
314314
def readbuffer_encode(data, errors=None):
315-
raise NotImplementedError()
315+
raise NotImplementedError("readbuffer_encode")
316316

317317

318318
@__builtin__
319319
def mbcs_encode(string, errors=None):
320-
raise NotImplementedError()
320+
raise NotImplementedError("mbcs_encode")
321321

322322

323323
@__builtin__
324324
def mbcs_decode(string, errors=None, final=False):
325-
raise NotImplementedError()
325+
raise NotImplementedError("mbcs_decode")
326326

327327

328328
@__builtin__
329329
def oem_encode(string, errors):
330-
raise NotImplementedError()
330+
raise NotImplementedError("oem_encode")
331331

332332

333333
@__builtin__
334334
def oem_decode(string, errors=None, final=False):
335-
raise NotImplementedError()
335+
raise NotImplementedError("oem_decode")
336336

337337

338338
@__builtin__
339339
def code_page_encode(code_page, string, errors=None):
340-
raise NotImplementedError()
340+
raise NotImplementedError("code_page_encode")
341341

342342

343343
@__builtin__
344344
def code_page_decode(code_page, string, errors=None, final=False):
345-
raise NotImplementedError()
345+
raise NotImplementedError("code_page_decode")

0 commit comments

Comments
 (0)