|
43 | 43 |
|
44 | 44 | LITERALS = (ast.Num, ast.Str)
|
45 | 45 |
|
| 46 | +# A variant of the 'replace' error handler that replaces unencodable characters with the escape text '\ufffd' (six ASCII characters spelling U+FFFD) |
| 47 | +# rather than '?'. Without this, a string like '\uD800' (which is not encodable) would get mapped |
| 48 | +# to '?', and potentially clash with the regular string '?' if it appeared elsewhere in the source |
| 49 | +# code. Used in 'get_label_for_object' below. Based on code from https://peps.python.org/pep-0293/ |
def fffd_replace(exc):
    '''Codec error handler that stands in for characters a codec cannot process.

    For encode and translate errors, one copy of the replacement text is
    produced per offending character (``exc.end - exc.start`` copies); for
    decode errors a single copy is produced. Processing resumes at ``exc.end``.

    The replacement text is the six ASCII characters backslash-u-f-f-f-d,
    i.e. the escape sequence that spells U+FFFD, not the U+FFFD code point
    itself.
    NOTE(review): presumably ASCII text is used so that the target codec can
    always encode the replacement -- confirm before swapping in the real
    code point.

    Returns a ``(replacement, resume_position)`` tuple, the shape expected
    from handlers registered via ``codecs.register_error``.

    Raises TypeError if ``exc`` is not a UnicodeEncodeError,
    UnicodeDecodeError or UnicodeTranslateError.
    '''
    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        # One replacement per offending input character.
        return ((exc.end - exc.start) * u"\\ufffd", exc.end)
    if isinstance(exc, UnicodeDecodeError):
        # A single replacement regardless of how many bytes were invalid.
        return (u"\\ufffd", exc.end)
    # Exception instances have no __name__ attribute; the class does.
    raise TypeError("can't handle %s" % type(exc).__name__)


import codecs
codecs.register_error("fffdreplace", fffd_replace)
| 62 | + |
46 | 63 | class _CObject(object):
|
47 | 64 | '''Utility class to wrap arbitrary C objects.
|
48 | 65 | Treat all objects as unique. Rely on naming in the
|
@@ -239,7 +256,7 @@ def get_label_for_object(self, obj, default_label, obj_type):
|
239 | 256 | else:
|
240 | 257 | prefix = u"C_bytes$"
|
241 | 258 | if t is str:
|
242 |
| - obj = obj.encode("utf8", errors='replace') |
| 259 | + obj = obj.encode("utf8", errors='fffdreplace') |
243 | 260 | return prefix + hashlib.sha1(obj).hexdigest()
|
244 | 261 | if t is bytes:
|
245 | 262 | return prefix + hashlib.sha1(obj).hexdigest()
|
|
0 commit comments