Skip to content

Commit d0aa6de

Browse files
fix
1 parent c086137 commit d0aa6de

File tree

6 files changed

+44
-45
lines changed

6 files changed

+44
-45
lines changed

mypyc/irbuild/specialize.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,17 @@
8484
join_formatted_strings,
8585
tokenizer_format_call,
8686
)
87+
from mypyc.primitives.bytes_ops import (
88+
bytes_decode_ascii_strict,
89+
bytes_decode_latin1_strict,
90+
bytes_decode_utf8_strict,
91+
)
8792
from mypyc.primitives.dict_ops import (
8893
dict_items_op,
8994
dict_keys_op,
9095
dict_setdefault_spec_init_op,
9196
dict_values_op,
9297
)
93-
from mypyc.primitives.bytes_ops import bytes_decode_utf8_strict, bytes_decode_latin1_strict, bytes_decode_ascii_strict
9498
from mypyc.primitives.list_ops import new_list_set_item_op
9599
from mypyc.primitives.str_ops import (
96100
str_encode_ascii_strict,
@@ -748,15 +752,12 @@ def bytes_decode_fast_path(builder: IRBuilder, expr: CallExpr, callee: RefExpr)
748752
return None
749753

750754
encoding = "utf8"
751-
errors = "strict"
752755

753756
# Handle up to 2 arguments: decode([encoding], [errors])
754757
if len(expr.arg_kinds) > 0 and isinstance(expr.args[0], StrExpr):
755758
if expr.arg_kinds[0] == ARG_NAMED:
756759
if expr.arg_names[0] == "encoding":
757760
encoding = expr.args[0].value
758-
elif expr.arg_names[0] == "errors":
759-
errors = expr.args[0].value
760761
elif expr.arg_kinds[0] == ARG_POS:
761762
encoding = expr.args[0].value
762763
else:
@@ -766,16 +767,9 @@ def bytes_decode_fast_path(builder: IRBuilder, expr: CallExpr, callee: RefExpr)
766767
if expr.arg_kinds[1] == ARG_NAMED:
767768
if expr.arg_names[1] == "encoding":
768769
encoding = expr.args[1].value
769-
elif expr.arg_names[1] == "errors":
770-
errors = expr.args[1].value
771-
elif expr.arg_kinds[1] == ARG_POS:
772-
errors = expr.args[1].value
773770
else:
774771
return None
775772

776-
if errors != "strict":
777-
return None
778-
779773
normalized = encoding.lower().replace("-", "").replace("_", "")
780774

781775
if normalized in ("utf8", "utf", "u8", "cp65001"):

mypyc/lib-rt/CPy.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -764,9 +764,9 @@ CPyTagged CPyBytes_GetItem(PyObject *o, CPyTagged index);
764764
PyObject *CPyBytes_Concat(PyObject *a, PyObject *b);
765765
PyObject *CPyBytes_Join(PyObject *sep, PyObject *iter);
766766
CPyTagged CPyBytes_Ord(PyObject *obj);
767-
PyObject *CPy_DecodeUtf8(PyObject *bytes_obj, const char *errors);
768-
PyObject *CPy_DecodeLatin1(PyObject *bytes_obj, const char *errors);
769-
PyObject *CPy_DecodeAscii(PyObject *bytes_obj, const char *errors);
767+
PyObject *CPy_DecodeUtf8(PyObject *bytes_obj);
768+
PyObject *CPy_DecodeLatin1(PyObject *bytes_obj);
769+
PyObject *CPy_DecodeAscii(PyObject *bytes_obj);
770770

771771

772772
int CPyBytes_Compare(PyObject *left, PyObject *right);

mypyc/lib-rt/bytes_ops.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ CPyTagged CPyBytes_Ord(PyObject *obj) {
164164
}
165165

166166

167-
PyObject *CPy_DecodeUtf8(PyObject *bytes_obj, const char *errors) {
167+
PyObject *CPy_DecodeUtf8(PyObject *bytes_obj) {
168168
if (!PyBytes_Check(bytes_obj)) {
169169
PyErr_SetString(PyExc_TypeError, "expected bytes object");
170170
return NULL;
@@ -173,11 +173,11 @@ PyObject *CPy_DecodeUtf8(PyObject *bytes_obj, const char *errors) {
173173
char *data = PyBytes_AS_STRING(bytes_obj);
174174
Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
175175

176-
return PyUnicode_DecodeUTF8(data, size, errors);
176+
return PyUnicode_DecodeUTF8(data, size, NULL);
177177
}
178178

179179

180-
PyObject *CPy_DecodeLatin1(PyObject *bytes_obj, const char *errors) {
180+
PyObject *CPy_DecodeLatin1(PyObject *bytes_obj) {
181181
if (!PyBytes_Check(bytes_obj)) {
182182
PyErr_SetString(PyExc_TypeError, "expected bytes object");
183183
return NULL;
@@ -186,11 +186,11 @@ PyObject *CPy_DecodeLatin1(PyObject *bytes_obj, const char *errors) {
186186
char *data = PyBytes_AS_STRING(bytes_obj);
187187
Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
188188

189-
return PyUnicode_DecodeLatin1(data, size, errors);
189+
return PyUnicode_DecodeLatin1(data, size, NULL);
190190
}
191191

192192

193-
PyObject *CPy_DecodeAscii(PyObject *bytes_obj, const char *errors) {
193+
PyObject *CPy_DecodeAscii(PyObject *bytes_obj) {
194194
if (!PyBytes_Check(bytes_obj)) {
195195
PyErr_SetString(PyExc_TypeError, "expected bytes object");
196196
return NULL;
@@ -199,5 +199,5 @@ PyObject *CPy_DecodeAscii(PyObject *bytes_obj, const char *errors) {
199199
char *data = PyBytes_AS_STRING(bytes_obj);
200200
Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
201201

202-
return PyUnicode_DecodeASCII(data, size, errors);
202+
return PyUnicode_DecodeASCII(data, size, NULL);
203203
}

mypyc/primitives/bytes_ops.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,23 +111,23 @@
111111

112112
bytes_decode_utf8_strict = custom_primitive_op(
113113
name="decode",
114-
arg_types=[bytes_rprimitive, str_rprimitive],
114+
arg_types=[bytes_rprimitive],
115115
return_type=str_rprimitive,
116116
c_function_name="CPy_DecodeUtf8",
117117
error_kind=ERR_MAGIC,
118118
)
119119

120120
bytes_decode_latin1_strict = custom_primitive_op(
121121
name="decode_latin1",
122-
arg_types=[bytes_rprimitive, str_rprimitive],
122+
arg_types=[bytes_rprimitive],
123123
return_type=str_rprimitive,
124124
c_function_name="CPy_DecodeLatin1",
125125
error_kind=ERR_MAGIC,
126126
)
127127

128128
bytes_decode_ascii_strict = custom_primitive_op(
129129
name="decode_ascii",
130-
arg_types=[bytes_rprimitive, str_rprimitive],
130+
arg_types=[bytes_rprimitive],
131131
return_type=str_rprimitive,
132132
c_function_name="CPy_DecodeAscii",
133133
error_kind=ERR_MAGIC,

mypyc/test-data/irbuild-bytes.test

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ L0:
186186
b4 = r10
187187
return 1
188188

189-
[case testDecodeBytes]
189+
[case testDecode]
190190
def f(b: bytes) -> None:
191191
b.decode()
192192
b.decode('utf8')
@@ -195,29 +195,35 @@ def f(b: bytes) -> None:
195195
b.decode('latin1', 'strict')
196196
b.decode('ascii')
197197
b.decode('latin-1')
198-
b.decode('utf-8', 'ignore')
198+
b.decode('utf-8', 'ignore')
199199
b.decode('ascii', 'replace')
200200
b.decode('latin1', 'ignore')
201201
[out]
202202
def f(b):
203203
b :: bytes
204-
r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15 :: str
204+
r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, r18, r19, r20, r21 :: str
205205
L0:
206206
r0 = CPy_DecodeUtf8(b)
207207
r1 = CPy_DecodeUtf8(b)
208-
r2 = CPy_DecodeUtf8(b)
209-
r3 = CPy_DecodeUtf8(b)
210-
r4 = CPy_DecodeLatin1(b)
211-
r5 = CPy_DecodeAscii(b)
212-
r6 = CPy_DecodeLatin1(b)
213-
r7 = 'utf-8'
214-
r8 = 'ignore'
215-
r9 = CPy_Decode(b, r7, r8)
216-
r10 = 'ascii'
217-
r11 = 'replace'
218-
r12 = CPy_Decode(b, r10, r11)
219-
r13 = 'latin1'
208+
r2 = 'utf-8'
209+
r3 = 'strict'
210+
r4 = CPy_Decode(b, r2, r3)
211+
r5 = 'utf-8'
212+
r6 = 'strict'
213+
r7 = CPy_Decode(b, r5, r6)
214+
r8 = 'latin1'
215+
r9 = 'strict'
216+
r10 = CPy_Decode(b, r8, r9)
217+
r11 = CPy_DecodeAscii(b)
218+
r12 = CPy_DecodeLatin1(b)
219+
r13 = 'utf-8'
220220
r14 = 'ignore'
221221
r15 = CPy_Decode(b, r13, r14)
222+
r16 = 'ascii'
223+
r17 = 'replace'
224+
r18 = CPy_Decode(b, r16, r17)
225+
r19 = 'latin1'
226+
r20 = 'ignore'
227+
r21 = CPy_Decode(b, r19, r20)
222228
return 1
223229

mypyc/test-data/irbuild-str.test

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -335,14 +335,13 @@ def f(b: bytes) -> None:
335335
[out]
336336
def f(b):
337337
b :: bytes
338-
r0, r1, r2, r3, r4, r5 :: str
338+
r0, r1, r2, r3, r4 :: str
339339
L0:
340-
r0 = CPy_Decode(b, 0, 0)
341-
r1 = 'utf-8'
342-
r2 = CPy_Decode(b, r1, 0)
343-
r3 = 'utf-8'
344-
r4 = 'backslashreplace'
345-
r5 = CPy_Decode(b, r3, r4)
340+
r0 = CPy_DecodeUtf8(b)
341+
r1 = CPy_DecodeUtf8(b)
342+
r2 = 'utf-8'
343+
r3 = 'backslashreplace'
344+
r4 = CPy_Decode(b, r2, r3)
346345
return 1
347346

348347
[case testEncode_64bit]

0 commit comments

Comments
 (0)