Skip to content

Commit f2ccd8a

Browse files
committed
Addressing review comments.
* Fixing code comments. * Adding tests with more unicode chars. * Adding commit ID for code copied from cpython.git.
1 parent 4bb11f5 commit f2ccd8a

File tree

3 files changed

+20
-6
lines changed

3 files changed

+20
-6
lines changed

mypyc/lib-rt/str_ops.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <Python.h>
66
#include "CPy.h"
77

8-
// Copied from cpython.git:Objects/unicodeobject.c.
8+
// Copied from cpython.git:Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
99
#define BLOOM_MASK unsigned long
1010
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
1111
#if LONG_BIT >= 128
@@ -18,7 +18,8 @@
1818
#error "LONG_BIT is smaller than 32"
1919
#endif
2020

21-
// Copied from cpython.git:Objects/unicodeobject.c. This is needed for str.strip("...").
21+
// Copied from cpython.git:Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
22+
// This is needed for str.strip("...").
2223
static inline BLOOM_MASK
2324
make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
2425
{
@@ -226,14 +227,18 @@ PyObject *CPyStr_RSplit(PyObject *str, PyObject *sep, CPyTagged max_split) {
226227
return PyUnicode_RSplit(str, sep, temp_max_split);
227228
}
228229

229-
// This function has been copied from _PyUnicode_XStrip in cpython.git:Objects/unicodeobject.c.
230+
// This function has been copied from _PyUnicode_XStrip in cpython.git:Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
230231
static PyObject *_PyStr_XStrip(PyObject *self, int striptype, PyObject *sepobj) {
231232
const void *data;
232233
int kind;
233234
Py_ssize_t i, j, len;
234235
BLOOM_MASK sepmask;
235236
Py_ssize_t seplen;
236237

238+
// This check is needed for Python 3.9 and earlier.
239+
if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
240+
return NULL;
241+
237242
kind = PyUnicode_KIND(self);
238243
data = PyUnicode_DATA(self);
239244
len = PyUnicode_GET_LENGTH(self);
@@ -272,11 +277,15 @@ static PyObject *_PyStr_XStrip(PyObject *self, int striptype, PyObject *sepobj)
272277
return PyUnicode_Substring(self, i, j);
273278
}
274279

275-
// Copied from do_strip function in cpython.git/Objects/unicodeobject.c.
280+
// Copied from do_strip function in cpython.git:Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
276281
PyObject *_CPyStr_Strip(PyObject *self, int strip_type, PyObject *sep) {
277282
if (sep == NULL || sep == Py_None) {
278283
Py_ssize_t len, i, j;
279284

285+
// This check is needed for Python 3.9 and earlier.
286+
if (PyUnicode_READY(self) == -1)
287+
return NULL;
288+
280289
len = PyUnicode_GET_LENGTH(self);
281290

282291
if (PyUnicode_IS_ASCII(self)) {

mypyc/primitives/str_ops.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@
136136
)
137137

138138
# str.strip, str.lstrip, str.rstrip
139-
# Order of iteration matters. It should correspond with LEFTSTRIP, RIGHTSTRIP and BOTHSTRIP macros defined in CPy.h.
140139
for strip_prefix in ["l", "r", ""]:
141140
method_op(
142141
name=f"{strip_prefix}strip",

mypyc/test-data/run-strings.test

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,6 @@ def test_surrogate() -> None:
776776
assert repr("foobar\x00\xab\ud912\U00012345") == r"'foobar\x00«\ud912𒍅'"
777777

778778
[case testStrip]
779-
# This is a negative test. strip variants without args does not use efficient primitives.
780779
def test_all_strips_default() -> None:
781780
s = " a1\t"
782781
assert s.lstrip() == "a1\t"
@@ -787,3 +786,10 @@ def test_all_strips() -> None:
787786
assert s.lstrip("xy") == "b2yy"
788787
assert s.strip("xy") == "b2"
789788
assert s.rstrip("xy") == "xxb2"
789+
def test_unicode_whitespace() -> None:
790+
assert "\u200A\u000D\u2009\u2020\u000Dtt\u0085\u000A".strip() == "\u2020\u000Dtt"
791+
def test_unicode_range() -> None:
792+
assert "\u2029 \U00107581 ".lstrip() == "\U00107581 "
793+
assert "\u2029 \U0010AAAA\U00104444B\u205F ".strip() == "\U0010AAAA\U00104444B"
794+
assert " \u3000\u205F ".strip() == ""
795+
assert "\u2029 \U00102865\u205F ".rstrip() == "\u2029 \U00102865"

0 commit comments

Comments
 (0)