Skip to content

Commit ed6e17b

Browse files
committed
Refactor implementation
1 parent 07d2273 commit ed6e17b

File tree

3 files changed

+85
-58
lines changed

3 files changed

+85
-58
lines changed

Include/internal/pycore_unicodeobject.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
251251
Behaviour is expected to be an exact match of `textwrap.dedent`.
252252
Return a new reference on success, NULL with exception set on error.
253253
*/
254-
PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode);
254+
extern PyObject* _PyUnicode_Dedent(PyObject *unicode);
255255

256256
/* --- Misc functions ----------------------------------------------------- */
257257

Lib/test/test_cmd_line.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
spawn_python, kill_python, assert_python_ok, assert_python_failure,
1818
interpreter_requires_environment
1919
)
20+
from textwrap import dedent
21+
2022

2123
if not support.has_subprocess_support:
2224
raise unittest.SkipTest("test module requires subprocess")
@@ -1053,7 +1055,6 @@ def test_int_max_str_digits(self):
10531055

10541056
def test_cmd_dedent(self):
10551057
# test that -c auto-dedents its arguments
1056-
from textwrap import dedent
10571058
test_cases = [
10581059
(
10591060
"""
@@ -1096,6 +1097,14 @@ def test_cmd_dedent(self):
10961097
# textwrap.dedent behavior, but might not be intuitive.
10971098
"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
10981099
),
1100+
(
1101+
'',
1102+
'',
1103+
),
1104+
(
1105+
' \t\n\t\n \t\t\t \t\t \t\n\t\t \n\n\n\t\t\t ',
1106+
'',
1107+
),
10991108
]
11001109
for code, expected in test_cases:
11011110
# Run the auto-dedent case

Objects/unicodeobject.c

Lines changed: 74 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -14270,29 +14270,22 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
1427014270
return Py_BuildValue("(N)", copy);
1427114271
}
1427214272

14273-
/* Dedent a string.
14274-
Behaviour is expected to be an exact match of `textwrap.dedent`.
14275-
Return a new reference on success, NULL with exception set on error.
14276-
*/
14277-
PyAPI_FUNC(PyObject *)
14278-
_PyUnicode_Dedent(PyObject *unicode)
14279-
{
14280-
Py_ssize_t src_len = 0;
14281-
const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
14282-
if (!src) {
14283-
return NULL;
14284-
}
14285-
if (src_len <= 0) {
14286-
Py_INCREF(unicode);
14287-
return unicode;
14288-
}
14289-
14290-
const char *end = src + src_len;
14291-
14292-
// [candidate_start, candidate_start + candidate_len)
14273+
/*
14274+
This function searches for the longest common leading whitespace
14275+
of all lines in the range [src, end).
14276+
It returns the length of the common leading whitespace and sets `output` to
14277+
point to the beginning of the common leading whitespace if length > 0.
14278+
*/
14279+
static Py_ssize_t
14280+
search_longest_common_leading_whitespace(
14281+
const char * const src,
14282+
const char * const end,
14283+
const char * * output
14284+
) {
14285+
// [_start, _start + _len)
1429314286
// describes the current longest common leading whitespace
14294-
const char *candidate_start = NULL;
14295-
Py_ssize_t candidate_len = 0;
14287+
const char *_start = NULL;
14288+
Py_ssize_t _len = 0;
1429614289

1429714290
for (const char *iter = src; iter < end; ++iter) {
1429814291
const char *line_start = iter;
@@ -14305,8 +14298,7 @@ _PyUnicode_Dedent(PyObject *unicode)
1430514298
in this line */
1430614299
if (iter == line_start) {
1430714300
// some line has no indent, fast exit!
14308-
Py_INCREF(unicode);
14309-
return unicode;
14301+
return 0;
1431014302
}
1431114303
leading_whitespace_end = iter;
1431214304
}
@@ -14318,47 +14310,73 @@ _PyUnicode_Dedent(PyObject *unicode)
1431814310
continue;
1431914311
}
1432014312

14321-
if (!candidate_start) {
14313+
if (!_start) {
1432214314
// update the first leading whitespace
14323-
candidate_start = line_start;
14324-
candidate_len = leading_whitespace_end - line_start;
14325-
assert(candidate_len > 0);
14326-
} else {
14315+
_start = line_start;
14316+
_len = leading_whitespace_end - line_start;
14317+
assert(_len > 0);
14318+
}
14319+
else {
1432714320
/* We then compare with the current longest leading whitespace.
1432814321
14329-
[line_start, leading_whitespace_end) is the leading whitespace of
14330-
this line,
14322+
[line_start, leading_whitespace_end) is the leading
14323+
whitespace of this line,
1433114324
14332-
[candidate_start, candidate_start + candidate_len)
14333-
is the leading whitespace of the current longest leading
14334-
whitespace. */
14335-
Py_ssize_t new_candidate_len = 0;
14325+
[_start, _start + _len) is the current longest common
14326+
leading whitespace found so far. */
14327+
Py_ssize_t new_len = 0;
14328+
const char *_iter = _start, *line_iter = line_start;
1433614329

14337-
for (const char *candidate_iter = candidate_start,
14338-
*line_iter = line_start;
14339-
candidate_iter < candidate_start + candidate_len &&
14340-
line_iter < leading_whitespace_end;
14341-
++candidate_iter, ++line_iter) {
14342-
if (*candidate_iter != *line_iter) {
14343-
break;
14344-
}
14345-
++new_candidate_len;
14330+
while (_iter < _start + _len && line_iter < leading_whitespace_end
14331+
&& *_iter == *line_iter)
14332+
{
14333+
++_iter;
14334+
++line_iter;
14335+
++new_len;
1434614336
}
1434714337

14348-
candidate_len = new_candidate_len;
14349-
if (candidate_len == 0) {
14338+
_len = new_len;
14339+
if (_len == 0) {
1435014340
// No common things now, fast exit!
14351-
Py_INCREF(unicode);
14352-
return unicode;
14341+
return 0;
1435314342
}
1435414343
}
1435514344
}
1435614345

14357-
assert(candidate_len >= 0);
14358-
/* Final check for strings that contain nothing but whitespace. */
14359-
if (candidate_len == 0) {
14360-
Py_INCREF(unicode);
14361-
return unicode;
14346+
assert(_len >= 0);
14347+
if (_len > 0) {
14348+
*output = _start;
14349+
}
14350+
return _len;
14351+
}
14352+
14353+
/* Dedent a string.
14354+
Behaviour is expected to be an exact match of `textwrap.dedent`.
14355+
Return a new reference on success, NULL with exception set on error.
14356+
*/
14357+
PyObject *
14358+
_PyUnicode_Dedent(PyObject *unicode)
14359+
{
14360+
Py_ssize_t src_len = 0;
14361+
const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
14362+
if (!src) {
14363+
return NULL;
14364+
}
14365+
assert(src_len >= 0);
14366+
if (src_len == 0) {
14367+
return Py_NewRef(unicode);
14368+
}
14369+
14370+
const char *const end = src + src_len;
14371+
14372+
// [whitespace_start, whitespace_start + whitespace_len)
14373+
// describes the current longest common leading whitespace
14374+
const char *whitespace_start = NULL;
14375+
Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
14376+
src, end, &whitespace_start);
14377+
14378+
if (whitespace_len == 0) {
14379+
return Py_NewRef(unicode);
1436214380
}
1436314381

1436414382
// now we should trigger a dedent
@@ -14390,12 +14408,12 @@ _PyUnicode_Dedent(PyObject *unicode)
1439014408
continue;
1439114409
}
1439214410

14393-
/* copy [new_line_start + candidate_len, iter) to buffer, then
14411+
/* copy [line_start + whitespace_len, iter) to buffer, then
1439414412
conditionally append '\n' */
1439514413

14396-
Py_ssize_t new_line_len = iter - line_start - candidate_len;
14414+
Py_ssize_t new_line_len = iter - line_start - whitespace_len;
1439714415
assert(new_line_len >= 0);
14398-
memcpy(dest_iter, line_start + candidate_len, new_line_len);
14416+
memcpy(dest_iter, line_start + whitespace_len, new_line_len);
1439914417

1440014418
dest_iter += new_line_len;
1440114419

0 commit comments

Comments
 (0)