diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 8dfcaedd5ef2e8..68ac71b747eedc 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -259,9 +259,11 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( /* Dedent a string. Behaviour is expected to be an exact match of `textwrap.dedent`. - Return a new reference on success, NULL with exception set on error. + Return a new reference on success, NULL with an exception set on error. + + Export for '_testinternalcapi' shared extension. */ -extern PyObject* _PyUnicode_Dedent(PyObject *unicode); +PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode); /* --- Misc functions ----------------------------------------------------- */ diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 6a9c60f3a6d75e..f18377927cdbc2 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1074,6 +1074,94 @@ def test_transform_decimal_and_space(self): self.assertRaises(SystemError, transform_decimal, []) # CRASHES transform_decimal(NULL) + @support.cpython_only + @unittest.skipIf(_testinternalcapi is None,'need _testinternalcapi module') + def test_dedent(self): + from _testinternalcapi import _PyUnicode_Dedent as dedent + self.assertEqual('hello\nworld', dedent(' hello\n world')) + self.assertEqual('hello\nmy\n friend', dedent(' hello\n my\n friend')) + + # Only spaces. + text = " " + expect = "" + self.assertEqual(expect, dedent(text)) + + # Only tabs. + text = "\t\t\t\t" + expect = "" + self.assertEqual(expect, dedent(text)) + + # A mixture. + text = " \t \t\t \t " + expect = "" + self.assertEqual(expect, dedent(text)) + + # ASCII whitespace. + text = "\f\n\r\t\v " + expect = "\n" + self.assertEqual(expect, dedent(text)) + + # One newline. + text = "\n" + expect = "\n" + self.assertEqual(expect, dedent(text)) + + # Windows-style newlines. 
+ text = "\r\n" * 5 + expect = "\n" * 5 + self.assertEqual(expect, dedent(text)) + + # Whitespace mixture. + text = " \n\t\n \n\t\t\n\n\n " + expect = "\n\n\n\n\n\n" + self.assertEqual(expect, dedent(text)) + + # Lines consisting only of whitespace are always normalised + text = "a\n \n\t\n" + expect = "a\n\n\n" + self.assertEqual(expect, dedent(text)) + + # Whitespace characters on non-empty lines are retained + text = "a\r\n\r\n\r\n" + expect = "a\r\n\n\n" + self.assertEqual(expect, dedent(text)) + + # Uneven indentation with declining indent level. + text = " Foo\n Bar\n" # 5 spaces, then 4 + expect = " Foo\nBar\n" + self.assertEqual(expect, dedent(text)) + + # Declining indent level with blank line. + text = " Foo\n\n Bar\n" # 5 spaces, blank, then 4 + expect = " Foo\n\nBar\n" + self.assertEqual(expect, dedent(text)) + + # Declining indent level with whitespace only line. + text = " Foo\n \n Bar\n" # 5 spaces, then 4, then 4 + expect = " Foo\n\nBar\n" + self.assertEqual(expect, dedent(text)) + + text = " hello\tthere\n how are\tyou?" + expect = "hello\tthere\nhow are\tyou?" + self.assertEqual(expect, dedent(text)) + + # dedent() only removes whitespace that can be uniformly removed! + text = "\thello there\n\thow are you?" + expect = "hello there\nhow are you?" 
+ self.assertEqual(expect, dedent(text)) + + text = '''\ + def foo(): + while 1: + return foo + ''' + expect = '''\ +def foo(): + while 1: + return foo +''' + self.assertEqual(expect, dedent(text)) + @support.cpython_only @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') def test_concat(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-07-13-36-15.gh-issue-103997.jIPHCc.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-07-13-36-15.gh-issue-103997.jIPHCc.rst new file mode 100644 index 00000000000000..d28137bcc9dd69 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-07-13-36-15.gh-issue-103997.jIPHCc.rst @@ -0,0 +1 @@ +:option:`-c` now dedents like :func:`textwrap.dedent`. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 243c7346576fc6..af67a4d2b488ba 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -34,7 +34,7 @@ #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pylifecycle.h" // _PyInterpreterConfig_InitFromDict() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII() +#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII(), _PyUnicode_Dedent() #include "clinic/_testinternalcapi.c.h" @@ -1416,6 +1416,18 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); } + +/* Test _PyUnicode_Dedent() */ +static PyObject * +unicode_dedent(PyObject *self, PyObject *arg) +{ + if (arg == Py_None) { + arg = NULL; + } + return _PyUnicode_Dedent(arg); +} + + static PyObject * test_pyobject_is_freed(const char *test_name, PyObject *op) { @@ -2422,6 +2434,7 @@ static PyMethodDef module_functions[] = { {"_PyTraceMalloc_GetTraceback", tracemalloc_get_traceback, METH_VARARGS}, {"test_tstate_capi", test_tstate_capi, METH_NOARGS, NULL}, {"_PyUnicode_TransformDecimalAndSpaceToASCII", 
unicode_transformdecimalandspacetoascii, METH_O}, + {"_PyUnicode_Dedent", unicode_dedent, METH_O}, {"check_pyobject_forbidden_bytes_is_freed", check_pyobject_forbidden_bytes_is_freed, METH_NOARGS}, {"check_pyobject_freed_is_freed", check_pyobject_freed_is_freed, METH_NOARGS}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4c88e4c1fdca2e..b767b964d68822 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14309,7 +14309,7 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) } /* -This function searchs the longest common leading whitespace +This function searches for the longest common leading whitespace of all lines in the [src, end). It returns the length of the common leading whitespace and sets `output` to point to the beginning of the common leading whitespace if length > 0. @@ -14331,7 +14331,7 @@ search_longest_common_leading_whitespace( // scan the whole line while (iter < end && *iter != '\n') { - if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') { + if (!leading_whitespace_end && !Py_ISSPACE(Py_CHARMASK(*iter))) { /* `iter` points to the first non-whitespace character in this line */ if (iter == line_start) { @@ -14390,7 +14390,7 @@ search_longest_common_leading_whitespace( /* Dedent a string. Behaviour is expected to be an exact match of `textwrap.dedent`. - Return a new reference on success, NULL with exception set on error. + Return a new reference on success, NULL with an exception set on error. 
*/ PyObject * _PyUnicode_Dedent(PyObject *unicode) @@ -14413,10 +14413,6 @@ _PyUnicode_Dedent(PyObject *unicode) Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( src, end, &whitespace_start); - if (whitespace_len == 0) { - return Py_NewRef(unicode); - } - // now we should trigger a dedent char *dest = PyMem_Malloc(src_len); if (!dest) { @@ -14431,7 +14427,7 @@ _PyUnicode_Dedent(PyObject *unicode) // iterate over a line to find the end of a line while (iter < end && *iter != '\n') { - if (in_leading_space && *iter != ' ' && *iter != '\t') { + if (in_leading_space && !Py_ISSPACE(Py_CHARMASK(*iter))) { in_leading_space = false; } ++iter; @@ -14441,8 +14437,10 @@ _PyUnicode_Dedent(PyObject *unicode) bool append_newline = iter < end; // if this line has all white space, write '\n' and continue - if (in_leading_space && append_newline) { - *dest_iter++ = '\n'; + if (in_leading_space) { + if (append_newline) { + *dest_iter++ = '\n'; + } continue; }