Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,11 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(

/* Dedent a string.
Behaviour is expected to be an exact match of `textwrap.dedent`.
Return a new reference on success, NULL with exception set on error.
Return a new reference on success, NULL with an exception set on error.

Export for test_capi.test_unicode
*/
extern PyObject* _PyUnicode_Dedent(PyObject *unicode);
PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode);

/* --- Misc functions ----------------------------------------------------- */

Expand Down
89 changes: 89 additions & 0 deletions Lib/test/test_capi/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,6 +1074,95 @@ def test_transform_decimal_and_space(self):
self.assertRaises(SystemError, transform_decimal, [])
# CRASHES transform_decimal(NULL)

@support.cpython_only
@unittest.skipIf(_testinternalcapi is None,'need _testinternalcapi module')
def test_dedent(self):
from _testinternalcapi import _PyUnicode_Dedent as dedent
self.assertEqual('hello\nworld', dedent(' hello\n world'))
self.assertEqual('hello\nmy\n friend', dedent(' hello\n my\n friend'))

# Only spaces.
text = " "
expect = ""
self.assertEqual(expect, dedent(text))

# Only tabs.
text = "\t\t\t\t"
expect = ""
self.assertEqual(expect, dedent(text))

# A mixture.
text = " \t \t\t \t "
expect = ""
self.assertEqual(expect, dedent(text))

# ASCII whitespace.
text = "\f\n\r\t\v "
expect = "\n"
self.assertEqual(expect, dedent(text))

# One newline.
text = "\n"
expect = "\n"
self.assertEqual(expect, dedent(text))

# Windows-style newlines.
text = "\r\n" * 5
expect = "\n" * 5
self.assertEqual(expect, dedent(text))

# Whitespace mixture.
text = " \n\t\n \n\t\t\n\n\n "
expect = "\n\n\n\n\n\n"
self.assertEqual(expect, dedent(text))

# Lines consisting only of whitespace are always normalised
text = "a\n \n\t\n"
expect = "a\n\n\n"
self.assertEqual(expect, dedent(text))

# Whitespace characters on non-empty lines are retained
text = "a\r\n\r\n\r\n"
expect = "a\r\n\n\n"
self.assertEqual(expect, dedent(text))

# Uneven indentation with declining indent level.
text = " Foo\n Bar\n" # 5 spaces, then 4
expect = " Foo\nBar\n"
self.assertEqual(expect, dedent(text))

# Declining indent level with blank line.
text = " Foo\n\n Bar\n" # 5 spaces, blank, then 4
expect = " Foo\n\nBar\n"
self.assertEqual(expect, dedent(text))

# Declining indent level with whitespace only line.
text = " Foo\n \n Bar\n" # 5 spaces, then 4, then 4
expect = " Foo\n\nBar\n"
self.assertEqual(expect, dedent(text))

text = " hello\tthere\n how are\tyou?"
expect = "hello\tthere\nhow are\tyou?"
self.assertEqual(expect, dedent(text))

# dedent() only removes whitespace that can be uniformly removed!
text = "\thello there\n\thow are you?"
expect = "hello there\nhow are you?"
self.assertEqual(expect, dedent(text))

text = '''\
def foo():
while 1:
return foo
'''
expect = '''\
def foo():
while 1:
return foo
'''
self.assertEqual(expect, dedent(text))


@support.cpython_only
@unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
def test_concat(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:option:`-c` now dedents like :func:`textwrap.dedent`
14 changes: 13 additions & 1 deletion Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include "pycore_pyerrors.h" // _PyErr_ChainExceptions1()
#include "pycore_pylifecycle.h" // _PyInterpreterConfig_InitFromDict()
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII()
#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII() / _PyUnicode_Dedent()

#include "clinic/_testinternalcapi.c.h"

Expand Down Expand Up @@ -1416,6 +1416,17 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg)
return _PyUnicode_TransformDecimalAndSpaceToASCII(arg);
}

/* Test _PyUnicode_Dedent() */
static PyObject *
unicode_dedent(PyObject *self, PyObject *arg)
{
if (arg == Py_None) {
arg = NULL;
}
return _PyUnicode_Dedent(arg);
}


static PyObject *
test_pyobject_is_freed(const char *test_name, PyObject *op)
{
Expand Down Expand Up @@ -2422,6 +2433,7 @@ static PyMethodDef module_functions[] = {
{"_PyTraceMalloc_GetTraceback", tracemalloc_get_traceback, METH_VARARGS},
{"test_tstate_capi", test_tstate_capi, METH_NOARGS, NULL},
{"_PyUnicode_TransformDecimalAndSpaceToASCII", unicode_transformdecimalandspacetoascii, METH_O},
{"_PyUnicode_Dedent", unicode_dedent, METH_O},
{"check_pyobject_forbidden_bytes_is_freed",
check_pyobject_forbidden_bytes_is_freed, METH_NOARGS},
{"check_pyobject_freed_is_freed", check_pyobject_freed_is_freed, METH_NOARGS},
Expand Down
22 changes: 10 additions & 12 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -14309,9 +14309,9 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
}

/*
This function searchs the longest common leading whitespace
This function searches the longest common leading whitespace
of all lines in the [src, end).
It returns the length of the common leading whitespace and sets `output` to
It returns the length of the common leading whitespace and sets *output* to
point to the beginning of the common leading whitespace if length > 0.
*/
static Py_ssize_t
Expand All @@ -14331,7 +14331,7 @@ search_longest_common_leading_whitespace(

// scan the whole line
while (iter < end && *iter != '\n') {
if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
if (!leading_whitespace_end && !Py_ISSPACE(Py_CHARMASK(*iter))) {
/* `iter` points to the first non-whitespace character
in this line */
if (iter == line_start) {
Expand Down Expand Up @@ -14389,8 +14389,8 @@ search_longest_common_leading_whitespace(
}

/* Dedent a string.
Behaviour is expected to be an exact match of `textwrap.dedent`.
Return a new reference on success, NULL with exception set on error.
Behaviour is expected to be an exact match of textwrap.dedent.
Return a new reference on success, NULL with an exception set on error.
*/
PyObject *
_PyUnicode_Dedent(PyObject *unicode)
Expand All @@ -14413,10 +14413,6 @@ _PyUnicode_Dedent(PyObject *unicode)
Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
src, end, &whitespace_start);

if (whitespace_len == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep the fast path.

Copy link
Member Author

@StanFromIreland StanFromIreland Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't, we need to clear lines, see the tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to do it? in general there is nothing to dedent, so it'll slow down normal cases. Even if the comment says that it's meant to match textwrap.dedent(), I don't think it's needed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to respect the behavior of textwrap.dedent, as is noted in What's new 3.14, the docs, and the comments. Then yes.

Copy link
Member

@picnixz picnixz Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I don't think we need to! It's an internal function. textwrap.dedent is implemented in pure Python.

Copy link
Member Author

@StanFromIreland StanFromIreland Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But our documentation says we do:

Whats New 3.14

The auto-dedentation behavior mirrors textwrap.dedent().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you would rather just remove the false claims, we can do: https://github.com/python/cpython/compare/main...StanFromIreland:remove-misleading-notes?expand=1

Though I think this may cause confusion in the future.

Copy link
Member

@picnixz picnixz Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The auto-dedentation behavior mirrors textwrap.dedent().

Yes, but we can just say that we don't normalize whitespaces and only consider spaces and tabs. No one should ever write spaces with other space-like characters. Let's just amend the NEWS. As for str.dedent(), it will need a PEP which still doesn't exist and the discussion seems stalled IMO.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I am working on the PEP :-)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened #138620 with your alternative, though I still think fixing it is better.

return Py_NewRef(unicode);
}

// now we should trigger a dedent
char *dest = PyMem_Malloc(src_len);
if (!dest) {
Expand All @@ -14431,7 +14427,7 @@ _PyUnicode_Dedent(PyObject *unicode)

// iterate over a line to find the end of a line
while (iter < end && *iter != '\n') {
if (in_leading_space && *iter != ' ' && *iter != '\t') {
if (in_leading_space && !Py_ISSPACE(Py_CHARMASK(*iter))) {
in_leading_space = false;
}
++iter;
Expand All @@ -14441,8 +14437,10 @@ _PyUnicode_Dedent(PyObject *unicode)
bool append_newline = iter < end;

// if this line has all white space, write '\n' and continue
if (in_leading_space && append_newline) {
*dest_iter++ = '\n';
if (in_leading_space) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this the issue? or was it *iter != ' ' ...?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There were multiple issues.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please indicate the issues for posterity.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two issues:

  1. Not clearing lines that are only whitespace, whereas textwrap.dedent does
  2. Only considering '\t' and ' ', whereas textwrap.dedent uses str.isspace

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I don't think it's worth changing this function. We should just change the comment. It's an internal function.

if (append_newline) {
*dest_iter++ = '\n';
}
continue;
}

Expand Down
Loading