Skip to content
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
16be08f
Very rough proof-of-concept
Erotemic Apr 29, 2023
e88216b
Cleanups and comments
Erotemic Apr 29, 2023
bcb7c77
Fix bad decref, only trigger if command starts with a newline
Erotemic Apr 29, 2023
fb8985a
wchar dedent
Erotemic Apr 30, 2023
26f27a8
tweaks
Erotemic Apr 30, 2023
417eff8
Use new char* implementation
Erotemic Apr 30, 2023
924e0a6
Rename function
Erotemic Apr 30, 2023
9f95672
tweaks
Erotemic Apr 30, 2023
3f4a78b
More tweaks
Erotemic May 1, 2023
97f2079
Replace strncmp with direct char comparison
Erotemic May 1, 2023
04435eb
Remove debug code
Erotemic May 1, 2023
4c4eca9
Made new function static
Erotemic May 1, 2023
f9c969b
Handwritten char iter and _PyBytesWriter_
Erotemic May 1, 2023
674f1e0
reimplement it to imitate `textwrap.dedent`
sunmy2019 May 1, 2023
05d4169
fix missing initialization
sunmy2019 May 1, 2023
9d53c4e
fix ref leak
sunmy2019 May 1, 2023
689a13a
fix empty string
sunmy2019 May 1, 2023
f0ac7ea
nit: remove unnecessary variable
sunmy2019 May 1, 2023
71cad01
remove unnecessary include
sunmy2019 May 1, 2023
4549de8
Add test cases
Erotemic May 1, 2023
0c3b90b
Fix test on windows
Erotemic May 1, 2023
1f5b746
normalize windows line endings
Erotemic May 1, 2023
ca40589
Merge branch 'main' into dedent_pymain_command
Erotemic May 1, 2023
1f17e23
Update Modules/main.c
sunmy2019 May 3, 2023
2de2e1e
Merge branch 'python:main' into dedent_pymain_command
sunmy2019 Jul 23, 2023
c84616c
refactor code
sunmy2019 Jul 23, 2023
a19b675
Apply suggestions from code review
sunmy2019 Jul 23, 2023
7ce411f
Update Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issu…
sunmy2019 Jul 23, 2023
dea4301
resolve comments
sunmy2019 Jul 23, 2023
e06d40c
Update Modules/main.c
sunmy2019 Jul 23, 2023
a40d028
rename `out` to `dest`
sunmy2019 Jul 23, 2023
9569655
move to _PyUnicode_Dedent
sunmy2019 Jul 24, 2023
1735d0f
Apply suggestions from code review
sunmy2019 Jul 24, 2023
d3681b7
clean up things
sunmy2019 Jul 25, 2023
3b4a7bc
Merge branch 'main' into dedent_pymain_command
Erotemic Apr 2, 2024
f355760
Merge branch 'main' into dedent_pymain_command
Erotemic Apr 19, 2024
b1e89c9
Merge branch 'main' into dedent_pymain_command
AA-Turner Apr 9, 2025
d1b4cd1
Update Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issu…
Erotemic Apr 10, 2025
e556bbf
lint: space in folder name
Erotemic Apr 10, 2025
136c8b0
Explicit include of pycore_unicodeobject.h
Erotemic Apr 10, 2025
8e5cc7f
Merge branch 'main' into dedent_pymain_command
Erotemic Apr 10, 2025
cd14a00
Apply suggestions from code review
sunmy2019 Apr 17, 2025
07d2273
Resolve Comments
sunmy2019 Apr 17, 2025
ed6e17b
Refactor implementation
sunmy2019 Apr 18, 2025
d1edb1b
Merge branch 'main' into dedent_pymain_command
sunmy2019 Apr 18, 2025
4c78c57
Apply suggestions from code review
methane Apr 18, 2025
38d2a4e
add what's new entry
methane Apr 18, 2025
42b6330
Document dedentation of command in version 3.14
methane Apr 18, 2025
98c17e5
Apply suggestions from code review
picnixz Apr 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions Include/internal/pycore_bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,31 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
const void *bytes,
Py_ssize_t size);


/** Dedent a UTF-8 encoded string.
 * The behavior is expected to match `textwrap.dedent`.
 *
 * Return value:
 *    0: no dedent is needed; the writer is left untouched
 *    1: success; the dedented string has been written through the writer
 *   -1: failure; *p is set to NULL
 *
 * str is the beginning of the string to dedent.
 *    expecting (str != NULL)
 *
 * len is the length in bytes of the string to dedent.
 *    expecting (len >= 0)
 *
 * writer is the _PyBytesWriter object that receives the dedented string.
 *    expecting (writer != NULL)
 *
 * p points to a char* holding the current position in the _PyBytesWriter.
 *    On a return value of 1 it is updated to the position just after the
 *    dedented string.
 *    expecting (p != NULL && *p != NULL)
 */
PyAPI_FUNC(int)
_PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
char **p);
#ifdef __cplusplus
}
#endif
Expand Down
69 changes: 69 additions & 0 deletions Lib/test/test_cmd_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,75 @@ def res2int(res):
)
self.assertEqual(res2int(res), (6000, 6000))

def test_cmd_dedent(self):
# Test that -c auto-dedents its argument.  Every case is run twice, once
# with the code as written and once after an explicit textwrap.dedent();
# both runs must exit with status 0 and produce the same, expected output.
from textwrap import dedent
# Each case maps 'code' (the -c argument) to 'expected' (the stripped
# stdout, with Windows line endings normalized).
test_cases = [
{
# Triple-quoted literal passed to -c unchanged.
# NOTE(review): presumably space-indented, judging by the message it
# prints -- confirm against the original file's indentation.
'code': '''
print('space-auto-dedent')
''',
'expected': b'space-auto-dedent',
},
{
# '^' is a placeholder: each one becomes a single tab character.
'code': dedent('''
^^^print('tab-auto-dedent')
''').replace('^', '\t'),
'expected': b'tab-auto-dedent',
},
{
# '^' is a placeholder for the tab-space-tab sequence '\t \t', so
# the indent mixes tabs and spaces but is the same on every line.
'code': dedent('''
^^if 1:
^^^^print('mixed-auto-dedent-1')
^^print('mixed-auto-dedent-2')
''').replace('^', '\t \t'),
'expected': b'mixed-auto-dedent-1\nmixed-auto-dedent-2',
},
{
# '$' marks the end of each line and is removed before the code is
# run (presumably so that trailing whitespace stays visible and is
# not stripped by editors).
'code': '''
data = """$

this data has an empty newline above and a newline with spaces below $
$
"""$
if 1: $
print(repr(data))$
'''.replace('$', ''),
# Note: entirely blank lines are normalized to \n, even if they
# are part of a data string. This is consistent with
# textwrap.dedent behavior, but might not be intuitive.
'expected': b"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
},
]
for case in test_cases:
# Run the auto-dedent case
args1 = sys.executable, '-c', case['code']
proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
self.assertEqual(proc1.returncode, 0, proc1)
output1 = proc1.stdout.strip()

# Manually dedent beforehand, check the result is the same.
args2 = sys.executable, '-c', dedent(case['code'])
proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
self.assertEqual(proc2.returncode, 0, proc2)
output2 = proc2.stdout.strip()

# Both runs must agree with each other and, once '\r\n' is normalized
# to '\n', with the expected output.
self.assertEqual(output1, output2)
self.assertEqual(output1.replace(b'\r\n', b'\n'), case['expected'])

def test_cmd_dedent_failcase(self):
# Mixing tabs and spaces is not allowed
from textwrap import dedent
# '-' and '+' are placeholders for the two whitespace characters.  The two
# code lines begin with different placeholders, so after substitution
# their leading whitespace starts with different characters.
template = dedent(
'''
-+if 1:
+-++ print('will fail')
''')
# First mapping: '-' becomes a space and '+' becomes a tab.
code = template.replace('-', ' ').replace('+', '\t')
assert_python_failure('-c', code)
# Swapped mapping: '-' becomes a tab and '+' becomes a space.
code = template.replace('-', '\t').replace('+', ' ')
assert_python_failure('-c', code)


@unittest.skipIf(interpreter_requires_environment(),
'Cannot run -I tests when PYTHON env vars are required.')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
String arguments passed to "-c" are now automatically dedented as if by
:func:`textwrap.dedent`. This allows "python -c" invocations to be indented
in shell scripts without causing indentation errors.

Add a private API :c:func:`_PyBytes_Dedent`.

(Patch by Jon Crall and Steven Sun)
39 changes: 39 additions & 0 deletions Modules/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "pycore_pathconfig.h" // _PyPathConfig_ComputeSysPath0()
#include "pycore_pylifecycle.h" // _Py_PreInitializeFromPyArgv()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_bytesobject.h" // _PyBytesWriter, _PyBytes_Dedent()

/* Includes for exit_sigint() */
#include <stdio.h> // perror()
Expand Down Expand Up @@ -228,6 +229,37 @@ pymain_import_readline(const PyConfig *config)
}
}

/* Strip common leading whitespace from UTF-8 encoded bytes, just as
   textwrap.dedent does.

   `bytes` must be a non-NULL exact bytes object; the caller keeps its own
   reference to it.

   Returns a new reference: `bytes` itself when no dedent is needed,
   otherwise a new bytes object holding the dedented text.
   Returns NULL on failure. */
static PyObject *
dedent_utf8_bytes(PyObject *bytes)
{
    /* PyBytes_AsStringAndSize() below cannot accept NULL, and the check must
       be applied to the object itself, not to its type object. */
    assert(bytes != NULL);
    assert(PyBytes_CheckExact(bytes));

    Py_ssize_t nchars;
    char *start;
    if (PyBytes_AsStringAndSize(bytes, &start, &nchars) != 0) {
        return NULL;
    }

    _PyBytesWriter writer;
    _PyBytesWriter_Init(&writer);
    char *p = _PyBytesWriter_Alloc(&writer, nchars);
    if (p == NULL) {
        return NULL;
    }

    int ret = _PyBytes_Dedent(start, nchars, &writer, &p);
    if (ret <= 0) {
        /* Nothing was produced (error or no dedent needed): release the
           writer here so cleanup never depends on what the callee did. */
        _PyBytesWriter_Dealloc(&writer);
        if (ret < 0) {
            return NULL;
        }
        /* No dedent needed: hand back the input as a new reference. */
        Py_INCREF(bytes);
        return bytes;
    }
    return _PyBytesWriter_Finish(&writer, p);
}

static int
pymain_run_command(wchar_t *command)
Expand All @@ -250,6 +282,13 @@ pymain_run_command(wchar_t *command)
goto error;
}

PyObject *new_bytes = dedent_utf8_bytes(bytes);
if (new_bytes == NULL) {
Py_DECREF(bytes);
goto error;
}
Py_SETREF(bytes, new_bytes);

PyCompilerFlags cf = _PyCompilerFlags_INIT;
cf.cf_flags |= PyCF_IGNORE_COOKIE;
ret = PyRun_SimpleStringFlags(PyBytes_AsString(bytes), &cf);
Expand Down
147 changes: 147 additions & 0 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3535,6 +3535,8 @@ _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
}


/* Algorithms on bytes */

void
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
const char* src, Py_ssize_t len_src)
Expand All @@ -3558,3 +3560,148 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
}
}

/** Dedent a UTF-8 encoded string.
 * The behavior is expected to match `textwrap.dedent`: the longest leading
 * run of spaces/tabs shared by every line that contains something other
 * than spaces and tabs is removed, and lines made only of spaces and tabs
 * are reduced to their line ending.
 *
 * Return value:
 *    0: no dedent is needed; the writer is left untouched
 *    1: success; the dedented string has been written through the writer
 *   -1: failure; *p is set to NULL
 *
 * str is the beginning of the string to dedent.
 *    expecting (str != NULL)
 *
 * len is the length in bytes of the string to dedent.
 *    expecting (len >= 0)
 *
 * writer is the _PyBytesWriter object that receives the dedented string.
 *    expecting (writer != NULL)
 *
 * p points to a char* holding the current position in the _PyBytesWriter.
 *    On a return value of 1 it is updated to the position just after the
 *    dedented string.
 *    expecting (p != NULL && *p != NULL)
 */
int
_PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
                char **p)
{
    assert(str);
    assert(p != NULL && *p != NULL);
    assert(writer);

    if (len <= 0) {
        return 0;
    }

    const char *end = str + len;
    assert(str < end);  // prevent overflow when len is too large

    /* Pass 1: find the longest common leading whitespace.
       [candidate_start, candidate_start + candidate_len) is the current
       candidate; it can only shrink as more lines are examined. */
    const char *candidate_start = NULL;
    Py_ssize_t candidate_len = 0;

    for (const char *iter = str; iter < end; ++iter) {
        const char *line_start = iter;
        const char *leading_whitespace_end = NULL;

        // scan the whole line
        while (iter < end && *iter != '\n') {
            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
                if (iter == line_start) {
                    // some line has no indent, fast exit!
                    return 0;
                }
                leading_whitespace_end = iter;
            }
            ++iter;
        }

        // if this line has all white space, skip it
        if (!leading_whitespace_end) {
            continue;
        }

        if (!candidate_start) {
            // first line with content: its whole indent is the candidate
            candidate_start = line_start;
            candidate_len = leading_whitespace_end - line_start;
            assert(candidate_len > 0);
        }
        else {
            /* Shrink the candidate to the prefix it shares, byte for byte,
               with this line's leading whitespace
               [line_start, leading_whitespace_end). */
            Py_ssize_t new_candidate_len = 0;

            for (const char *candidate_iter = candidate_start,
                            *line_iter = line_start;
                 candidate_iter < candidate_start + candidate_len &&
                 line_iter < leading_whitespace_end;
                 ++candidate_iter, ++line_iter)
            {
                if (*candidate_iter != *line_iter) {
                    break;
                }
                ++new_candidate_len;
            }

            candidate_len = new_candidate_len;
            if (candidate_len == 0) {
                return 0;
            }
        }
    }

    assert(candidate_len >= 0);
    if (candidate_len == 0) {
        // only whitespace-only lines were seen: nothing to strip
        return 0;
    }

    /* Pass 2: trigger a dedent. The output is never longer than the input,
       so reserving `len` bytes is enough. */
    char *p_ = _PyBytesWriter_Prepare(writer, *p, len);
    if (p_ == NULL) {
        *p = NULL;
        return -1;
    }

    for (const char *iter = str; iter < end; ++iter) {
        const char *line_start = iter;
        bool in_leading_space = true;

        // iterate over a line to find the end of a line
        while (iter < end && *iter != '\n') {
            if (in_leading_space && *iter != ' ' && *iter != '\t') {
                in_leading_space = false;
            }
            ++iter;
        }

        // invariant: *iter == '\n' or iter == end
        bool append_newline = iter < end;

        /* A whitespace-only line keeps only its newline, if it has one.
           This must also cover a final line without a trailing newline:
           such a line may be shorter than the common indent, so it must
           never reach the copy below. */
        if (in_leading_space) {
            if (append_newline) {
                *p_++ = '\n';
            }
            continue;
        }

        /* copy [line_start + candidate_len, iter) to buffer, then
           conditionally append '\n' */
        Py_ssize_t new_line_len = iter - line_start - candidate_len;
        assert(new_line_len >= 0);

        memcpy(p_, line_start + candidate_len, new_line_len);
        p_ += new_line_len;

        if (append_newline) {
            *p_++ = '\n';
        }
    }

    *p = p_;
    return 1;
}