Skip to content

Commit 76a7163

Browse files
authored
test tidy-import on cpython, and fix a few issues. (#476)
Test tidy-import on cpython, and fix a few issues. This shallow-clone cpython souce and run tidy import on all those files. For speed, this ads the NOTHING action that does not actually change the file but print a message if there is an issue on a file. This also adds more extended error handling, and fix some edge case like removing an import statement that would make a function empty.
1 parent d8c1338 commit 76a7163

File tree

8 files changed

+242
-27
lines changed

8 files changed

+242
-27
lines changed

.github/workflows/test.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,39 @@ jobs:
9292
- uses: codecov/codecov-action@v5
9393
with:
9494
token: ${{ secrets.CODECOV_TOKEN }}
95+
96+
cpython-smoke:
97+
name: tidy-imports smoke test on CPython
98+
runs-on: ubuntu-latest
99+
steps:
100+
- uses: actions/checkout@v6
101+
102+
- name: Install uv
103+
uses: astral-sh/setup-uv@v7
104+
with:
105+
enable-cache: true
106+
cache-dependency-glob: "pyproject.toml"
107+
108+
- name: Install pyflyby
109+
run: |
110+
uv python install 3.14
111+
uv venv
112+
uv pip install meson-python meson ninja "pybind11>=2.10.4" "setuptools-scm>=8"
113+
uv pip install --no-build-isolation -ve .
114+
115+
- name: Shallow-clone CPython
116+
run: git clone --depth=1 --quiet https://github.com/python/cpython.git /tmp/cpython
117+
118+
- name: Run tidy-imports on all CPython .py files
119+
run: |
120+
# Files known to cause issues with tidy-imports (e.g. encoding quirks,
121+
# intentionally broken syntax used in tests, etc.)
122+
rm /tmp/cpython/Lib/test/encoded_modules/module_iso_8859_1.py
123+
rm /tmp/cpython/Lib/test/encoded_modules/module_koi8_r.py
124+
rm /tmp/cpython/Lib/test/test_import/data/syntax_warnings.py
125+
rm /tmp/cpython/Lib/test/test_tarfile.py
126+
rm /tmp/cpython/Lib/test/tokenizedata/bad_coding2.py
127+
rm /tmp/cpython/Lib/test/tokenizedata/badsyntax_3131.py
128+
rm /tmp/cpython/Lib/test/tokenizedata/badsyntax_pep3120.py
129+
130+
uv run python bin/tidy-imports --no-add --no-remove-unused --action=NOTHING /tmp/cpython >/dev/null

lib/python/pyflyby/_autoimp.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,19 @@ def _visit__all__(self, node):
711711
return
712712
if (len(node.targets) == 1 and isinstance(node.targets[0], ast.Name)
713713
and node.targets[0].id == '__all__'):
714+
_DYNAMIC_ALL_TYPES = (
715+
ast.ListComp,
716+
ast.SetComp,
717+
ast.BinOp,
718+
ast.Call,
719+
ast.IfExp,
720+
)
721+
if isinstance(node.value, _DYNAMIC_ALL_TYPES):
722+
logger.info(
723+
"Can't handle dynamic __all__ (%s); skipping",
724+
type(node.value).__name__,
725+
)
726+
return
714727
if not isinstance(node.value, (ast.List, ast.Tuple)):
715728
logger.warning("Don't know how to handle __all__ as (%s)" % node.value)
716729
return

lib/python/pyflyby/_cmdline.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,13 @@ def parse_action(v):
104104
return action_exit1
105105
elif V == "CHANGEDEXIT1":
106106
return action_changedexit1
107+
elif V == "NOTHING":
108+
return action_nothing
107109
else:
108110
raise Exception(
109111
"Bad argument %r to --action; "
110-
"expected PRINT or REPLACE or QUERY or IFCHANGED or EXIT1 "
111-
"or CHANGEDEXIT1 or EXECUTE:..." % (v,)
112+
"expected PRINT,REPLACE,QUERY,IFCHANGED,EXIT1,"
113+
"CHANGEDEXIT1,EXECUTE or NOTHING:..." % (v,)
112114
)
113115

114116
def set_actions(actions):
@@ -127,7 +129,7 @@ def callback(option, opt_str, value, parser):
127129
group.add_option(
128130
"--actions", type='string', action='callback',
129131
callback=action_callback,
130-
metavar="PRINT|REPLACE|IFCHANGED|QUERY|DIFF|EXIT1|CHANGEDEXIT1:EXECUTE:mycommand",
132+
metavar="PRINT|REPLACE|IFCHANGED|QUERY|DIFF|EXIT1|CHANGEDEXIT1|NOTHING|EXECUTE:mycommand",
131133
help=hfmt(
132134
"""
133135
Comma-separated list of action(s) to take. If PRINT, print
@@ -138,7 +140,8 @@ def callback(option, opt_str, value, parser):
138140
If IFCHANGED, then continue actions only if file was
139141
changed. If EXIT1, then exit with exit code 1 after all
140142
files/actions are processed. If CHANGEDEXIT1, then exit
141-
with exit code 1 if any file was changed."""
143+
with exit code 1 if any file was changed. NOTHING can be used
144+
to do nothing and test tidy-import does not crash"""
142145
),
143146
)
144147
group.add_option(
@@ -465,6 +468,14 @@ def on_error_filename_arg(arg):
465468
raise SystemExit(exit_code)
466469

467470

471+
def action_nothing(m):
472+
try:
473+
# side effect
474+
m.output_content
475+
except Exception:
476+
logger.error("Error with file %r", m.filename)
477+
478+
468479
def action_print(m):
469480
output_content = m.output_content
470481
sys.stdout.write(output_content.joined)

lib/python/pyflyby/_file.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -682,8 +682,14 @@ def read_file(filename: Filename) -> FileText:
682682
if filename == Filename.STDIN:
683683
data = sys.stdin.read()
684684
else:
685-
with io.open(str(filename), 'r') as f:
686-
data = f.read()
685+
try:
686+
with io.open(str(filename), 'r') as f:
687+
data = f.read()
688+
except UnicodeDecodeError as e:
689+
raise UnicodeDecodeError(
690+
e.encoding, e.object, e.start, e.end,
691+
f"{e.reason} (while reading {filename})"
692+
) from None
687693
return FileText(data, filename=filename)
688694

689695

lib/python/pyflyby/_imports2s.py

Lines changed: 86 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from pyflyby._importclns import ImportSet, NoSuchImportError
1313
from pyflyby._importdb import ImportDB
1414
from pyflyby._importstmt import (Import, ImportFormatParams,
15-
ImportStatement)
15+
ImportStatement,
16+
NonImportStatementError)
1617
from pyflyby._log import logger
1718
from pyflyby._parse import PythonBlock, PythonStatement
1819
from pyflyby._util import ImportPathCtx, Inf, NullCtx, memoize
@@ -28,7 +29,9 @@
2829
_IMPORT_TYPES = (ast.Import, ast.ImportFrom)
2930

3031

31-
def _group_consecutive_imports(body: list) -> list[list]:
32+
def _group_consecutive_imports(
33+
body: list[ast.stmt],
34+
) -> list[list[Union[ast.Import, ast.ImportFrom]]]:
3235
"""Group consecutive import statements from an AST body.
3336
3437
Parameters
@@ -194,6 +197,48 @@ class NoImportBlockError(Exception):
194197
class ImportAlreadyExistsError(Exception):
195198
pass
196199

200+
201+
def _maybe_insert_pass(
202+
lines: list[str], idx: int, indent: int, lineno: int
203+
) -> None:
204+
"""Insert a ``pass`` statement at *idx* when removing a line would leave a
205+
block-opener (a line ending with ``:``) without a body.
206+
207+
After a line has been deleted at position *idx*, this function walks
208+
backwards to find the nearest non-empty preceding line. If that line ends
209+
with ``:`` (a compound-statement header such as ``def``, ``class``,
210+
``if``, etc.) and the next non-empty line after *idx* is at an equal or
211+
lower indentation level, the block body is gone and a ``pass`` statement
212+
is inserted at *idx* using *indent* spaces.
213+
214+
:param lines: Source lines of the block, already modified (the import line
215+
has been deleted before this call).
216+
:param idx: Index into *lines* where the deleted line used to be.
217+
:param indent: Column offset (number of leading spaces) of the deleted line,
218+
used to indent the inserted ``pass``.
219+
:param lineno: Absolute line number in the file (used only for logging).
220+
"""
221+
prev_idx = idx - 1
222+
while prev_idx >= 0 and lines[prev_idx].strip() == "":
223+
prev_idx -= 1
224+
if prev_idx < 0:
225+
return
226+
prev_line = lines[prev_idx]
227+
if not prev_line.rstrip().endswith(":"):
228+
return
229+
prev_indent = len(prev_line) - len(prev_line.lstrip())
230+
next_idx = idx
231+
while next_idx < len(lines) and lines[next_idx].strip() == "":
232+
next_idx += 1
233+
body_gone = (
234+
next_idx >= len(lines)
235+
or (len(lines[next_idx]) - len(lines[next_idx].lstrip())) <= prev_indent
236+
)
237+
if body_gone:
238+
lines.insert(idx, " " * indent + "pass")
239+
logger.debug("Inserted 'pass' at line %d to preserve empty block", lineno)
240+
241+
197242
class SourceToSourceFileImportsTransformation(SourceToSourceTransformationBase):
198243
blocks: list[Union[SourceToSourceImportBlockTransformation, SourceToSourceTransformation]]
199244
import_blocks: list[Union[SourceToSourceImportBlockTransformation, _LocalImportBlockWrapper]]
@@ -231,7 +276,11 @@ def preprocess(self) -> None:
231276
logger.debug("preprocess: extracted %d total import blocks", len(self.import_blocks))
232277

233278
def _create_import_block_from_group(
234-
self, group: list, lines: list, start_line: int, end_line: int
279+
self,
280+
group: list[Union[ast.Import, ast.ImportFrom]],
281+
lines: list[str],
282+
start_line: int,
283+
end_line: int,
235284
) -> None:
236285
"""Create an import block from a group of import statements.
237286
@@ -240,34 +289,49 @@ def _create_import_block_from_group(
240289
241290
Parameters
242291
----------
243-
group : list
244-
List of consecutive import AST nodes
245-
lines : list
246-
Lines of the full source text
292+
group : list[ast.Import | ast.ImportFrom]
293+
Consecutive import AST nodes to extract.
294+
lines : list[str]
295+
All source lines of the file (1-indexed via ``lines[lineno - 1]``).
247296
start_line : int
248-
Starting line number of the group
297+
First line number of the group (1-indexed).
249298
end_line : int
250-
Ending line number of the group
299+
Last line number of the group, accounting for multiline imports.
251300
"""
252301
import_text_parts = []
253302
semicolon_suffixes = {}
254303

255304
for node in group:
256305
lineno = node.lineno
257-
line: str = lines[lineno - 1]
306+
end_lineno = getattr(node, "end_lineno", lineno)
258307

259308
if hasattr(node, "col_offset") and hasattr(node, "end_col_offset"):
260-
# WARNING offsets seem to be in number of bytes!
261-
import_stmt = line.encode()[
262-
node.col_offset : node.end_col_offset
263-
].decode()
264-
remaining = line.encode()[node.end_col_offset :].decode().lstrip()
265-
if remaining and remaining.startswith(";"):
309+
# WARNING: col_offset/end_col_offset are in bytes, not characters.
310+
if end_lineno > lineno:
311+
# Multiline import (e.g. "from foo import (\n a,\n b\n)").
312+
# end_col_offset refers to the last line, not the first, so we
313+
# must collect all source lines and trim each end independently.
314+
first = lines[lineno - 1].encode()
315+
last = lines[end_lineno - 1].encode()
316+
node_lines = [first[node.col_offset :].decode()]
317+
for mid in range(lineno + 1, end_lineno):
318+
node_lines.append(lines[mid - 1])
319+
node_lines.append(last[: node.end_col_offset].decode())
320+
import_stmt = "\n".join(node_lines)
321+
remaining = last[node.end_col_offset :].decode().lstrip()
322+
else:
323+
line: str = lines[lineno - 1]
324+
import_stmt = line.encode()[
325+
node.col_offset : node.end_col_offset
326+
].decode()
327+
remaining = line.encode()[node.end_col_offset :].decode().lstrip()
328+
329+
if remaining.startswith(";"):
266330
suffix = remaining[1:].lstrip()
267331
if suffix:
268332
semicolon_suffixes[lineno] = suffix
269333
else:
270-
import_stmt = line
334+
import_stmt = lines[lineno - 1]
271335

272336
import_text_parts.append(import_stmt)
273337

@@ -276,7 +340,7 @@ def _create_import_block_from_group(
276340
try:
277341
import_block = PythonBlock(import_text)
278342
trans = SourceToSourceImportBlockTransformation(import_block)
279-
except (SyntaxError, ValueError) as e:
343+
except (SyntaxError, ValueError, NonImportStatementError) as e:
280344
logger.debug(
281345
"Failed to create import block for lines %d-%d: %s",
282346
start_line,
@@ -329,7 +393,7 @@ def _extract_imports_from_statement(
329393

330394
for group in import_groups:
331395
start_line = group[0].lineno
332-
end_line = group[-1].lineno
396+
end_line = getattr(group[-1], "end_lineno", group[-1].lineno)
333397
self._create_import_block_from_group(group, lines, start_line, end_line)
334398

335399
# Recursively check nested function/class definitions
@@ -589,6 +653,7 @@ def remove_import(self, imp, lineno):
589653
return imp
590654

591655
def _remove_local_import_from_blocks(self, imp, lineno):
656+
592657
"""
593658
Remove a local import from the actual code blocks.
594659
This modifies the _output of blocks in self.blocks to remove the import line.
@@ -627,8 +692,9 @@ def _remove_local_import_from_blocks(self, imp, lineno):
627692
offset,
628693
line_to_remove.strip(),
629694
)
630-
# Remove this line
695+
removed_indent = len(line_to_remove) - len(line_to_remove.lstrip())
631696
del lines[relative_lineno]
697+
_maybe_insert_pass(lines, relative_lineno, removed_indent, lineno)
632698

633699
# Track that we removed a line from this block
634700
self._removed_lines_per_block[block_idx] = offset + 1

tests/test_autoimp.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1894,7 +1894,6 @@ def defaultdict():
18941894
assert unused == []
18951895

18961896

1897-
18981897
def test_load_symbol_1():
18991898
assert load_symbol("os.path.join", {"os": os}) is os.path.join
19001899

0 commit comments

Comments
 (0)