diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bd2a1d6d63e..ed0afc50879 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: - id: check-yaml - repo: https://github.com/rhysd/actionlint - rev: v1.7.9 + rev: v1.7.10 hooks: - id: actionlint args: [-ignore, SC] diff --git a/marimo/_convert/ipynb.py b/marimo/_convert/ipynb.py index db59ed0656d..d7bfd178e6f 100644 --- a/marimo/_convert/ipynb.py +++ b/marimo/_convert/ipynb.py @@ -7,6 +7,7 @@ import sys from collections import defaultdict from dataclasses import dataclass, field +from enum import Enum from typing import Any, Callable, Union from marimo._ast.cell import CellConfig @@ -677,6 +678,21 @@ def visit_AugAssign(self, node: ast.AugAssign) -> ast.Assign: return new_sources +class RenameType(Enum): + """Type of variable rename during notebook conversion.""" + + MODIFIED = "modified" # Variable references its previous value + REDEFINED = "redefined" # Variable is completely redefined + + +@dataclass +class RenameInfo: + """Information about a renamed variable.""" + + new_name: str + rename_type: RenameType + + def transform_duplicate_definitions(sources: list[str]) -> list[str]: """ Rename variables with duplicate definitions across multiple cells, @@ -714,11 +730,11 @@ def transform_duplicate_definitions(sources: list[str]) -> list[str]: print(a) # Cell 3 - a_1 = a + a_1 = a # variable `a` was modified in the original Jupyter notebook a_1 = a_1 + 2 # Cell 4 - a_2 = 3 + a_2 = 3 # variable `a` was redefined in the original Jupyter notebook print(a_2) ``` """ @@ -743,6 +759,22 @@ def get_definitions(sources: list[str]) -> dict[str, list[int]]: continue return definitions + # Find all references in the AST + def find_references(source: str) -> set[str]: + try: + tree = ast.parse(source) + visitor = ScopedVisitor("", ignore_local=True) + visitor.visit(tree) + return set(visitor.refs) + except SyntaxError: + return set() + + # Check if a cell modifies a variable (references it before redefining) + def is_modification(source: str, var_name: str) -> bool: + """Check if the cell references the variable before its first definition.""" + refs = find_references(source) + return var_name in refs + # Collect all definitions that are duplicates def get_duplicates( definitions: dict[str, list[int]], @@ -756,9 +788,10 @@ def get_duplicates( # Create mappings for renaming duplicates def create_name_mappings( duplicates: dict[str, list[int]], definitions: set[str] - ) -> dict[int, dict[str, str]]: + ) -> tuple[dict[int, dict[str, str]], dict[int, dict[str, RenameInfo]]]: new_definitions: set[str] = set() name_mappings: dict[int, dict[str, str]] = defaultdict(dict) + rename_info: dict[int, dict[str, RenameInfo]] = defaultdict(dict) for name, cells in duplicates.items(): for i, cell in enumerate(cells[1:], start=1): counter = i @@ -771,7 +804,17 @@ def create_name_mappings( counter += 1 name_mappings[cell][name] = new_name new_definitions.add(new_name) - return name_mappings + # Check if this is a modification (references previous value) + # or a redefinition (completely new value) + rename_type = ( + RenameType.MODIFIED + if is_modification(sources[cell], name) + else RenameType.REDEFINED + ) + rename_info[cell][name] = RenameInfo( + new_name=new_name, rename_type=rename_type + ) + return name_mappings, rename_info definitions = get_definitions(sources) duplicates = get_duplicates(definitions) @@ -782,7 +825,9 @@ def create_name_mappings( sources = _transform_aug_assign(sources) new_sources: list[str] = sources.copy() - name_mappings = create_name_mappings(duplicates, set(definitions.keys())) + name_mappings, rename_info = create_name_mappings( + duplicates, set(definitions.keys()) + ) for cell_idx, source in enumerate(sources): renamer = Renamer(name_mappings) @@ -833,12 +878,76 @@ def on_ref( # new_source_lines.append(f"{definition} = {dep}") # Add the modified source - new_source_lines.append(ast.unparse(new_tree)) + unparsed_source = ast.unparse(new_tree) + + # Add comments to indicate renamed variables + if cell_idx in rename_info: + unparsed_source = _add_rename_comments( + unparsed_source, rename_info[cell_idx] + ) + + new_source_lines.append(unparsed_source) new_sources[cell_idx] = "\n".join(new_source_lines) return new_sources +def _add_rename_comments( + source: str, cell_rename_info: dict[str, RenameInfo] +) -> str: + """ + Add comments to lines where renamed variables are first defined. + + Args: + source: The unparsed source code + cell_rename_info: Mapping from original variable name to RenameInfo + + Returns: + Source code with comments added + """ + lines = source.split("\n") + new_lines: list[str] = [] + commented_vars: set[str] = ( + set() + ) # Track which vars we've already commented + + # Build a reverse mapping: new_name -> (original_name, rename_type) + new_to_original: dict[str, tuple[str, RenameType]] = {} + for original_name, info in cell_rename_info.items(): + new_to_original[info.new_name] = (original_name, info.rename_type) + + for line in lines: + # Check if this line contains an assignment to a renamed variable + # Look for patterns like "var_name = " at the start of the line + comment_to_add = None + for new_name, (original_name, rename_type) in new_to_original.items(): + if new_name in commented_vars: + continue + # Check if this line starts with the renamed variable assignment + # Handle cases like "a_1 = ...", "a_1, b_1 = ...", etc. + stripped = line.lstrip() + if stripped.startswith(f"{new_name} =") or stripped.startswith( + f"{new_name}=" + ): + comment_to_add = ( + f" # variable `{original_name}` was {rename_type.value} " + f"in the original Jupyter notebook" + ) + commented_vars.add(new_name) + break + + if comment_to_add: + # Add comment at end of line, avoiding duplicate comments + if "# variable `" not in line: + new_lines.append(line + comment_to_add) + else: + new_lines.append(line) + else: + new_lines.append(line) + + return "\n".join(new_lines) + + def bind_cell_metadata( sources: list[str], metadata: list[dict[str, Any]], hide_flags: list[bool] ) -> list[CodeCell]: diff --git a/tests/_convert/snapshots/convert_comments_preservation.py.txt b/tests/_convert/snapshots/convert_comments_preservation.py.txt index aec962af943..1abc032b288 100644 --- a/tests/_convert/snapshots/convert_comments_preservation.py.txt +++ b/tests/_convert/snapshots/convert_comments_preservation.py.txt @@ -24,7 +24,7 @@ def _(): def _(): # Cell 3: Redefinition with more comments # This is the second definition of y - y_1 = 20 # Second definition with inline comment + y_1 = 20 # variable `y` was redefined in the original Jupyter notebook # Second definition with inline comment result = y_1 * 2 # Calculate result print(result) # Print the result return (y_1,) diff --git a/tests/_convert/snapshots/convert_duplicate_definitions_and_aug_assign.py.txt b/tests/_convert/snapshots/convert_duplicate_definitions_and_aug_assign.py.txt index 01f46491500..d2708054a78 100644 --- a/tests/_convert/snapshots/convert_duplicate_definitions_and_aug_assign.py.txt +++ b/tests/_convert/snapshots/convert_duplicate_definitions_and_aug_assign.py.txt @@ -17,7 +17,7 @@ def _(x): @app.cell def _(x): - x_1 = x + 1 + x_1 = x + 1 # variable `x` was modified in the original Jupyter notebook return (x_1,) diff --git a/tests/_convert/snapshots/convert_duplicate_definitions_read_before_write.py.txt b/tests/_convert/snapshots/convert_duplicate_definitions_read_before_write.py.txt index 4d7804c201a..0235884fb99 100644 --- a/tests/_convert/snapshots/convert_duplicate_definitions_read_before_write.py.txt +++ b/tests/_convert/snapshots/convert_duplicate_definitions_read_before_write.py.txt @@ -18,7 +18,7 @@ def _(x): @app.cell def _(x): x - x_1 = 2 + x_1 = 2 # variable `x` was modified in the original Jupyter notebook x_1 return (x_1,) diff --git a/tests/_convert/test_ipynb.py b/tests/_convert/test_ipynb.py index 77241e0e62e..4c1e4b5b0b4 100644 --- a/tests/_convert/test_ipynb.py +++ b/tests/_convert/test_ipynb.py @@ -8,6 +8,7 @@ from marimo._convert.ipynb import ( CellsTransform, CodeCell, + RenameType, Transform, convert_from_ipynb_to_notebook_ir, transform_add_marimo_import, @@ -20,6 +21,14 @@ ) +# Helper to generate expected rename comments +def _rename_comment(var_name: str, rename_type: RenameType) -> str: + return ( + f" # variable `{var_name}` was {rename_type.value} " + f"in the original Jupyter notebook" + ) + + def dd(sources: list[str]) -> list[str]: return [dedent(s) for s in sources] @@ -284,10 +293,10 @@ def test_transform_duplicate_definitions(): assert result == [ "a = 1", "print(a)", - "a_1 = 2", + f"a_1 = 2{_rename_comment('a', RenameType.REDEFINED)}", "print(a_1)", "print(a_1)", - "a_2 = 3", + f"a_2 = 3{_rename_comment('a', RenameType.REDEFINED)}", ] @@ -525,9 +534,9 @@ def test_transform_duplicate_definitions_complex(): assert result == [ "x = 1 # comment unaffected", "a = 1\nb = 2\nprint(a, b)", - "a_1 = 3\nc = 4\nprint(a_1, c)", - "b_1 = 5\nd = 6\nprint(b_1, d)", - "a_2 = 7\nb_2 = 8\nc_1 = 9\nprint(a_2, b_2, c_1)", + f"a_1 = 3{_rename_comment('a', RenameType.REDEFINED)}\nc = 4\nprint(a_1, c)", + f"b_1 = 5{_rename_comment('b', RenameType.REDEFINED)}\nd = 6\nprint(b_1, d)", + f"a_2 = 7{_rename_comment('a', RenameType.REDEFINED)}\nb_2 = 8{_rename_comment('b', RenameType.REDEFINED)}\nc_1 = 9{_rename_comment('c', RenameType.REDEFINED)}\nprint(a_2, b_2, c_1)", ] @@ -598,7 +607,7 @@ def test_transform_duplicate_definitions_with_comprehensions(): "[x for x in range(10)]", "x = 5\nprint(x)", "{x: x**2 for x in range(5)}", - "x_1 = 10\nprint(x_1)", + f"x_1 = 10{_rename_comment('x', RenameType.REDEFINED)}\nprint(x_1)", ] @@ -611,7 +620,7 @@ def test_transform_duplicate_definitions_with_reference_to_previous(): result = transform_duplicate_definitions(sources) assert result == [ "x = 1", - "x_1 = x + 1\nprint(x_1)", + f"x_1 = x + 1{_rename_comment('x', RenameType.MODIFIED)}\nprint(x_1)", "print(x_1)", ] @@ -671,10 +680,10 @@ def test_transform_duplicate_definitions_with_re_def(): result = transform_duplicate_definitions(sources) assert result == [ "x = 1", - "x_1 = x + 1\nprint(x_1)", - "x_2 = x_1 + 2\nprint(x_2)", - "x_3 = x_2 + 3\nprint(x_3)", - "x_4 = 10\nprint(x_4)", + f"x_1 = x + 1{_rename_comment('x', RenameType.MODIFIED)}\nprint(x_1)", + f"x_2 = x_1 + 2{_rename_comment('x', RenameType.MODIFIED)}\nprint(x_2)", + f"x_3 = x_2 + 3{_rename_comment('x', RenameType.MODIFIED)}\nprint(x_3)", + f"x_4 = 10{_rename_comment('x', RenameType.REDEFINED)}\nprint(x_4)", "print(x_4)", ] @@ -687,14 +696,18 @@ def test_transform_duplicate_definitions_with_multiple_variables(): "x, y, z = y, z, x\nprint(x, y, z)", ] result = transform_duplicate_definitions(sources) + mod_x = _rename_comment("x", RenameType.MODIFIED) + mod_y = _rename_comment("y", RenameType.MODIFIED) + redef_x = _rename_comment("x", RenameType.REDEFINED) + redef_z = _rename_comment("z", RenameType.REDEFINED) assert_sources_equal( result, dd( [ "x, y = 1, 2", - "x_1 = x + y\ny_1 = y + x_1\nprint(x_1, y_1)", - "z = x_1 + y_1\nx_2 = z\nprint(x_2, y_1, z)", - "x_3, y_2, z_1 = (y_1, z, x_2)\nprint(x_3, y_2, z_1)", + f"x_1 = x + y{mod_x}\ny_1 = y + x_1{mod_y}\nprint(x_1, y_1)", + f"z = x_1 + y_1\nx_2 = z{redef_x}\nprint(x_2, y_1, z)", + f"x_3, y_2, z_1 = (y_1, z, x_2){mod_x}\nprint(x_3, y_2, z_1)", ] ), ) @@ -725,6 +738,7 @@ def another_func(): """, ] ) + redef_x = _rename_comment("x", RenameType.REDEFINED) expected = dd( [ "x = 10", @@ -734,8 +748,8 @@ def func(): x_1 = x + 1 return x """, - """ -x_1 = func() + f""" +x_1 = func(){redef_x} print(x_1) """, """ @@ -743,8 +757,8 @@ def another_func(): x = 20 return x """, - """ -x_2 = another_func() + f""" +x_2 = another_func(){redef_x} print(x_2) """, ] @@ -762,14 +776,16 @@ def test_transform_duplicate_definitions_with_comprehensions_and_lambdas(): "x = lambda x: x**4\nprint([x(i) for i in range(5)])", ] result = transform_duplicate_definitions(sources) + mod_x = _rename_comment("x", RenameType.MODIFIED) + redef_x = _rename_comment("x", RenameType.REDEFINED) assert_sources_equal( result, dd( [ "x = [i for i in range(5)]", - "x_1 = list(map(lambda x: x**2, x))\nprint(x_1)", - "x_2 = {x: x**3 for x in x_1}\nprint(x_2)", - "x_3 = lambda x: x**4\nprint([x_3(i) for i in range(5)])", + f"x_1 = list(map(lambda x: x**2, x)){mod_x}\nprint(x_1)", + f"x_2 = {{x: x**3 for x in x_1}}{mod_x}\nprint(x_2)", + f"x_3 = lambda x: x**4{redef_x}\nprint([x_3(i) for i in range(5)])", ] ), ) @@ -781,12 +797,13 @@ def test_transform_duplicate_definitions_with_simple_lambda(): "x = lambda x: x**2", ] result = transform_duplicate_definitions(sources) + redef_x = _rename_comment("x", RenameType.REDEFINED) assert_sources_equal( result, dd( [ "x = 0", - "x_1 = lambda x: x**2", + f"x_1 = lambda x: x**2{redef_x}", ] ), ) @@ -800,10 +817,11 @@ def test_transform_simple_redefinition() -> None: "x", ] result = transform_duplicate_definitions(sources) + redef_x = _rename_comment("x", RenameType.REDEFINED) assert result == [ "x = 0", "x", - "x_1 = 1", + f"x_1 = 1{redef_x}", "x_1", ] @@ -815,9 +833,10 @@ def test_transform_duplicate_definitions_with_simple_function(): "def f(x): return x", ] result = transform_duplicate_definitions(sources) + redef_x = _rename_comment("x", RenameType.REDEFINED) assert result == [ "x = 0", - "x_1 = 1", + f"x_1 = 1{redef_x}", "def f(x): return x", ] @@ -830,10 +849,11 @@ def test_transform_duplicate_definitions_attrs(): "x", ] result = transform_duplicate_definitions(sources) + mod_x = _rename_comment("x", RenameType.MODIFIED) assert result == [ "x = 0", "x", - "x_1 = x.apply()", + f"x_1 = x.apply(){mod_x}", "x_1", ] @@ -851,13 +871,14 @@ def f(): ] ) result = transform_duplicate_definitions(sources) + redef_x = _rename_comment("x", RenameType.REDEFINED) assert_sources_equal( result, [ "x = 0", "x", - """ - x_1 = 1 + f""" + x_1 = 1{redef_x} def f(): x = 1; """, @@ -878,13 +899,14 @@ def f(x=x): ] ) result = transform_duplicate_definitions(sources) + redef_x = _rename_comment("x", RenameType.REDEFINED) assert_sources_equal( result, [ "x = 0", "x", - """ - x_1 = 1 + f""" + x_1 = 1{redef_x} def f(x=x_1): return x """, @@ -908,13 +930,14 @@ def g(x=x): ] ) result = transform_duplicate_definitions(sources) + mod_x = _rename_comment("x", RenameType.MODIFIED) assert_sources_equal( result, [ "x = 0", "x", - """ - x_1 = 1 + f""" + x_1 = 1{mod_x} def f(x=x_1): x = 2 def g(x=x): @@ -935,12 +958,13 @@ def test_transform_duplicate_function_definitions(): ] ) result = transform_duplicate_definitions(sources) + redef_f = _rename_comment("f", RenameType.REDEFINED) assert_sources_equal( result, [ "def f(): pass", "f()", - "def f_1(): pass", + f"def f_1(): pass{redef_f}", "f_1()", ], ) @@ -956,12 +980,13 @@ def test_transform_duplicate_classes(): ] ) result = transform_duplicate_definitions(sources) + redef_A = _rename_comment("A", RenameType.REDEFINED) assert_sources_equal( result, [ "class A(): ...", "A()", - "class A_1(): ...", + f"class A_1(): ...{redef_A}", "A_1()", ], ) @@ -999,14 +1024,15 @@ def test_transform_duplicate_definitions_numbered(): ] ) result = transform_duplicate_definitions(sources) + redef_df = _rename_comment("df", RenameType.REDEFINED) assert_sources_equal( result, [ "df = 1", "df_1 = 1", - "df_2 = 2", + f"df_2 = 2{redef_df}", "df_2", - "df_3 = 3", + f"df_3 = 3{redef_df}", "df_3", ], )