diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index bcbe429688..5798b1051d 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -530,7 +530,7 @@ def _(update: RemoveSnapshotsUpdate, base_metadata: TableMetadata, context: _Tab if ref.snapshot_id in update.snapshot_ids ) remove_statistics_updates = ( - RemoveStatisticsUpdate(statistics_file.snapshot_id) + RemoveStatisticsUpdate(snapshot_id=statistics_file.snapshot_id) for statistics_file in base_metadata.statistics if statistics_file.snapshot_id in update.snapshot_ids ) diff --git a/tests/table/test_expire_snapshots.py b/tests/table/test_expire_snapshots.py index 51f5ba687a..d11851f246 100644 --- a/tests/table/test_expire_snapshots.py +++ b/tests/table/test_expire_snapshots.py @@ -23,6 +23,7 @@ import pytest from pyiceberg.table import CommitTableResponse, Table +from pyiceberg.table.update import RemoveSnapshotsUpdate, update_table_metadata from pyiceberg.table.update.snapshot import ExpireSnapshots @@ -280,3 +281,39 @@ def worker2() -> None: assert results["expire1_snapshots"] == expected_1, "Worker 1 snapshots contaminated" assert results["expire2_snapshots"] == expected_2, "Worker 2 snapshots contaminated" + + +def test_update_remove_snapshots_with_statistics(table_v2_with_statistics: Table) -> None: + """ + Test removing snapshots from a table that has statistics. + + This test exercises the code path where RemoveStatisticsUpdate is instantiated + within the RemoveSnapshotsUpdate handler. Before the fix for #2558, this would + fail with: TypeError: BaseModel.__init__() takes 1 positional argument but 2 were given + """ + # The table has 2 snapshots with IDs: 3051729675574597004 and 3055729675574597004 + # Both snapshots have statistics associated with them + REMOVE_SNAPSHOT = 3051729675574597004 + KEEP_SNAPSHOT = 3055729675574597004 + + # Verify fixture assumptions + assert len(table_v2_with_statistics.metadata.snapshots) == 2 + assert len(table_v2_with_statistics.metadata.statistics) == 2 + assert any(stat.snapshot_id == REMOVE_SNAPSHOT for stat in table_v2_with_statistics.metadata.statistics), ( + "Snapshot to remove should have statistics" + ) + + # This should trigger RemoveStatisticsUpdate instantiation for the removed snapshot + update = RemoveSnapshotsUpdate(snapshot_ids=[REMOVE_SNAPSHOT]) + new_metadata = update_table_metadata(table_v2_with_statistics.metadata, (update,)) + + # Verify the snapshot was removed + assert len(new_metadata.snapshots) == 1 + assert new_metadata.snapshots[0].snapshot_id == KEEP_SNAPSHOT + + # Verify the statistics for the removed snapshot were also removed + assert len(new_metadata.statistics) == 1 + assert new_metadata.statistics[0].snapshot_id == KEEP_SNAPSHOT + assert not any(stat.snapshot_id == REMOVE_SNAPSHOT for stat in new_metadata.statistics), ( + "Statistics for removed snapshot should be gone" + )