Skip to content

Document older_than of ExpireSnapshots #2324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions mkdocs/docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -1296,17 +1296,20 @@ PyIceberg provides table maintenance operations through the `table.maintenance`
Expire old snapshots to clean up table metadata and reduce storage costs:

```python
# Basic usage - expire a specific snapshot by ID
# Expire snapshots older than three days
from datetime import datetime, timedelta
table.maintenance.expire_snapshots().older_than(
datetime.now() - timedelta(days=3)
).commit()

# Expire a specific snapshot by ID
table.maintenance.expire_snapshots().by_id(12345).commit()

# Context manager usage (recommended for multiple operations)
with table.maintenance.expire_snapshots() as expire:
expire.by_id(12345)
expire.by_id(67890)
# Automatically commits when exiting the context

# Method chaining
table.maintenance.expire_snapshots().by_id(12345).commit()
```

#### Real-world Example
Expand Down
21 changes: 11 additions & 10 deletions pyiceberg/table/update/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from abc import abstractmethod
from collections import defaultdict
from concurrent.futures import Future
from datetime import datetime
from functools import cached_property
from typing import TYPE_CHECKING, Callable, Dict, Generic, List, Optional, Set, Tuple

Expand Down Expand Up @@ -82,6 +83,7 @@
)
from pyiceberg.utils.bin_packing import ListPacker
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.datetime import datetime_to_millis
from pyiceberg.utils.properties import property_as_bool, property_as_int

if TYPE_CHECKING:
Expand Down Expand Up @@ -944,13 +946,11 @@ def _get_protected_snapshot_ids(self) -> Set[int]:
Returns:
Set of protected snapshot IDs to exclude from expiration.
"""
protected_ids: Set[int] = set()

for ref in self._transaction.table_metadata.refs.values():
if ref.snapshot_ref_type in [SnapshotRefType.TAG, SnapshotRefType.BRANCH]:
protected_ids.add(ref.snapshot_id)

return protected_ids
return {
ref.snapshot_id
for ref in self._transaction.table_metadata.refs.values()
if ref.snapshot_ref_type in [SnapshotRefType.TAG, SnapshotRefType.BRANCH]
}

def by_id(self, snapshot_id: int) -> ExpireSnapshots:
"""
Expand Down Expand Up @@ -988,18 +988,19 @@ def by_ids(self, snapshot_ids: List[int]) -> "ExpireSnapshots":
self.by_id(snapshot_id)
return self

def older_than(self, timestamp_ms: int) -> "ExpireSnapshots":
def older_than(self, dt: datetime) -> "ExpireSnapshots":
"""
Expire all unprotected snapshots with a timestamp older than a given value.
Args:
timestamp_ms (int): Only snapshots with timestamp_ms < this value will be expired.
dt (datetime): Only snapshots whose timestamp is earlier than this datetime will be expired.
Returns:
This ExpireSnapshots instance, for method chaining.
"""
protected_ids = self._get_protected_snapshot_ids()
expire_from = datetime_to_millis(dt)
for snapshot in self._transaction.table_metadata.snapshots:
if snapshot.timestamp_ms < timestamp_ms and snapshot.snapshot_id not in protected_ids:
if snapshot.timestamp_ms < expire_from and snapshot.snapshot_id not in protected_ids:
self._snapshot_ids_to_expire.add(snapshot.snapshot_id)
return self
5 changes: 3 additions & 2 deletions tests/table/test_expire_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import datetime
from unittest.mock import MagicMock
from uuid import uuid4

Expand Down Expand Up @@ -142,7 +143,7 @@ def test_expire_snapshots_by_timestamp_skips_protected(table_v2: Table) -> None:
table_v2.catalog = MagicMock()

# Attempt to expire all snapshots before a future timestamp (so both are candidates)
future_timestamp = 9999999999999 # Far in the future, after any real snapshot
future_datetime = datetime.datetime.now() + datetime.timedelta(days=1)

# Mock the catalog's commit_table to return the current metadata (simulate no change)
mock_response = CommitTableResponse(
Expand All @@ -152,7 +153,7 @@ def test_expire_snapshots_by_timestamp_skips_protected(table_v2: Table) -> None:
)
table_v2.catalog.commit_table.return_value = mock_response

table_v2.maintenance.expire_snapshots().older_than(future_timestamp).commit()
table_v2.maintenance.expire_snapshots().older_than(future_datetime).commit()
# Update metadata to reflect the commit (as in other tests)
table_v2.metadata = mock_response.metadata

Expand Down