Skip to content

Commit 640c592

Browse files
authored
Document older_than of ExpireSnapshots (#2324)
# Rationale for this change

While looking into `ExpireSnapshots.older_than`, I took the liberty of changing its argument from a millisecond timestamp to a `datetime`, so that nobody can accidentally pass in seconds or microseconds.

# Are these changes tested?

# Are there any user-facing changes?

<!-- In the case of user-facing changes, please add the changelog label. -->
1 parent 4b961f7 commit 640c592

File tree

3 files changed

+21
-16
lines changed

3 files changed

+21
-16
lines changed

mkdocs/docs/api.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,17 +1296,20 @@ PyIceberg provides table maintenance operations through the `table.maintenance`
12961296
Expire old snapshots to clean up table metadata and reduce storage costs:
12971297

12981298
```python
1299-
# Basic usage - expire a specific snapshot by ID
1299+
# Expire snapshots older than three days
1300+
from datetime import datetime, timedelta
1301+
table.maintenance.expire_snapshots().older_than(
1302+
datetime.now() - timedelta(days=3)
1303+
).commit()
1304+
1305+
# Expire a specific snapshot by ID
13001306
table.maintenance.expire_snapshots().by_id(12345).commit()
13011307

13021308
# Context manager usage (recommended for multiple operations)
13031309
with table.maintenance.expire_snapshots() as expire:
13041310
expire.by_id(12345)
13051311
expire.by_id(67890)
13061312
# Automatically commits when exiting the context
1307-
1308-
# Method chaining
1309-
table.maintenance.expire_snapshots().by_id(12345).commit()
13101313
```
13111314

13121315
#### Real-world Example

pyiceberg/table/update/snapshot.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from abc import abstractmethod
2323
from collections import defaultdict
2424
from concurrent.futures import Future
25+
from datetime import datetime
2526
from functools import cached_property
2627
from typing import TYPE_CHECKING, Callable, Dict, Generic, List, Optional, Set, Tuple
2728

@@ -82,6 +83,7 @@
8283
)
8384
from pyiceberg.utils.bin_packing import ListPacker
8485
from pyiceberg.utils.concurrent import ExecutorFactory
86+
from pyiceberg.utils.datetime import datetime_to_millis
8587
from pyiceberg.utils.properties import property_as_bool, property_as_int
8688

8789
if TYPE_CHECKING:
@@ -944,13 +946,11 @@ def _get_protected_snapshot_ids(self) -> Set[int]:
944946
Returns:
945947
Set of protected snapshot IDs to exclude from expiration.
946948
"""
947-
protected_ids: Set[int] = set()
948-
949-
for ref in self._transaction.table_metadata.refs.values():
950-
if ref.snapshot_ref_type in [SnapshotRefType.TAG, SnapshotRefType.BRANCH]:
951-
protected_ids.add(ref.snapshot_id)
952-
953-
return protected_ids
949+
return {
950+
ref.snapshot_id
951+
for ref in self._transaction.table_metadata.refs.values()
952+
if ref.snapshot_ref_type in [SnapshotRefType.TAG, SnapshotRefType.BRANCH]
953+
}
954954

955955
def by_id(self, snapshot_id: int) -> ExpireSnapshots:
956956
"""
@@ -988,18 +988,19 @@ def by_ids(self, snapshot_ids: List[int]) -> "ExpireSnapshots":
988988
self.by_id(snapshot_id)
989989
return self
990990

991-
def older_than(self, timestamp_ms: int) -> "ExpireSnapshots":
991+
def older_than(self, dt: datetime) -> "ExpireSnapshots":
992992
"""
993993
Expire all unprotected snapshots with a timestamp older than a given value.
994994
995995
Args:
996-
timestamp_ms (int): Only snapshots with timestamp_ms < this value will be expired.
996+
dt (datetime): Only snapshots with datetime < this value will be expired.
997997
998998
Returns:
999999
This for method chaining.
10001000
"""
10011001
protected_ids = self._get_protected_snapshot_ids()
1002+
expire_from = datetime_to_millis(dt)
10021003
for snapshot in self._transaction.table_metadata.snapshots:
1003-
if snapshot.timestamp_ms < timestamp_ms and snapshot.snapshot_id not in protected_ids:
1004+
if snapshot.timestamp_ms < expire_from and snapshot.snapshot_id not in protected_ids:
10041005
self._snapshot_ids_to_expire.add(snapshot.snapshot_id)
10051006
return self

tests/table/test_expire_snapshots.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
import datetime
1718
from unittest.mock import MagicMock
1819
from uuid import uuid4
1920

@@ -142,7 +143,7 @@ def test_expire_snapshots_by_timestamp_skips_protected(table_v2: Table) -> None:
142143
table_v2.catalog = MagicMock()
143144

144145
# Attempt to expire all snapshots before a future timestamp (so both are candidates)
145-
future_timestamp = 9999999999999 # Far in the future, after any real snapshot
146+
future_datetime = datetime.datetime.now() + datetime.timedelta(days=1)
146147

147148
# Mock the catalog's commit_table to return the current metadata (simulate no change)
148149
mock_response = CommitTableResponse(
@@ -152,7 +153,7 @@ def test_expire_snapshots_by_timestamp_skips_protected(table_v2: Table) -> None:
152153
)
153154
table_v2.catalog.commit_table.return_value = mock_response
154155

155-
table_v2.maintenance.expire_snapshots().older_than(future_timestamp).commit()
156+
table_v2.maintenance.expire_snapshots().older_than(future_datetime).commit()
156157
# Update metadata to reflect the commit (as in other tests)
157158
table_v2.metadata = mock_response.metadata
158159

0 commit comments

Comments
 (0)