@@ -36,11 +36,6 @@ class MaintenanceTable:
36
36
def __init__ (self , tbl : Table ) -> None :
37
37
self .tbl = tbl
38
38
39
- try :
40
- import pyarrow as pa # noqa
41
- except ModuleNotFoundError as e :
42
- raise ModuleNotFoundError ("For metadata operations PyArrow needs to be installed" ) from e
43
-
44
39
def expire_snapshot_by_id (self , snapshot_id : int ) -> None :
45
40
"""Expire a single snapshot by its ID.
46
41
@@ -65,7 +60,7 @@ def expire_snapshot_by_id(self, snapshot_id: int) -> None:
65
60
66
61
txn ._apply ((RemoveSnapshotsUpdate (snapshot_ids = [snapshot_id ]),))
67
62
68
- def expire_snapshots_by_ids (self , snapshot_ids : List [int ]) -> None :
63
+ def _expire_snapshots_by_ids (self , snapshot_ids : List [int ]) -> None :
69
64
"""Expire multiple snapshots by their IDs.
70
65
71
66
Args:
@@ -104,7 +99,7 @@ def expire_snapshots_older_than(self, timestamp_ms: int) -> None:
104
99
snapshots_to_expire .append (snapshot .snapshot_id )
105
100
106
101
if snapshots_to_expire :
107
- self .expire_snapshots_by_ids (snapshots_to_expire )
102
+ self ._expire_snapshots_by_ids (snapshots_to_expire )
108
103
109
104
def expire_snapshots_older_than_with_retention (
110
105
self , timestamp_ms : int , retain_last_n : Optional [int ] = None , min_snapshots_to_keep : Optional [int ] = None
@@ -121,7 +116,7 @@ def expire_snapshots_older_than_with_retention(
121
116
)
122
117
123
118
if snapshots_to_expire :
124
- self .expire_snapshots_by_ids (snapshots_to_expire )
119
+ self ._expire_snapshots_by_ids (snapshots_to_expire )
125
120
126
121
def retain_last_n_snapshots (self , n : int ) -> None :
127
122
"""Keep only the last N snapshots, expiring all others.
@@ -156,7 +151,7 @@ def retain_last_n_snapshots(self, n: int) -> None:
156
151
snapshots_to_expire .append (snapshot .snapshot_id )
157
152
158
153
if snapshots_to_expire :
159
- self .expire_snapshots_by_ids (snapshots_to_expire )
154
+ self ._expire_snapshots_by_ids (snapshots_to_expire )
160
155
161
156
def _get_snapshots_to_expire_with_retention (
162
157
self , timestamp_ms : Optional [int ] = None , retain_last_n : Optional [int ] = None , min_snapshots_to_keep : Optional [int ] = None
@@ -262,7 +257,7 @@ def expire_snapshots_with_retention_policy(
262
257
)
263
258
264
259
if snapshots_to_expire :
265
- self .expire_snapshots_by_ids (snapshots_to_expire )
260
+ self ._expire_snapshots_by_ids (snapshots_to_expire )
266
261
267
262
def _get_protected_snapshot_ids (self , table_metadata : TableMetadata ) -> Set [int ]:
268
263
"""Get the IDs of protected snapshots.
@@ -276,13 +271,7 @@ def _get_protected_snapshot_ids(self, table_metadata: TableMetadata) -> Set[int]
276
271
Returns:
277
272
Set of protected snapshot IDs to exclude from expiration.
278
273
"""
279
- from pyiceberg .table .refs import SnapshotRefType
280
-
281
- protected_ids : Set [int ] = set ()
282
- for ref in table_metadata .refs .values ():
283
- if ref .snapshot_ref_type in [SnapshotRefType .TAG , SnapshotRefType .BRANCH ]:
284
- protected_ids .add (ref .snapshot_id )
285
- return protected_ids
274
+ return set (self .tbl .inspect .refs ()["snapshot_id" ].to_pylist ())
286
275
287
276
def _get_all_datafiles (self ) -> List [DataFile ]:
288
277
"""Collect all DataFiles in the current snapshot only."""
0 commit comments