@@ -213,15 +213,20 @@ def expire_snapshots_with_retention_policy(
213
213
This method provides a unified interface for snapshot expiration with various
214
214
retention policies to ensure operational resilience while allowing space reclamation.
215
215
216
+ The method will use table properties as defaults if they are set:
217
+ - history.expire.max-snapshot-age-ms: Default for timestamp_ms if not provided
218
+ - history.expire.min-snapshots-to-keep: Default for min_snapshots_to_keep if not provided
219
+ - history.expire.max-ref-age-ms: Used for ref expiration (branches/tags)
220
+
216
221
Args:
217
222
timestamp_ms: Only snapshots with timestamp_ms < this value will be considered for expiration.
218
- If None, all snapshots are candidates (subject to other constraints) .
223
+ If None, will use history.expire.max-snapshot-age-ms table property if set .
219
224
retain_last_n: Always keep the last N snapshots regardless of age.
220
225
Useful when regular snapshot creation occurs and users want to keep
221
226
the last few for rollback purposes.
222
227
min_snapshots_to_keep: Minimum number of snapshots to keep in total.
223
- Acts as a guardrail to prevent aggressive expiration logic
224
- from removing too many snapshots.
228
+ Acts as a guardrail to prevent aggressive expiration logic.
229
+ If None, will use history.expire.min- snapshots-to-keep table property if set .
225
230
226
231
Returns:
227
232
List of snapshot IDs that were expired.
@@ -230,48 +235,62 @@ def expire_snapshots_with_retention_policy(
230
235
ValueError: If retain_last_n or min_snapshots_to_keep is less than 1.
231
236
232
237
Examples:
233
- # Keep last 5 snapshots regardless of age
234
- maintenance.expire_snapshots_with_retention_policy(retain_last_n=5)
235
-
236
- # Expire snapshots older than timestamp but keep at least 3 total
237
- maintenance.expire_snapshots_with_retention_policy(
238
- timestamp_ms=1234567890000,
239
- min_snapshots_to_keep=3
240
- )
238
+ # Use table property defaults
239
+ maintenance.expire_snapshots_with_retention_policy()
241
240
242
- # Combined policy: expire old snapshots but keep last 10 and at least 5 total
241
+ # Override defaults with explicit values
243
242
maintenance.expire_snapshots_with_retention_policy(
244
243
timestamp_ms=1234567890000,
245
244
retain_last_n=10,
246
245
min_snapshots_to_keep=5
247
246
)
248
247
"""
248
+ # Get default values from table properties
249
+ default_max_age , default_min_snapshots , _ = self ._get_expiration_properties ()
250
+
251
+ # Use defaults from table properties if not explicitly provided
252
+ if timestamp_ms is None :
253
+ timestamp_ms = default_max_age
254
+
255
+ if min_snapshots_to_keep is None :
256
+ min_snapshots_to_keep = default_min_snapshots
257
+
258
+ # If no expiration criteria are provided, don't expire anything
259
+ if timestamp_ms is None and retain_last_n is None and min_snapshots_to_keep is None :
260
+ return
261
+
249
262
if retain_last_n is not None and retain_last_n < 1 :
250
263
raise ValueError ("retain_last_n must be at least 1" )
251
264
252
265
if min_snapshots_to_keep is not None and min_snapshots_to_keep < 1 :
253
266
raise ValueError ("min_snapshots_to_keep must be at least 1" )
254
267
255
268
snapshots_to_expire = self ._get_snapshots_to_expire_with_retention (
256
- timestamp_ms = timestamp_ms , retain_last_n = retain_last_n , min_snapshots_to_keep = min_snapshots_to_keep
269
+ timestamp_ms = timestamp_ms ,
270
+ retain_last_n = retain_last_n ,
271
+ min_snapshots_to_keep = min_snapshots_to_keep
257
272
)
258
273
259
274
if snapshots_to_expire :
260
275
self ._expire_snapshots_by_ids (snapshots_to_expire )
261
276
262
def _get_protected_snapshot_ids(self, table_metadata: Optional["TableMetadata"] = None) -> Set[int]:
    """Get the IDs of protected snapshots.

    These are the snapshots referenced by the entries of the metadata's
    ``refs`` mapping (the HEAD snapshots of all branches and all tagged
    snapshots). These IDs are to be excluded from expiration.

    Args:
        table_metadata: Optional table metadata to check for protected snapshots.
            If not provided, ``self.tbl.metadata`` is used.

    Returns:
        Set of protected snapshot IDs to exclude from expiration. Empty when
        no metadata is available.
    """
    # Explicit None check: only a missing argument falls back to the table's
    # metadata, never an argument that merely evaluates as falsy.
    metadata = table_metadata if table_metadata is not None else self.tbl.metadata
    if metadata is None:
        return set()

    # Several refs may point at the same snapshot; the set collapses them.
    return {ref.snapshot_id for ref in metadata.refs.values()}
275
294
276
295
def _get_all_datafiles (self ) -> List [DataFile ]:
277
296
"""Collect all DataFiles in the current snapshot only."""
@@ -359,3 +378,22 @@ def deduplicate_data_files(self) -> List[DataFile]:
359
378
self .tbl = self .tbl .refresh ()
360
379
361
380
return removed
381
+
382
def _get_expiration_properties(self) -> tuple[Optional[int], Optional[int], Optional[int]]:
    """Get the default expiration settings from the table properties.

    Reads three keys from ``self.tbl.properties`` and converts every value
    that is present to ``int``; a key that is absent yields ``None``.

    Returns:
        Tuple of (max_snapshot_age_ms, min_snapshots_to_keep, max_ref_age_ms).

    Raises:
        ValueError: If a property is set but its value is not a valid integer.
    """
    properties = self.tbl.properties

    def as_optional_int(key: str) -> Optional[int]:
        # One parser for all three keys, so a malformed value is reported
        # together with the property that carries it.
        raw = properties.get(key)
        if raw is None:
            return None
        try:
            return int(raw)
        except ValueError as err:
            raise ValueError(f"Table property {key!r} must be an integer, got {raw!r}") from err

    return (
        as_optional_int("history.expire.max-snapshot-age-ms"),
        as_optional_int("history.expire.min-snapshots-to-keep"),
        as_optional_int("history.expire.max-ref-age-ms"),
    )
0 commit comments