|
64 | 64 | SortField,
|
65 | 65 | SortOrder,
|
66 | 66 | )
|
67 |
| -from pyiceberg.table.statistics import BlobMetadata, StatisticsFile |
| 67 | +from pyiceberg.table.statistics import BlobMetadata, PartitionStatisticsFile, StatisticsFile |
68 | 68 | from pyiceberg.table.update import (
|
69 | 69 | AddSnapshotUpdate,
|
70 | 70 | AddSortOrderUpdate,
|
|
76 | 76 | AssertLastAssignedPartitionId,
|
77 | 77 | AssertRefSnapshotId,
|
78 | 78 | AssertTableUUID,
|
| 79 | + RemovePartitionStatisticsUpdate, |
79 | 80 | RemovePropertiesUpdate,
|
80 | 81 | RemoveSnapshotRefUpdate,
|
81 | 82 | RemoveSnapshotsUpdate,
|
82 | 83 | RemoveStatisticsUpdate,
|
83 | 84 | SetDefaultSortOrderUpdate,
|
| 85 | + SetPartitionStatisticsUpdate, |
84 | 86 | SetPropertiesUpdate,
|
85 | 87 | SetSnapshotRefUpdate,
|
86 | 88 | SetStatisticsUpdate,
|
@@ -1359,3 +1361,79 @@ def test_remove_statistics_update(table_v2_with_statistics: Table) -> None:
|
1359 | 1361 | table_v2_with_statistics.metadata,
|
1360 | 1362 | (RemoveStatisticsUpdate(snapshot_id=123456789),),
|
1361 | 1363 | )
|
| 1364 | + |
| 1365 | + |
| 1366 | +def test_set_partition_statistics_update(table_v2_with_statistics: Table) -> None: |
| 1367 | + snapshot_id = table_v2_with_statistics.metadata.current_snapshot_id |
| 1368 | + |
| 1369 | + partition_statistics_file = PartitionStatisticsFile( |
| 1370 | + snapshot_id=snapshot_id, |
| 1371 | + statistics_path="s3://bucket/warehouse/stats.puffin", |
| 1372 | + file_size_in_bytes=124, |
| 1373 | + ) |
| 1374 | + |
| 1375 | + update = SetPartitionStatisticsUpdate( |
| 1376 | + partition_statistics=partition_statistics_file, |
| 1377 | + ) |
| 1378 | + |
| 1379 | + new_metadata = update_table_metadata( |
| 1380 | + table_v2_with_statistics.metadata, |
| 1381 | + (update,), |
| 1382 | + ) |
| 1383 | + |
| 1384 | + expected = """ |
| 1385 | + { |
| 1386 | + "snapshot-id": 3055729675574597004, |
| 1387 | + "statistics-path": "s3://bucket/warehouse/stats.puffin", |
| 1388 | + "file-size-in-bytes": 124 |
| 1389 | + }""" |
| 1390 | + |
| 1391 | + assert len(new_metadata.partition_statistics) == 1 |
| 1392 | + |
| 1393 | + updated_statistics = [stat for stat in new_metadata.partition_statistics if stat.snapshot_id == snapshot_id] |
| 1394 | + |
| 1395 | + assert len(updated_statistics) == 1 |
| 1396 | + assert json.loads(updated_statistics[0].model_dump_json()) == json.loads(expected) |
| 1397 | + |
| 1398 | + |
| 1399 | +def test_remove_partition_statistics_update(table_v2_with_statistics: Table) -> None: |
| 1400 | + # Add partition statistics file. |
| 1401 | + snapshot_id = table_v2_with_statistics.metadata.current_snapshot_id |
| 1402 | + |
| 1403 | + partition_statistics_file = PartitionStatisticsFile( |
| 1404 | + snapshot_id=snapshot_id, |
| 1405 | + statistics_path="s3://bucket/warehouse/stats.puffin", |
| 1406 | + file_size_in_bytes=124, |
| 1407 | + ) |
| 1408 | + |
| 1409 | + update = SetPartitionStatisticsUpdate( |
| 1410 | + partition_statistics=partition_statistics_file, |
| 1411 | + ) |
| 1412 | + |
| 1413 | + new_metadata = update_table_metadata( |
| 1414 | + table_v2_with_statistics.metadata, |
| 1415 | + (update,), |
| 1416 | + ) |
| 1417 | + assert len(new_metadata.partition_statistics) == 1 |
| 1418 | + |
| 1419 | + # Remove the same partition statistics file. |
| 1420 | + remove_update = RemovePartitionStatisticsUpdate(snapshot_id=snapshot_id) |
| 1421 | + |
| 1422 | + remove_metadata = update_table_metadata( |
| 1423 | + new_metadata, |
| 1424 | + (remove_update,), |
| 1425 | + ) |
| 1426 | + |
| 1427 | + assert len(remove_metadata.partition_statistics) == 0 |
| 1428 | + |
| 1429 | + |
| 1430 | +def test_remove_partition_statistics_update_with_invalid_snapshot_id(table_v2_with_statistics: Table) -> None: |
| 1431 | + # Removing partition statistics for a snapshot id that has no entry must raise a ValueError. |
| 1432 | + with pytest.raises( |
| 1433 | + ValueError, |
| 1434 | + match="Partition Statistics with snapshot id 123456789 does not exist", |
| 1435 | + ): |
| 1436 | + update_table_metadata( |
| 1437 | + table_v2_with_statistics.metadata, |
| 1438 | + (RemovePartitionStatisticsUpdate(snapshot_id=123456789),), |
| 1439 | + ) |
0 commit comments