Skip to content

Commit 1d4ed06

Browse files
authored
backward compat (#1151)
1 parent d8d509f commit 1d4ed06

File tree

4 files changed

+114
-34
lines changed

4 files changed

+114
-34
lines changed

pyiceberg/table/__init__.py

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,15 @@
106106
UpgradeFormatVersionUpdate,
107107
update_table_metadata,
108108
)
109-
from pyiceberg.table.update.schema import UpdateSchema
110-
from pyiceberg.table.update.snapshot import ManageSnapshots, UpdateSnapshot
109+
from pyiceberg.table.update.schema import UpdateSchema, _Move, _MoveOperation
110+
from pyiceberg.table.update.snapshot import (
111+
ManageSnapshots,
112+
UpdateSnapshot,
113+
_DeleteFiles,
114+
_FastAppendFiles,
115+
_MergeAppendFiles,
116+
_OverwriteFiles,
117+
)
111118
from pyiceberg.table.update.spec import UpdateSpec
112119
from pyiceberg.typedef import (
113120
EMPTY_DICT,
@@ -1464,3 +1471,57 @@ def _parquet_files_to_data_files(table_metadata: TableMetadata, file_paths: List
14641471
from pyiceberg.io.pyarrow import parquet_files_to_data_files
14651472

14661473
yield from parquet_files_to_data_files(io=io, table_metadata=table_metadata, file_paths=iter(file_paths))
1474+
1475+
1476+
@deprecated(
1477+
deprecated_in="0.8.0",
1478+
removed_in="0.9.0",
1479+
help_message="pyiceberg.table.Move has been changed to private class pyiceberg.table.update.schema._Move",
1480+
)
1481+
def Move(*args: Any, **kwargs: Any) -> _Move:
1482+
return _Move(*args, **kwargs)
1483+
1484+
1485+
@deprecated(
1486+
deprecated_in="0.8.0",
1487+
removed_in="0.9.0",
1488+
help_message="pyiceberg.table.MoveOperation has been changed to private class pyiceberg.table.update.schema._MoveOperation",
1489+
)
1490+
def MoveOperation(*args: Any, **kwargs: Any) -> _MoveOperation:
1491+
return _MoveOperation(*args, **kwargs)
1492+
1493+
1494+
@deprecated(
1495+
deprecated_in="0.8.0",
1496+
removed_in="0.9.0",
1497+
help_message="pyiceberg.table.DeleteFiles has been changed to private class pyiceberg.table.update.snapshot._DeleteFiles",
1498+
)
1499+
def DeleteFiles(*args: Any, **kwargs: Any) -> _DeleteFiles:
1500+
return _DeleteFiles(*args, **kwargs)
1501+
1502+
1503+
@deprecated(
1504+
deprecated_in="0.8.0",
1505+
removed_in="0.9.0",
1506+
help_message="pyiceberg.table.FastAppendFiles has been changed to private class pyiceberg.table.update.snapshot._FastAppendFiles",
1507+
)
1508+
def FastAppendFiles(*args: Any, **kwargs: Any) -> _FastAppendFiles:
1509+
return _FastAppendFiles(*args, **kwargs)
1510+
1511+
1512+
@deprecated(
1513+
deprecated_in="0.8.0",
1514+
removed_in="0.9.0",
1515+
help_message="pyiceberg.table.MergeAppendFiles has been changed to private class pyiceberg.table.update.snapshot._MergeAppendFiles",
1516+
)
1517+
def MergeAppendFiles(*args: Any, **kwargs: Any) -> _MergeAppendFiles:
1518+
return _MergeAppendFiles(*args, **kwargs)
1519+
1520+
1521+
@deprecated(
1522+
deprecated_in="0.8.0",
1523+
removed_in="0.9.0",
1524+
help_message="pyiceberg.table.OverwriteFiles has been changed to private class pyiceberg.table.update.snapshot._OverwriteFiles",
1525+
)
1526+
def OverwriteFiles(*args: Any, **kwargs: Any) -> _OverwriteFiles:
1527+
return _OverwriteFiles(*args, **kwargs)

pyiceberg/table/update/schema.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,17 @@
5757
TABLE_ROOT_ID = -1
5858

5959

60-
class MoveOperation(Enum):
60+
class _MoveOperation(Enum):
6161
First = 1
6262
Before = 2
6363
After = 3
6464

6565

6666
@dataclass
67-
class Move:
67+
class _Move:
6868
field_id: int
6969
full_name: str
70-
op: MoveOperation
70+
op: _MoveOperation
7171
other_field_id: Optional[int] = None
7272

7373

@@ -79,7 +79,7 @@ class UpdateSchema(UpdateTableMetadata["UpdateSchema"]):
7979
_adds: Dict[int, List[NestedField]] = {}
8080
_updates: Dict[int, NestedField] = {}
8181
_deletes: Set[int] = set()
82-
_moves: Dict[int, List[Move]] = {}
82+
_moves: Dict[int, List[_Move]] = {}
8383

8484
_added_name_to_id: Dict[str, int] = {}
8585
# Part of https://github.com/apache/iceberg/pull/8393
@@ -146,7 +146,7 @@ def union_by_name(self, new_schema: Union[Schema, "pa.Schema"]) -> UpdateSchema:
146146
visit_with_partner(
147147
Catalog._convert_schema_if_needed(new_schema),
148148
-1,
149-
UnionByNameVisitor(update_schema=self, existing_schema=self._schema, case_sensitive=self._case_sensitive),
149+
_UnionByNameVisitor(update_schema=self, existing_schema=self._schema, case_sensitive=self._case_sensitive),
150150
# type: ignore
151151
PartnerIdByNameAccessor(partner_schema=self._schema, case_sensitive=self._case_sensitive),
152152
)
@@ -410,13 +410,13 @@ def _find_for_move(self, name: str) -> Optional[int]:
410410

411411
return self._added_name_to_id.get(name)
412412

413-
def _move(self, move: Move) -> None:
413+
def _move(self, move: _Move) -> None:
414414
if parent_name := self._id_to_parent.get(move.field_id):
415415
parent_field = self._schema.find_field(parent_name, case_sensitive=self._case_sensitive)
416416
if not parent_field.field_type.is_struct:
417417
raise ValueError(f"Cannot move fields in non-struct type: {parent_field.field_type}")
418418

419-
if move.op == MoveOperation.After or move.op == MoveOperation.Before:
419+
if move.op == _MoveOperation.After or move.op == _MoveOperation.Before:
420420
if move.other_field_id is None:
421421
raise ValueError("Expected other field when performing before/after move")
422422

@@ -426,7 +426,7 @@ def _move(self, move: Move) -> None:
426426
self._moves[parent_field.field_id] = self._moves.get(parent_field.field_id, []) + [move]
427427
else:
428428
# In the top level field
429-
if move.op == MoveOperation.After or move.op == MoveOperation.Before:
429+
if move.op == _MoveOperation.After or move.op == _MoveOperation.Before:
430430
if move.other_field_id is None:
431431
raise ValueError("Expected other field when performing before/after move")
432432

@@ -451,7 +451,7 @@ def move_first(self, path: Union[str, Tuple[str, ...]]) -> UpdateSchema:
451451
if field_id is None:
452452
raise ValueError(f"Cannot move missing column: {full_name}")
453453

454-
self._move(Move(field_id=field_id, full_name=full_name, op=MoveOperation.First))
454+
self._move(_Move(field_id=field_id, full_name=full_name, op=_MoveOperation.First))
455455

456456
return self
457457

@@ -485,7 +485,7 @@ def move_before(self, path: Union[str, Tuple[str, ...]], before_path: Union[str,
485485
if field_id == before_field_id:
486486
raise ValueError(f"Cannot move {full_name} before itself")
487487

488-
self._move(Move(field_id=field_id, full_name=full_name, other_field_id=before_field_id, op=MoveOperation.Before))
488+
self._move(_Move(field_id=field_id, full_name=full_name, other_field_id=before_field_id, op=_MoveOperation.Before))
489489

490490
return self
491491

@@ -514,7 +514,7 @@ def move_after(self, path: Union[str, Tuple[str, ...]], after_name: Union[str, T
514514
if field_id == after_field_id:
515515
raise ValueError(f"Cannot move {full_name} after itself")
516516

517-
self._move(Move(field_id=field_id, full_name=full_name, other_field_id=after_field_id, op=MoveOperation.After))
517+
self._move(_Move(field_id=field_id, full_name=full_name, other_field_id=after_field_id, op=_MoveOperation.After))
518518

519519
return self
520520

@@ -592,10 +592,14 @@ class _ApplyChanges(SchemaVisitor[Optional[IcebergType]]):
592592
_adds: Dict[int, List[NestedField]]
593593
_updates: Dict[int, NestedField]
594594
_deletes: Set[int]
595-
_moves: Dict[int, List[Move]]
595+
_moves: Dict[int, List[_Move]]
596596

597597
def __init__(
598-
self, adds: Dict[int, List[NestedField]], updates: Dict[int, NestedField], deletes: Set[int], moves: Dict[int, List[Move]]
598+
self,
599+
adds: Dict[int, List[NestedField]],
600+
updates: Dict[int, NestedField],
601+
deletes: Set[int],
602+
moves: Dict[int, List[_Move]],
599603
) -> None:
600604
self._adds = adds
601605
self._updates = updates
@@ -715,7 +719,7 @@ def primitive(self, primitive: PrimitiveType) -> Optional[IcebergType]:
715719
return primitive
716720

717721

718-
class UnionByNameVisitor(SchemaWithPartnerVisitor[int, bool]):
722+
class _UnionByNameVisitor(SchemaWithPartnerVisitor[int, bool]):
719723
update_schema: UpdateSchema
720724
existing_schema: Schema
721725
case_sensitive: bool
@@ -873,20 +877,20 @@ def _add_fields(fields: Tuple[NestedField, ...], adds: Optional[List[NestedField
873877
return fields + tuple(adds)
874878

875879

876-
def _move_fields(fields: Tuple[NestedField, ...], moves: List[Move]) -> Tuple[NestedField, ...]:
880+
def _move_fields(fields: Tuple[NestedField, ...], moves: List[_Move]) -> Tuple[NestedField, ...]:
877881
reordered = list(copy(fields))
878882
for move in moves:
879883
# Find the field that we're about to move
880884
field = next(field for field in reordered if field.field_id == move.field_id)
881885
# Remove the field that we're about to move from the list
882886
reordered = [field for field in reordered if field.field_id != move.field_id]
883887

884-
if move.op == MoveOperation.First:
888+
if move.op == _MoveOperation.First:
885889
reordered = [field] + reordered
886-
elif move.op == MoveOperation.Before or move.op == MoveOperation.After:
890+
elif move.op == _MoveOperation.Before or move.op == _MoveOperation.After:
887891
other_field_id = move.other_field_id
888892
other_field_pos = next(i for i, field in enumerate(reordered) if field.field_id == other_field_id)
889-
if move.op == MoveOperation.Before:
893+
if move.op == _MoveOperation.Before:
890894
reordered.insert(other_field_pos, field)
891895
else:
892896
reordered.insert(other_field_pos + 1, field)
@@ -897,7 +901,7 @@ def _move_fields(fields: Tuple[NestedField, ...], moves: List[Move]) -> Tuple[Ne
897901

898902

899903
def _add_and_move_fields(
900-
fields: Tuple[NestedField, ...], adds: List[NestedField], moves: List[Move]
904+
fields: Tuple[NestedField, ...], adds: List[NestedField], moves: List[_Move]
901905
) -> Optional[Tuple[NestedField, ...]]:
902906
if len(adds) > 0:
903907
# always apply adds first so that added fields can be moved

pyiceberg/table/update/snapshot.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def fetch_manifest_entry(self, manifest: ManifestFile, discard_deleted: bool = T
307307
return manifest.fetch_manifest_entry(io=self._io, discard_deleted=discard_deleted)
308308

309309

310-
class DeleteFiles(_SnapshotProducer["DeleteFiles"]):
310+
class _DeleteFiles(_SnapshotProducer["_DeleteFiles"]):
311311
"""Will delete manifest entries from the current snapshot based on the predicate.
312312
313313
This will produce a DELETE snapshot:
@@ -443,7 +443,7 @@ def files_affected(self) -> bool:
443443
return len(self._deleted_entries()) > 0
444444

445445

446-
class FastAppendFiles(_SnapshotProducer["FastAppendFiles"]):
446+
class _FastAppendFiles(_SnapshotProducer["_FastAppendFiles"]):
447447
def _existing_manifests(self) -> List[ManifestFile]:
448448
"""To determine if there are any existing manifest files.
449449
@@ -472,7 +472,7 @@ def _deleted_entries(self) -> List[ManifestEntry]:
472472
return []
473473

474474

475-
class MergeAppendFiles(FastAppendFiles):
475+
class _MergeAppendFiles(_FastAppendFiles):
476476
_target_size_bytes: int
477477
_min_count_to_merge: int
478478
_merge_enabled: bool
@@ -507,7 +507,7 @@ def __init__(
507507
def _process_manifests(self, manifests: List[ManifestFile]) -> List[ManifestFile]:
508508
"""To perform any post-processing on the manifests before writing them to the new snapshot.
509509
510-
In MergeAppendFiles, we merge manifests based on the target size and the minimum count to merge
510+
In _MergeAppendFiles, we merge manifests based on the target size and the minimum count to merge
511511
if automatic merge is enabled.
512512
"""
513513
unmerged_data_manifests = [manifest for manifest in manifests if manifest.content == ManifestContent.DATA]
@@ -523,7 +523,7 @@ def _process_manifests(self, manifests: List[ManifestFile]) -> List[ManifestFile
523523
return data_manifest_merge_manager.merge_manifests(unmerged_data_manifests) + unmerged_deletes_manifests
524524

525525

526-
class OverwriteFiles(_SnapshotProducer["OverwriteFiles"]):
526+
class _OverwriteFiles(_SnapshotProducer["_OverwriteFiles"]):
527527
"""Overwrites data from the table. This will produce an OVERWRITE snapshot.
528528
529529
Data and delete files were added and removed in a logical overwrite operation.
@@ -610,18 +610,18 @@ def __init__(self, transaction: Transaction, io: FileIO, snapshot_properties: Di
610610
self._io = io
611611
self._snapshot_properties = snapshot_properties
612612

613-
def fast_append(self) -> FastAppendFiles:
614-
return FastAppendFiles(
613+
def fast_append(self) -> _FastAppendFiles:
614+
return _FastAppendFiles(
615615
operation=Operation.APPEND, transaction=self._transaction, io=self._io, snapshot_properties=self._snapshot_properties
616616
)
617617

618-
def merge_append(self) -> MergeAppendFiles:
619-
return MergeAppendFiles(
618+
def merge_append(self) -> _MergeAppendFiles:
619+
return _MergeAppendFiles(
620620
operation=Operation.APPEND, transaction=self._transaction, io=self._io, snapshot_properties=self._snapshot_properties
621621
)
622622

623-
def overwrite(self, commit_uuid: Optional[uuid.UUID] = None) -> OverwriteFiles:
624-
return OverwriteFiles(
623+
def overwrite(self, commit_uuid: Optional[uuid.UUID] = None) -> _OverwriteFiles:
624+
return _OverwriteFiles(
625625
commit_uuid=commit_uuid,
626626
operation=Operation.OVERWRITE
627627
if self._transaction.table_metadata.current_snapshot() is not None
@@ -631,8 +631,8 @@ def overwrite(self, commit_uuid: Optional[uuid.UUID] = None) -> OverwriteFiles:
631631
snapshot_properties=self._snapshot_properties,
632632
)
633633

634-
def delete(self) -> DeleteFiles:
635-
return DeleteFiles(
634+
def delete(self) -> _DeleteFiles:
635+
return _DeleteFiles(
636636
operation=Operation.DELETE,
637637
transaction=self._transaction,
638638
io=self._io,

tests/table/test_init.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,3 +1243,18 @@ def test_update_metadata_log_overflow(table_v2: Table) -> None:
12431243
table_v2.metadata_location,
12441244
)
12451245
assert len(new_metadata.metadata_log) == 1
1246+
1247+
1248+
def test_table_module_refactoring_backward_compatibility() -> None:
1249+
# TODO: Remove this in 0.9.0
1250+
try:
1251+
from pyiceberg.table import ( # noqa: F401
1252+
DeleteFiles,
1253+
FastAppendFiles,
1254+
MergeAppendFiles,
1255+
Move,
1256+
MoveOperation,
1257+
OverwriteFiles,
1258+
)
1259+
except Exception as exc:
1260+
raise pytest.fail("Importing moved modules should not raise an exception") from exc

0 commit comments

Comments
 (0)