108
108
)
109
109
from pyiceberg .table .name_mapping import (
110
110
NameMapping ,
111
- parse_mapping_from_json ,
112
111
update_mapping ,
113
112
)
114
113
from pyiceberg .table .refs import MAIN_BRANCH , SnapshotRef
@@ -1220,7 +1219,8 @@ def scan(
1220
1219
limit : Optional [int ] = None ,
1221
1220
) -> DataScan :
1222
1221
return DataScan (
1223
- table = self ,
1222
+ table_metadata = self .metadata ,
1223
+ io = self .io ,
1224
1224
row_filter = row_filter ,
1225
1225
selected_fields = selected_fields ,
1226
1226
case_sensitive = case_sensitive ,
@@ -1317,10 +1317,7 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive
1317
1317
1318
1318
def name_mapping (self ) -> Optional [NameMapping ]:
1319
1319
"""Return the table's field-id NameMapping."""
1320
- if name_mapping_json := self .properties .get (TableProperties .DEFAULT_NAME_MAPPING ):
1321
- return parse_mapping_from_json (name_mapping_json )
1322
- else :
1323
- return None
1320
+ return self .metadata .name_mapping ()
1324
1321
1325
1322
def append (self , df : pa .Table , snapshot_properties : Dict [str , str ] = EMPTY_DICT ) -> None :
1326
1323
"""
@@ -1513,10 +1510,11 @@ def projection(self) -> Schema:
1513
1510
snapshot = self .table_metadata .snapshot_by_id (self .snapshot_id )
1514
1511
if snapshot is not None :
1515
1512
if snapshot .schema_id is not None :
1516
- snapshot_schema = self .table_metadata .schemas ().get (snapshot .schema_id )
1517
- if snapshot_schema is not None :
1518
- current_schema = snapshot_schema
1519
- else :
1513
+ try :
1514
+ current_schema = next (
1515
+ schema for schema in self .table_metadata .schemas if schema .schema_id == snapshot .schema_id
1516
+ )
1517
+ except StopIteration :
1520
1518
warnings .warn (f"Metadata does not contain schema with id: { snapshot .schema_id } " )
1521
1519
else :
1522
1520
raise ValueError (f"Snapshot not found: { self .snapshot_id } " )
@@ -1542,7 +1540,7 @@ def update(self: S, **overrides: Any) -> S:
1542
1540
def use_ref (self : S , name : str ) -> S :
1543
1541
if self .snapshot_id :
1544
1542
raise ValueError (f"Cannot override ref, already set snapshot id={ self .snapshot_id } " )
1545
- if snapshot := self .table .snapshot_by_name (name ):
1543
+ if snapshot := self .table_metadata .snapshot_by_name (name ):
1546
1544
return self .update (snapshot_id = snapshot .snapshot_id )
1547
1545
1548
1546
raise ValueError (f"Cannot scan unknown ref={ name } " )
@@ -1636,20 +1634,20 @@ def _match_deletes_to_data_file(data_entry: ManifestEntry, positional_delete_ent
1636
1634
class DataScan (TableScan ):
1637
1635
1638
1636
def _build_partition_projection (self , spec_id : int ) -> BooleanExpression :
1639
- project = inclusive_projection (self .table .schema (), self .table .specs ()[spec_id ])
1637
+ project = inclusive_projection (self .table_metadata .schema (), self .table_metadata .specs ()[spec_id ])
1640
1638
return project (self .row_filter )
1641
1639
1642
1640
@cached_property
1643
1641
def partition_filters (self ) -> KeyDefaultDict [int , BooleanExpression ]:
1644
1642
return KeyDefaultDict (self ._build_partition_projection )
1645
1643
1646
1644
def _build_manifest_evaluator (self , spec_id : int ) -> Callable [[ManifestFile ], bool ]:
1647
- spec = self .table .specs ()[spec_id ]
1648
- return manifest_evaluator (spec , self .table .schema (), self .partition_filters [spec_id ], self .case_sensitive )
1645
+ spec = self .table_metadata .specs ()[spec_id ]
1646
+ return manifest_evaluator (spec , self .table_metadata .schema (), self .partition_filters [spec_id ], self .case_sensitive )
1649
1647
1650
1648
def _build_partition_evaluator (self , spec_id : int ) -> Callable [[DataFile ], bool ]:
1651
- spec = self .table .specs ()[spec_id ]
1652
- partition_type = spec .partition_type (self .table .schema ())
1649
+ spec = self .table_metadata .specs ()[spec_id ]
1650
+ partition_type = spec .partition_type (self .table_metadata .schema ())
1653
1651
partition_schema = Schema (* partition_type .fields )
1654
1652
partition_expr = self .partition_filters [spec_id ]
1655
1653
@@ -1684,16 +1682,14 @@ def plan_files(self) -> Iterable[FileScanTask]:
1684
1682
if not snapshot :
1685
1683
return iter ([])
1686
1684
1687
- io = self .table .io
1688
-
1689
1685
# step 1: filter manifests using partition summaries
1690
1686
# the filter depends on the partition spec used to write the manifest file, so create a cache of filters for each spec id
1691
1687
1692
1688
manifest_evaluators : Dict [int , Callable [[ManifestFile ], bool ]] = KeyDefaultDict (self ._build_manifest_evaluator )
1693
1689
1694
1690
manifests = [
1695
1691
manifest_file
1696
- for manifest_file in snapshot .manifests (io )
1692
+ for manifest_file in snapshot .manifests (self . io )
1697
1693
if manifest_evaluators [manifest_file .partition_spec_id ](manifest_file )
1698
1694
]
1699
1695
@@ -1702,7 +1698,7 @@ def plan_files(self) -> Iterable[FileScanTask]:
1702
1698
1703
1699
partition_evaluators : Dict [int , Callable [[DataFile ], bool ]] = KeyDefaultDict (self ._build_partition_evaluator )
1704
1700
metrics_evaluator = _InclusiveMetricsEvaluator (
1705
- self .table .schema (), self .row_filter , self .case_sensitive , self .options .get ("include_empty_files" ) == "true"
1701
+ self .table_metadata .schema (), self .row_filter , self .case_sensitive , self .options .get ("include_empty_files" ) == "true"
1706
1702
).eval
1707
1703
1708
1704
min_data_sequence_number = _min_data_file_sequence_number (manifests )
@@ -1716,7 +1712,7 @@ def plan_files(self) -> Iterable[FileScanTask]:
1716
1712
lambda args : _open_manifest (* args ),
1717
1713
[
1718
1714
(
1719
- io ,
1715
+ self . io ,
1720
1716
manifest ,
1721
1717
partition_evaluators [manifest .partition_spec_id ],
1722
1718
metrics_evaluator ,
@@ -1752,7 +1748,8 @@ def to_arrow(self) -> pa.Table:
1752
1748
1753
1749
return project_table (
1754
1750
self .plan_files (),
1755
- self .table ,
1751
+ self .table_metadata ,
1752
+ self .io ,
1756
1753
self .row_filter ,
1757
1754
self .projection (),
1758
1755
case_sensitive = self .case_sensitive ,
0 commit comments