 # Distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.
 
 """
-Iceberg Connector - Refactored Architecture
+Opteryx Connector - Refactored Architecture
 
 Architecture:
-- IcebergConnector: Long-lived catalog gateway (handles catalog operations, views, introspection)
-- IcebergTable: Transient table-specific engine (handles data reading for one table)
+- OpteryxConnector: Long-lived catalog gateway (handles catalog operations, views, introspection)
+- OpteryxTable: Transient table-specific engine (handles data reading for one table)
 """
 
 import datetime
@@ -105,7 +105,7 @@ def __init__(self, dataset: str, catalog, workspace: str, **kwargs):
 
         # Call FileSystemTable.__init__ which calls BaseTable.__init__
         FileSystemTable.__init__(
-            self, dataset=dataset, filesystem=filesystem, storage_type="ICEBERG", **kwargs
+            self, dataset=dataset, filesystem=filesystem, storage_type="OPTERYX", **kwargs
         )
         Diachronic.__init__(self, **kwargs)
         Statistics.__init__(self, **kwargs)
@@ -180,7 +180,7 @@ def get_dataset_schema(self) -> RelationSchema:
         # Use Parquet manifest reader instead of Opteryx inspect API to avoid Avro
         try:
             import pyarrow as pa
-            from opteryx_catalof.parquet_manifest import read_parquet_manifest
+            from opteryx_catalog.parquet_manifest import read_parquet_manifest
 
             parquet_records = read_parquet_manifest(
                 self.table.metadata,
@@ -230,16 +230,6 @@ def get_dataset_schema(self) -> RelationSchema:
 
         relation_statistics.record_count = pyarrow.compute.sum(files.column("record_count")).as_py()
 
-        if "distinct_counts" in files.columns:
-            for file in files.column("distinct_counts"):
-                for k, v in file:
-                    relation_statistics.set_cardinality_estimate(column_names[k], v)
-
-        if "value_counts" in files.columns:
-            for file in files.column("value_counts"):
-                for k, v in file:
-                    relation_statistics.add_count(column_names[k], v)
-
         self.relation_statistics = relation_statistics
 
         return self.schema
@@ -250,6 +240,7 @@ def get_list_of_blob_names(self, *, prefix: str = None, predicates: list = []) -
         # Get the list of data files to read
         data_files = self.table.scan(
             #row_filter=pushed_filters,
+            row_limit=self.limit,
             snapshot_id=self.snapshot_id,
         )
         return [data_file.file_path for data_file in data_files]
@@ -461,9 +452,8 @@ def get_view(self, view_name: str):
         # Parse relative_id into collection and name
         # For "clickbench.q01": collection="clickbench", name="q01"
         parts = relative_id.split(".")
-        if len(parts) >= 2:
-            name = parts[-1]
-            collection = ".".join(parts[:-1])
+        name = parts[-1]
+        collection = ".".join(parts[:-1])
 
         identifier = (collection, name)
         view = catalog.load_view(identifier)
0 commit comments