Skip to content

Commit 7fc9705

Browse files
author
Tom McCormick
committed
read/write and tests
1 parent 1039cda commit 7fc9705

File tree

7 files changed

+300
-32
lines changed

7 files changed

+300
-32
lines changed

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.13.0

poetry.lock

Lines changed: 26 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/table/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,13 +1053,22 @@ def __init__(
10531053
catalog: Catalog,
10541054
config: Dict[str, str] = EMPTY_DICT,
10551055
) -> None:
1056+
self._validate_table_metadata(metadata)
10561057
self._identifier = identifier
10571058
self.metadata = metadata
10581059
self.metadata_location = metadata_location
10591060
self.io = io
10601061
self.catalog = catalog
10611062
self.config = config
10621063

1064+
def _validate_table_metadata(self, table_metadata: TableMetadata) -> None:
    """Reject table metadata whose configured write file format is unsupported.

    The format is read from the ``TableProperties.WRITE_FILE_FORMAT`` property,
    falling back to ``TableProperties.WRITE_FILE_FORMAT_DEFAULT`` when unset.

    Args:
        table_metadata: Metadata whose ``properties`` mapping is inspected.

    Raises:
        ValueError: If the resolved format is neither PARQUET nor ORC.
    """
    # Function-scope import, kept local exactly as the original code had it.
    from pyiceberg.manifest import FileFormat

    configured_format = table_metadata.properties.get(
        TableProperties.WRITE_FILE_FORMAT,
        TableProperties.WRITE_FILE_FORMAT_DEFAULT,
    )
    file_format = FileFormat(configured_format)
    if file_format in (FileFormat.PARQUET, FileFormat.ORC):
        return
    raise ValueError(f"Unsupported file format: {file_format}")
1071+
10631072
def transaction(self) -> Transaction:
10641073
"""Create a new transaction object to first stage the changes, and then commit them to the catalog.
10651074

tests/conftest.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2382,6 +2382,30 @@ def example_task(data_file: str) -> FileScanTask:
23822382
)
23832383

23842384

2385+
@pytest.fixture
def data_file_orc(table_schema_simple: Schema, tmp_path: str) -> str:
    """Write a three-row ORC file under ``tmp_path`` and return its path.

    The rows populate the ``foo``, ``bar`` and ``baz`` columns, and the Arrow
    schema is derived from ``table_schema_simple``.
    """
    import pyarrow as pa
    import pyarrow.orc as orc

    from pyiceberg.io.pyarrow import schema_to_pyarrow

    arrow_schema = schema_to_pyarrow(table_schema_simple)
    columns = {
        "foo": ["a", "b", "c"],
        "bar": [1, 2, 3],
        "baz": [True, False, None],
    }
    arrow_table = pa.table(columns, schema=arrow_schema)

    orc_path = f"{tmp_path}/0000-data.orc"
    orc.write_table(table=arrow_table, where=orc_path)
    return orc_path
2400+
2401+
2402+
@pytest.fixture
def example_task_orc(data_file_orc: str) -> FileScanTask:
    """Wrap the ORC data file produced by ``data_file_orc`` in a ``FileScanTask``."""
    # NOTE(review): 1925 is a fixed size, not measured from the file on disk -
    # confirm whether any consumer relies on the real byte count.
    orc_data_file = DataFile.from_args(
        file_path=data_file_orc,
        file_format=FileFormat.ORC,
        file_size_in_bytes=1925,
    )
    return FileScanTask(data_file=orc_data_file)
2407+
2408+
23852409
@pytest.fixture(scope="session")
def warehouse(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Return a session-scoped temporary directory created with base name ``test_sql``."""
    warehouse_dir = tmp_path_factory.mktemp("test_sql")
    return warehouse_dir
@@ -2410,6 +2434,20 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
24102434
catalog=NoopCatalog("NoopCatalog"),
24112435
)
24122436

2437+
@pytest.fixture
def table_v2_orc(example_table_metadata_v2: Dict[str, Any]) -> Table:
    """Build a v2 ``Table`` whose ``write.format.default`` property is ``ORC``.

    The incoming metadata dict is copied rather than edited in place, so the
    ORC setting cannot leak into other fixtures or tests that receive the same
    dict object. A missing or empty ``properties`` entry is treated as an
    empty mapping.

    Args:
        example_table_metadata_v2: Raw v2 table metadata used as the template.

    Returns:
        A ``Table`` identified as ``("database", "table_orc")`` backed by a
        ``NoopCatalog``.
    """
    # Copy the nested properties too: a shallow copy of the outer dict alone
    # would still share (and mutate) the caller's properties mapping.
    properties = dict(example_table_metadata_v2.get("properties") or {})
    properties["write.format.default"] = "ORC"
    metadata_fields = {**example_table_metadata_v2, "properties": properties}

    table_metadata = TableMetadataV2(**metadata_fields)
    return Table(
        identifier=("database", "table_orc"),
        metadata=table_metadata,
        metadata_location=f"{table_metadata.location}/uuid.metadata.json",
        io=load_file_io(),
        catalog=NoopCatalog("NoopCatalog"),
    )
2450+
24132451

24142452
@pytest.fixture
24152453
def table_v2_with_fixed_and_decimal_types(

0 commit comments

Comments
 (0)