Skip to content

Commit 289f9f6

Browse files
author
Tom McCormick
committed
read/write and tests
1 parent 76a8e3e commit 289f9f6

File tree

7 files changed

+324
-56
lines changed

7 files changed

+324
-56
lines changed

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.13.0

poetry.lock

Lines changed: 50 additions & 50 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/table/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,13 +1049,22 @@ def __init__(
10491049
catalog: Catalog,
10501050
config: Dict[str, str] = EMPTY_DICT,
10511051
) -> None:
1052+
self._validate_table_metadata(metadata)
10521053
self._identifier = identifier
10531054
self.metadata = metadata
10541055
self.metadata_location = metadata_location
10551056
self.io = io
10561057
self.catalog = catalog
10571058
self.config = config
10581059

1060+
def _validate_table_metadata(self, table_metadata: TableMetadata) -> None:
    """Reject table metadata whose configured write file format is unsupported.

    Looks up ``TableProperties.WRITE_FILE_FORMAT`` in the metadata properties,
    falling back to ``TableProperties.WRITE_FILE_FORMAT_DEFAULT``, and converts
    the value to a ``FileFormat``.

    Args:
        table_metadata: The metadata whose properties are inspected.

    Raises:
        ValueError: If the resolved format is neither PARQUET nor ORC.
    """
    # Imported locally, as in the original block
    # (presumably to avoid an import cycle -- TODO confirm).
    from pyiceberg.manifest import FileFormat

    configured_format = table_metadata.properties.get(
        TableProperties.WRITE_FILE_FORMAT,
        TableProperties.WRITE_FILE_FORMAT_DEFAULT,
    )
    file_format = FileFormat(configured_format)
    supported_formats = (FileFormat.PARQUET, FileFormat.ORC)
    if file_format in supported_formats:
        return
    raise ValueError(f"Unsupported file format: {file_format}")
1067+
10591068
def transaction(self) -> Transaction:
10601069
"""Create a new transaction object to first stage the changes, and then commit them to the catalog.
10611070

tests/conftest.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2352,6 +2352,30 @@ def example_task(data_file: str) -> FileScanTask:
23522352
)
23532353

23542354

2355+
@pytest.fixture
def data_file_orc(table_schema_simple: Schema, tmp_path: Path) -> str:
    """Write a small ORC data file and return its path as a string.

    The file holds three rows with columns ``foo``, ``bar`` and ``baz``, using
    the Arrow schema derived from ``table_schema_simple``.

    Args:
        table_schema_simple: Iceberg schema converted to Arrow for the table.
        tmp_path: pytest's per-test temporary directory (a ``pathlib.Path``).

    Returns:
        The path of the written ``0000-data.orc`` file.
    """
    import pyarrow as pa
    import pyarrow.orc as orc

    from pyiceberg.io.pyarrow import schema_to_pyarrow

    table = pa.table(
        {"foo": ["a", "b", "c"], "bar": [1, 2, 3], "baz": [True, False, None]},
        schema=schema_to_pyarrow(table_schema_simple),
    )

    # Formatting the Path into an f-string yields its string form, so the
    # declared ``str`` return type holds.
    file_path = f"{tmp_path}/0000-data.orc"
    orc.write_table(table=table, where=file_path)
    return file_path
2370+
2371+
2372+
@pytest.fixture
def example_task_orc(data_file_orc: str) -> FileScanTask:
    """Build a ``FileScanTask`` that points at the ORC file from ``data_file_orc``.

    Args:
        data_file_orc: Path of an existing ORC data file.

    Returns:
        A scan task whose data file records the ORC format and the file's
        actual size on disk.
    """
    import os

    # Use the real size instead of a hard-coded constant: the byte length of
    # the written ORC file is not fixed by anything in this fixture.
    return FileScanTask(
        data_file=DataFile.from_args(
            file_path=data_file_orc,
            file_format=FileFormat.ORC,
            file_size_in_bytes=os.path.getsize(data_file_orc),
        ),
    )
2377+
2378+
23552379
@pytest.fixture(scope="session")
def warehouse(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Return a session-scoped temporary directory created with base name ``test_sql``."""
    warehouse_dir = tmp_path_factory.mktemp("test_sql")
    return warehouse_dir
@@ -2380,6 +2404,20 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
23802404
catalog=NoopCatalog("NoopCatalog"),
23812405
)
23822406

2407+
@pytest.fixture
def table_v2_orc(example_table_metadata_v2: Dict[str, Any]) -> Table:
    """Return a v2 ``Table`` whose ``write.format.default`` property is ``ORC``.

    Args:
        example_table_metadata_v2: Raw v2 table metadata used as the template.

    Returns:
        A ``Table`` identified as ``("database", "table_orc")`` backed by a
        ``NoopCatalog``.
    """
    # Build fresh dicts rather than mutating the injected metadata in place, so
    # the ORC setting cannot leak into other tests that receive the same object.
    # ``.get(...) or {}`` also covers a missing or empty "properties" entry.
    orc_properties = {
        **(example_table_metadata_v2.get("properties") or {}),
        "write.format.default": "ORC",
    }
    orc_metadata = {**example_table_metadata_v2, "properties": orc_properties}
    table_metadata = TableMetadataV2(**orc_metadata)
    return Table(
        identifier=("database", "table_orc"),
        metadata=table_metadata,
        metadata_location=f"{table_metadata.location}/uuid.metadata.json",
        io=load_file_io(),
        catalog=NoopCatalog("NoopCatalog"),
    )
2420+
23832421

23842422
@pytest.fixture
23852423
def table_v2_with_fixed_and_decimal_types(

0 commit comments

Comments
 (0)