Skip to content

Commit 3855f64

Browse files
authored
Core: Improve error for null/unknown schema types in table creation (apache#2843)
What changed
- Add a client-side validation error when a PyArrow `null` type is used with format version < 3, including the field path and a hint to use a concrete type or format-version 3.
- Add a unit test to verify the clear error message when converting a PyArrow schema with a null field.

Why
- Prevents misleading REST errors like “Cannot parse type string… unknown” and points directly to the offending field.

Testing
- make lint
- uv run pytest tests/catalog/test_base.py -k rejects_null_type -v
- make test (timed out at ~42%)

Closes apache#2539

---------

Co-authored-by: Soham <010Soham@users.noreply.github.com>
1 parent 85c9cb6 commit 3855f64

File tree

2 files changed

+17
-0
lines changed

2 files changed

+17
-0
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,6 +1435,13 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
14351435
elif pa.types.is_null(primitive):
14361436
# PyArrow null type (pa.null()) is converted to Iceberg UnknownType
14371437
# UnknownType can be promoted to any primitive type in V3+ tables per the Iceberg spec
1438+
if self._format_version < 3:
1439+
field_path = ".".join(self._field_names) if self._field_names else "<root>"
1440+
raise ValueError(
1441+
"Null type (pa.null()) is not supported in Iceberg format version "
1442+
f"{self._format_version}. Field: {field_path}. "
1443+
"Requires format-version=3+ or use a concrete type (string, int, boolean, etc.)."
1444+
)
14381445
return UnknownType()
14391446
elif isinstance(primitive, pa.UuidType):
14401447
return UUIDType()

tests/catalog/test_base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,16 @@ def test_convert_schema_if_needed(
210210
assert expected == catalog._convert_schema_if_needed(schema)
211211

212212

213+
def test_convert_schema_if_needed_rejects_null_type(catalog: InMemoryCatalog) -> None:
    """Converting a PyArrow schema that contains a ``pa.null()`` field raises a descriptive ValueError."""
    null_field = pa.field("n1", pa.null())
    schema = pa.schema([null_field])

    with pytest.raises(ValueError) as exc_info:
        catalog._convert_schema_if_needed(schema)

    # The message must name the problem, the offending field, and the suggested remedy.
    message = str(exc_info.value)
    for expected_fragment in ("Null type", "n1", "format-version=3"):
        assert expected_fragment in message
221+
222+
213223
def test_create_table_pyarrow_schema(catalog: InMemoryCatalog, pyarrow_schema_simple_without_ids: pa.Schema) -> None:
214224
catalog.create_namespace(TEST_TABLE_NAMESPACE)
215225
table = catalog.create_table(

0 commit comments

Comments
 (0)