Skip to content

Commit 9a6a9a1

Browse files
authored
PyArrow: Pass in null-mask (#1264)
* PyArrow: Pass in null-mask * Add missing flag
1 parent 58a7be3 commit 9a6a9a1

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1807,7 +1807,11 @@ def struct(
18071807
else:
18081808
raise ResolveError(f"Field is required, and could not be found in the file: {field}")
18091809

1810-
return pa.StructArray.from_arrays(arrays=field_arrays, fields=pa.struct(fields))
1810+
return pa.StructArray.from_arrays(
1811+
arrays=field_arrays,
1812+
fields=pa.struct(fields),
1813+
mask=struct_array.is_null() if isinstance(struct_array, pa.StructArray) else None,
1814+
)
18111815

18121816
def field(self, field: NestedField, _: Optional[pa.Array], field_array: Optional[pa.Array]) -> Optional[pa.Array]:
18131817
return field_array

tests/integration/test_writes/test_writes.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,6 +1450,47 @@ def test_rewrite_manifest_after_partition_evolution(session_catalog: Catalog) ->
14501450
)
14511451

14521452

1453+
@pytest.mark.integration
1454+
def test_writing_null_structs(session_catalog: Catalog) -> None:
1455+
import pyarrow as pa
1456+
1457+
schema = pa.schema([
1458+
pa.field(
1459+
"struct_field_1",
1460+
pa.struct([
1461+
pa.field("string_nested_1", pa.string()),
1462+
pa.field("int_item_2", pa.int32()),
1463+
pa.field("float_item_2", pa.float32()),
1464+
]),
1465+
),
1466+
])
1467+
1468+
records = [
1469+
{
1470+
"struct_field_1": {
1471+
"string_nested_1": "nest_1",
1472+
"int_item_2": 1234,
1473+
"float_item_2": 1.234,
1474+
},
1475+
},
1476+
{},
1477+
]
1478+
1479+
try:
1480+
session_catalog.drop_table(
1481+
identifier="default.test_writing_null_structs",
1482+
)
1483+
except NoSuchTableError:
1484+
pass
1485+
1486+
table = session_catalog.create_table("default.test_writing_null_structs", schema)
1487+
1488+
pyarrow_table: pa.Table = pa.Table.from_pylist(records, schema=schema)
1489+
table.append(pyarrow_table)
1490+
1491+
assert pyarrow_table.to_pandas()["struct_field_1"].tolist() == table.scan().to_pandas()["struct_field_1"].tolist()
1492+
1493+
14531494
@pytest.mark.integration
14541495
@pytest.mark.parametrize("format_version", [1, 2])
14551496
def test_abort_table_transaction_on_exception(

0 commit comments

Comments
 (0)