Skip to content

Commit 3d08776

Browse files
authored
Set field-id when needed (#1867)
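Fixes #1798

<!-- Thanks for opening a pull request! -->
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual GitHub issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->

# Rationale for this change

When a struct is missing an optional field, the null-filled placeholder column was always built with `include_field_ids=False`. It now uses the `self._include_field_ids` setting instead.

# Are these changes tested?

Yes: the new integration test `test_write_optional_list` appends rows that include the optional list column, then rows that omit it, and checks the row count after each append.

# Are there any user-facing changes?

<!-- In the case of user-facing changes, please add the changelog label. -->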
1 parent 77c8951 commit 3d08776

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1777,7 +1777,7 @@ def struct(
17771777
field_arrays.append(array)
17781778
fields.append(self._construct_field(field, array.type))
17791779
elif field.optional:
1780-
arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=False)
1780+
arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=self._include_field_ids)
17811781
field_arrays.append(pa.nulls(len(struct_array), type=arrow_type))
17821782
fields.append(self._construct_field(field, arrow_type))
17831783
else:

tests/integration/test_writes/test_writes.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
DateType,
5353
DoubleType,
5454
IntegerType,
55+
ListType,
5556
LongType,
5657
NestedField,
5758
StringType,
@@ -1647,3 +1648,38 @@ def test_abort_table_transaction_on_exception(
16471648

16481649
# Validate the transaction is aborted and no partial update is applied
16491650
assert len(tbl.scan().to_pandas()) == table_size # type: ignore
1651+
1652+
1653+
@pytest.mark.integration
1654+
def test_write_optional_list(session_catalog: Catalog) -> None:
1655+
identifier = "default.test_write_optional_list"
1656+
schema = Schema(
1657+
NestedField(field_id=1, name="name", field_type=StringType(), required=False),
1658+
NestedField(
1659+
field_id=3,
1660+
name="my_list",
1661+
field_type=ListType(element_id=45, element=StringType(), element_required=False),
1662+
required=False,
1663+
),
1664+
)
1665+
session_catalog.create_table_if_not_exists(identifier, schema)
1666+
1667+
df_1 = pa.Table.from_pylist(
1668+
[
1669+
{"name": "one", "my_list": ["test"]},
1670+
{"name": "another", "my_list": ["test"]},
1671+
]
1672+
)
1673+
session_catalog.load_table(identifier).append(df_1)
1674+
1675+
assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 2
1676+
1677+
df_2 = pa.Table.from_pylist(
1678+
[
1679+
{"name": "one"},
1680+
{"name": "another"},
1681+
]
1682+
)
1683+
session_catalog.load_table(identifier).append(df_2)
1684+
1685+
assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 4

0 commit comments

Comments
 (0)