Skip to content

Commit 92a29e8

Browse files
committed
partition field names validation against schema field conflicts
1 parent 24b12dd commit 92a29e8

File tree

2 files changed

+43
-6
lines changed

2 files changed

+43
-6
lines changed

pyiceberg/table/update/spec.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,16 +174,21 @@ def _commit(self) -> UpdatesAndRequirements:
174174
return updates, requirements
175175

176176
def _apply(self) -> PartitionSpec:
177-
def _check_and_add_partition_name(schema: Schema, name: str, source_id: int, partition_names: Set[str]) -> None:
177+
def _check_and_add_partition_name(
178+
schema: Schema, name: str, source_id: int, transform: Transform[Any, Any], partition_names: Set[str]
179+
) -> None:
178180
try:
179181
field = schema.find_field(name)
180182
except ValueError:
181183
field = None
182184

183-
if source_id is not None and field is not None and field.field_id != source_id:
184-
raise ValueError(f"Cannot create identity partition from a different field in the schema {name}")
185-
elif field is not None and source_id != field.field_id:
186-
raise ValueError(f"Cannot create partition from name that exists in schema {name}")
185+
if field is not None:
186+
if isinstance(transform, (IdentityTransform, VoidTransform)):
187+
# For identity transforms allow name conflict only if sourced from the same schema field
188+
if field.field_id != source_id:
189+
raise ValueError(f"Cannot create identity partition from a different field in the schema: {name}")
190+
else:
191+
raise ValueError(f"Cannot create partition from name that exists in schema: {name}")
187192
if not name:
188193
raise ValueError("Undefined name")
189194
if name in partition_names:
@@ -193,7 +198,7 @@ def _check_and_add_partition_name(schema: Schema, name: str, source_id: int, par
193198
def _add_new_field(
194199
schema: Schema, source_id: int, field_id: int, name: str, transform: Transform[Any, Any], partition_names: Set[str]
195200
) -> PartitionField:
196-
_check_and_add_partition_name(schema, name, source_id, partition_names)
201+
_check_and_add_partition_name(schema, name, source_id, transform, partition_names)
197202
return PartitionField(source_id, field_id, transform, name)
198203

199204
partition_fields = []
@@ -244,6 +249,13 @@ def _add_new_field(
244249
partition_fields.append(new_field)
245250

246251
for added_field in self._adds:
252+
_check_and_add_partition_name(
253+
self._transaction.table_metadata.schema(),
254+
added_field.name,
255+
added_field.source_id,
256+
added_field.transform,
257+
partition_names,
258+
)
247259
new_field = PartitionField(
248260
source_id=added_field.source_id,
249261
field_id=added_field.field_id,

tests/integration/test_partition_evolution.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,3 +564,28 @@ def _validate_new_partition_fields(
564564
assert len(spec.fields) == len(expected_partition_fields)
565565
for i in range(len(spec.fields)):
566566
assert spec.fields[i] == expected_partition_fields[i]
567+
568+
569+
@pytest.mark.integration
570+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
571+
def test_partition_schema_field_name_conflict(catalog: Catalog) -> None:
572+
schema = Schema(
573+
NestedField(1, "id", LongType(), required=False),
574+
NestedField(2, "event_ts", TimestampType(), required=False),
575+
NestedField(3, "another_ts", TimestampType(), required=False),
576+
NestedField(4, "str", StringType(), required=False),
577+
)
578+
table = _create_table_with_schema(catalog, schema, "2")
579+
580+
with pytest.raises(ValueError, match="Cannot create partition from name that exists in schema: another_ts"):
581+
table.update_spec().add_field("event_ts", YearTransform(), "another_ts").commit()
582+
with pytest.raises(ValueError, match="Cannot create partition from name that exists in schema: id"):
583+
table.update_spec().add_field("event_ts", DayTransform(), "id").commit()
584+
585+
with pytest.raises(ValueError, match="Cannot create identity partition from a different field in the schema: another_ts"):
586+
table.update_spec().add_field("event_ts", IdentityTransform(), "another_ts").commit()
587+
with pytest.raises(ValueError, match="Cannot create identity partition from a different field in the schema: str"):
588+
table.update_spec().add_field("id", IdentityTransform(), "str").commit()
589+
590+
table.update_spec().add_field("id", IdentityTransform(), "id").commit()
591+
table.update_spec().add_field("event_ts", YearTransform(), "event_year").commit()

0 commit comments

Comments
 (0)