Skip to content

Commit eb00834

Browse files
authored
Add RemoveSchemasUpdate event (#2200)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change This is the last non-view related TableMetadata update event missing from Java. It allows users to remove schemas, as long as they exist and aren't the default. # Are these changes tested? Added unit tests. # Are there any user-facing changes? - Adds RemoveSchemasUpdate event. <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent a8df020 commit eb00834

File tree

2 files changed

+53
-0
lines changed

2 files changed

+53
-0
lines changed

pyiceberg/table/update/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,11 @@ class RemoveStatisticsUpdate(IcebergBaseModel):
193193
snapshot_id: int = Field(alias="snapshot-id")
194194

195195

196+
class RemoveSchemasUpdate(IcebergBaseModel):
197+
action: Literal["remove-schemas"] = Field(default="remove-schemas")
198+
schema_ids: List[int] = Field(alias="schema-ids")
199+
200+
196201
class SetPartitionStatisticsUpdate(IcebergBaseModel):
197202
action: Literal["set-partition-statistics"] = Field(default="set-partition-statistics")
198203
partition_statistics: PartitionStatisticsFile
@@ -222,6 +227,7 @@ class RemovePartitionStatisticsUpdate(IcebergBaseModel):
222227
RemovePropertiesUpdate,
223228
SetStatisticsUpdate,
224229
RemoveStatisticsUpdate,
230+
RemoveSchemasUpdate,
225231
SetPartitionStatisticsUpdate,
226232
RemovePartitionStatisticsUpdate,
227233
],
@@ -589,6 +595,23 @@ def _(update: RemoveStatisticsUpdate, base_metadata: TableMetadata, context: _Ta
589595
return base_metadata.model_copy(update={"statistics": statistics})
590596

591597

598+
@_apply_table_update.register(RemoveSchemasUpdate)
599+
def _(update: RemoveSchemasUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
600+
# This method should error if any schemas do not exist.
601+
# It should error if the default schema is being removed.
602+
# Otherwise, remove the schemas listed in update.schema_ids.
603+
for remove_schema_id in update.schema_ids:
604+
if not any(schema.schema_id == remove_schema_id for schema in base_metadata.schemas):
605+
raise ValueError(f"Schema with schema id {remove_schema_id} does not exist")
606+
if base_metadata.current_schema_id == remove_schema_id:
607+
raise ValueError(f"Cannot remove current schema with id {remove_schema_id}")
608+
609+
schemas = [schema for schema in base_metadata.schemas if schema.schema_id not in update.schema_ids]
610+
context.add_update(update)
611+
612+
return base_metadata.model_copy(update={"schemas": schemas})
613+
614+
592615
@_apply_table_update.register(SetPartitionStatisticsUpdate)
593616
def _(update: SetPartitionStatisticsUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
594617
partition_statistics = filter_statistics_by_snapshot_id(

tests/table/test_init.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
AssertTableUUID,
7979
RemovePartitionStatisticsUpdate,
8080
RemovePropertiesUpdate,
81+
RemoveSchemasUpdate,
8182
RemoveSnapshotRefUpdate,
8283
RemoveSnapshotsUpdate,
8384
RemoveStatisticsUpdate,
@@ -1286,6 +1287,35 @@ def test_update_metadata_log_overflow(table_v2: Table) -> None:
12861287
assert len(new_metadata.metadata_log) == 1
12871288

12881289

1290+
def test_remove_schemas_update(table_v2: Table) -> None:
1291+
base_metadata = table_v2.metadata
1292+
assert len(base_metadata.schemas) == 2
1293+
1294+
update = RemoveSchemasUpdate(schema_ids=[0])
1295+
updated_metadata = update_table_metadata(
1296+
base_metadata,
1297+
(update,),
1298+
)
1299+
1300+
assert len(updated_metadata.schemas) == 1
1301+
1302+
1303+
def test_remove_schemas_update_schema_does_not_exist(table_v2: Table) -> None:
1304+
update = RemoveSchemasUpdate(
1305+
schema_ids=[123],
1306+
)
1307+
with pytest.raises(ValueError, match="Schema with schema id 123 does not exist"):
1308+
update_table_metadata(table_v2.metadata, (update,))
1309+
1310+
1311+
def test_remove_schemas_update_current_schema(table_v2: Table) -> None:
1312+
update = RemoveSchemasUpdate(
1313+
schema_ids=[1],
1314+
)
1315+
with pytest.raises(ValueError, match="Cannot remove current schema with id 1"):
1316+
update_table_metadata(table_v2.metadata, (update,))
1317+
1318+
12891319
def test_set_statistics_update(table_v2_with_statistics: Table) -> None:
12901320
snapshot_id = table_v2_with_statistics.metadata.current_snapshot_id
12911321

0 commit comments

Comments
 (0)