Skip to content

Commit f8ccd82

Browse files
authored
Hiv: Fix renaming to a table that already exists (#2336)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change Hive catalog returned a `NoSuchNamespaceError` when it tried to rename to a table that already exists in the catalog. This change will first check for the destination table existence before executing the alter table. # Are these changes tested? Yes, added one unit test and one integration test # Are there any user-facing changes? Yes? Users using the hive catalog will get a new exception for this edge case. <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent cc642b7 commit f8ccd82

File tree

4 files changed

+52
-1
lines changed

4 files changed

+52
-1
lines changed

mkdocs/docs/api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,7 @@ Expert Iceberg users may choose to commit existing parquet files to the Iceberg
10311031
### Example
10321032

10331033
Add files to Iceberg table:
1034+
10341035
```python
10351036
# Given that these parquet files have schema consistent with the Iceberg table
10361037
@@ -1047,6 +1048,7 @@ tbl.add_files(file_paths=file_paths)
10471048
```
10481049

10491050
Add files to Iceberg table with custom snapshot properties:
1051+
10501052
```python
10511053
# Assume an existing Iceberg table object `tbl`
10521054

pyiceberg/catalog/hive.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,9 +651,14 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
651651
ValueError: When from table identifier is invalid.
652652
NoSuchTableError: When a table with the name does not exist.
653653
NoSuchNamespaceError: When the destination namespace doesn't exist.
654+
TableAlreadyExistsError: When the destination table already exists.
654655
"""
655656
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError)
656657
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
658+
659+
if self.table_exists(to_identifier):
660+
raise TableAlreadyExistsError(f"Table already exists: {to_table_name}")
661+
657662
try:
658663
with self._client as open_client:
659664
tbl = open_client.get_table(dbname=from_database_name, tbl_name=from_table_name)

tests/catalog/test_hive.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
NamespaceNotEmptyError,
6262
NoSuchNamespaceError,
6363
NoSuchTableError,
64+
TableAlreadyExistsError,
6465
WaitingForLockException,
6566
)
6667
from pyiceberg.partitioning import PartitionField, PartitionSpec
@@ -883,6 +884,7 @@ def test_load_table_from_self_identifier(hive_table: HiveTable) -> None:
883884

884885
def test_rename_table(hive_table: HiveTable) -> None:
885886
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
887+
catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign]
886888

887889
renamed_table = copy.deepcopy(hive_table)
888890
renamed_table.dbName = "default"
@@ -910,6 +912,7 @@ def test_rename_table(hive_table: HiveTable) -> None:
910912

911913
def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
912914
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
915+
catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign]
913916

914917
catalog._client = MagicMock()
915918
catalog._client.__enter__().get_table.return_value = hive_table
@@ -941,6 +944,7 @@ def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
941944

942945
def test_rename_table_from_does_not_exists() -> None:
943946
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
947+
catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign]
944948

945949
catalog._client = MagicMock()
946950
catalog._client.__enter__().alter_table_with_environment_context.side_effect = NoSuchObjectException(
@@ -955,6 +959,7 @@ def test_rename_table_from_does_not_exists() -> None:
955959

956960
def test_rename_table_to_namespace_does_not_exists() -> None:
957961
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
962+
catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign]
958963

959964
catalog._client = MagicMock()
960965
catalog._client.__enter__().alter_table_with_environment_context.side_effect = InvalidOperationException(
@@ -967,6 +972,16 @@ def test_rename_table_to_namespace_does_not_exists() -> None:
967972
assert "Database does not exists: default_does_not_exists" in str(exc_info.value)
968973

969974

975+
def test_rename_table_to_table_already_exists(hive_table: HiveTable) -> None:
976+
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
977+
catalog.load_table = MagicMock(return_value=hive_table) # type: ignore[method-assign]
978+
979+
with pytest.raises(TableAlreadyExistsError) as exc_info:
980+
catalog.rename_table(("default", "some_table"), ("default", "new_tabl2e"))
981+
982+
assert "Table already exists: new_tabl2e" in str(exc_info.value)
983+
984+
970985
def test_drop_database_does_not_empty() -> None:
971986
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
972987

tests/integration/test_catalog.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,19 +169,48 @@ def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_
169169
new_database_name = f"{database_name}_new"
170170
test_catalog.create_namespace(database_name)
171171
test_catalog.create_namespace(new_database_name)
172-
new_table_name = f"rename-{table_name}"
172+
173173
identifier = (database_name, table_name)
174174
table = test_catalog.create_table(identifier, table_schema_nested)
175175
assert table.name() == identifier
176+
177+
new_table_name = f"rename-{table_name}"
176178
new_identifier = (new_database_name, new_table_name)
177179
test_catalog.rename_table(identifier, new_identifier)
178180
new_table = test_catalog.load_table(new_identifier)
181+
179182
assert new_table.name() == new_identifier
180183
assert new_table.metadata_location == table.metadata_location
184+
181185
with pytest.raises(NoSuchTableError):
182186
test_catalog.load_table(identifier)
183187

184188

189+
@pytest.mark.integration
190+
@pytest.mark.parametrize("test_catalog", CATALOGS)
191+
def test_rename_table_already_exists(
192+
test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str
193+
) -> None:
194+
new_database_name = f"{database_name}_new"
195+
test_catalog.create_namespace(database_name)
196+
test_catalog.create_namespace(new_database_name)
197+
198+
identifier = (database_name, table_name)
199+
table = test_catalog.create_table(identifier, table_schema_nested)
200+
assert table.name() == identifier
201+
202+
new_table_name = f"rename-{table_name}"
203+
new_identifier = (new_database_name, new_table_name)
204+
new_table = test_catalog.create_table(new_identifier, table_schema_nested)
205+
assert new_table.name() == new_identifier
206+
207+
with pytest.raises(TableAlreadyExistsError):
208+
test_catalog.rename_table(identifier, new_identifier)
209+
210+
assert test_catalog.table_exists(identifier)
211+
assert test_catalog.table_exists(new_identifier)
212+
213+
185214
@pytest.mark.integration
186215
@pytest.mark.parametrize("test_catalog", CATALOGS)
187216
def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:

0 commit comments

Comments
 (0)