Skip to content

Commit fb78904

Browse files
maint: common catalog integration test suite (#2090)
In pursuit of #813 --------- Co-authored-by: Kevin Liu <[email protected]>
1 parent e9c0253 commit fb78904

File tree

7 files changed

+322
-5
lines changed

7 files changed

+322
-5
lines changed

dev/docker-compose-integration.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ services:
5353
- CATALOG_WAREHOUSE=s3://warehouse/
5454
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
5555
- CATALOG_S3_ENDPOINT=http://minio:9000
56+
- CATALOG_JDBC_STRICT__MODE=true
5657
minio:
5758
image: minio/minio
5859
container_name: pyiceberg-minio

pyiceberg/catalog/hive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,7 +800,7 @@ def update_namespace_properties(
800800
if removals:
801801
for key in removals:
802802
if key in parameters:
803-
parameters[key] = None
803+
parameters.pop(key)
804804
removed.add(key)
805805
if updates:
806806
for key, value in updates.items():

pyiceberg/catalog/rest/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ def _create_table(
505505
try:
506506
response.raise_for_status()
507507
except HTTPError as exc:
508-
_handle_non_200_response(exc, {409: TableAlreadyExistsError})
508+
_handle_non_200_response(exc, {409: TableAlreadyExistsError, 404: NoSuchNamespaceError})
509509
return TableResponse.model_validate_json(response.text)
510510

511511
@retry(**_RETRY_ARGS)

tests/catalog/test_hive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ def test_update_namespace_properties(hive_database: HiveDatabase) -> None:
11561156
name="default",
11571157
description=None,
11581158
locationUri=hive_database.locationUri,
1159-
parameters={"test": None, "label": "core"},
1159+
parameters={"label": "core"},
11601160
privileges=None,
11611161
ownerName=None,
11621162
ownerType=1,

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2324,7 +2324,7 @@ def clean_up(test_catalog: Catalog) -> None:
23242324
database_name = database_tuple[0]
23252325
if "my_iceberg_database-" in database_name:
23262326
for identifier in test_catalog.list_tables(database_name):
2327-
test_catalog.purge_table(identifier)
2327+
test_catalog.drop_table(identifier)
23282328
test_catalog.drop_namespace(database_name)
23292329

23302330

tests/integration/test_catalog.py

Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from pathlib import Path, PosixPath
19+
from typing import Generator, List
20+
21+
import pytest
22+
23+
from pyiceberg.catalog import Catalog, MetastoreCatalog
24+
from pyiceberg.catalog.hive import HiveCatalog
25+
from pyiceberg.catalog.memory import InMemoryCatalog
26+
from pyiceberg.catalog.rest import RestCatalog
27+
from pyiceberg.catalog.sql import SqlCatalog
28+
from pyiceberg.exceptions import (
29+
NamespaceAlreadyExistsError,
30+
NamespaceNotEmptyError,
31+
NoSuchNamespaceError,
32+
NoSuchTableError,
33+
TableAlreadyExistsError,
34+
)
35+
from pyiceberg.io import WAREHOUSE
36+
from pyiceberg.schema import Schema
37+
from tests.conftest import clean_up
38+
39+
40+
@pytest.fixture(scope="function")
def memory_catalog(tmp_path: PosixPath) -> Generator[Catalog, None, None]:
    """Yield an ``InMemoryCatalog`` whose warehouse is ``tmp_path``, then run ``clean_up`` on it."""
    catalog_properties = {
        WAREHOUSE: tmp_path.absolute().as_posix(),
        "test.key": "test.value",
    }
    catalog = InMemoryCatalog("test.in_memory.catalog", **catalog_properties)

    yield catalog

    # Teardown: executed by pytest once the requesting test has finished.
    clean_up(catalog)
48+
49+
50+
@pytest.fixture(scope="function")
def sqlite_catalog_memory(warehouse: Path) -> Generator[Catalog, None, None]:
    """Yield a ``SqlCatalog`` backed by an in-memory SQLite database, then run ``clean_up`` on it."""
    warehouse_uri = f"file://{warehouse}"
    catalog = SqlCatalog("sqlitememory", uri="sqlite:///:memory:", warehouse=warehouse_uri)

    yield catalog

    # Teardown: executed by pytest once the requesting test has finished.
    clean_up(catalog)
57+
58+
59+
@pytest.fixture(scope="function")
def sqlite_catalog_file(warehouse: Path) -> Generator[Catalog, None, None]:
    """Yield a ``SqlCatalog`` backed by a SQLite file inside ``warehouse``, then run ``clean_up`` on it."""
    database_uri = f"sqlite:////{warehouse}/sql-catalog.db"
    warehouse_uri = f"file://{warehouse}"
    catalog = SqlCatalog("sqlitefile", uri=database_uri, warehouse=warehouse_uri)

    yield catalog

    # Teardown: executed by pytest once the requesting test has finished.
    clean_up(catalog)
66+
67+
68+
@pytest.fixture(scope="function")
def rest_catalog() -> Generator[Catalog, None, None]:
    """Yield a ``RestCatalog`` pointed at ``http://localhost:8181``, then run ``clean_up`` on it."""
    catalog = RestCatalog("rest", uri="http://localhost:8181")

    yield catalog

    # Teardown: executed by pytest once the requesting test has finished.
    clean_up(catalog)
75+
76+
77+
@pytest.fixture(scope="function")
def hive_catalog() -> Generator[Catalog, None, None]:
    """Yield a ``HiveCatalog`` configured with local endpoints, then run ``clean_up`` on it."""
    # Connection settings for the catalog URI and the S3 endpoint/credentials.
    hive_properties = {
        "uri": "http://localhost:9083",
        "s3.endpoint": "http://localhost:9000",
        "s3.access-key-id": "admin",
        "s3.secret-access-key": "password",
    }
    catalog = HiveCatalog("test_hive_catalog", **hive_properties)

    yield catalog

    # Teardown: executed by pytest once the requesting test has finished.
    clean_up(catalog)
90+
91+
92+
# Every catalog fixture defined in this module, wrapped as a lazy fixture so it
# can be passed as a value to ``pytest.mark.parametrize``.
CATALOGS = [
    pytest.lazy_fixture(fixture_name)
    for fixture_name in (
        "memory_catalog",
        "sqlite_catalog_memory",
        "sqlite_catalog_file",
        "rest_catalog",
        "hive_catalog",
    )
]
99+
100+
101+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_table_with_default_location(
    test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str
) -> None:
    """Create a table without an explicit location and check its name and metadata version after loading."""
    test_catalog.create_namespace(database_name)
    table_ident = (database_name, table_name)
    test_catalog.create_table(table_ident, table_schema_nested)

    loaded = test_catalog.load_table(table_ident)

    assert loaded.name() == table_ident
    # A freshly created table is expected to be at metadata version 0.
    metadata_version = MetastoreCatalog._parse_metadata_version(loaded.metadata_location)
    assert metadata_version == 0
112+
113+
114+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_table_with_invalid_database(test_catalog: Catalog, table_schema_nested: Schema, table_name: str) -> None:
    """Expect ``NoSuchNamespaceError`` when creating a table under ``"invalid"``, a namespace this test never creates."""
    missing_namespace_ident = ("invalid", table_name)

    with pytest.raises(NoSuchNamespaceError):
        test_catalog.create_table(missing_namespace_ident, table_schema_nested)
120+
121+
122+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_duplicated_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
    """Expect ``TableAlreadyExistsError`` when the same identifier is created a second time."""
    test_catalog.create_namespace(database_name)
    table_ident = (database_name, table_name)

    # The first creation succeeds; repeating it with the same identifier must fail.
    test_catalog.create_table(table_ident, table_schema_nested)
    with pytest.raises(TableAlreadyExistsError):
        test_catalog.create_table(table_ident, table_schema_nested)
129+
130+
131+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_table_if_not_exists_duplicated_table(
    test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str
) -> None:
    """Check that ``create_table_if_not_exists`` on an existing identifier returns a table with the same name."""
    test_catalog.create_namespace(database_name)
    table_ident = (database_name, table_name)

    created = test_catalog.create_table(table_ident, table_schema_nested)
    fetched = test_catalog.create_table_if_not_exists(table_ident, table_schema_nested)

    assert created.name() == fetched.name()
140+
141+
142+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
    """Check that a loaded table matches the created one in name, metadata location and metadata."""
    test_catalog.create_namespace(database_name)
    table_ident = (database_name, table_name)

    created = test_catalog.create_table(table_ident, table_schema_nested)
    reloaded = test_catalog.load_table(table_ident)

    assert created.name() == reloaded.name()
    assert created.metadata_location == reloaded.metadata_location
    assert created.metadata == reloaded.metadata
152+
153+
154+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_list: List[str]) -> None:
    """Create every table in ``table_list`` and check that ``list_tables`` returns exactly that many, each one present."""
    test_catalog.create_namespace(database_name)
    expected_identifiers = [(database_name, name) for name in table_list]
    for table_ident in expected_identifiers:
        test_catalog.create_table(table_ident, table_schema_nested)

    listed = test_catalog.list_tables(database_name)

    assert len(listed) == len(table_list)
    for table_ident in expected_identifiers:
        assert table_ident in listed
164+
165+
166+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
    """Rename a table into a second namespace and check the new identifier loads while the old one does not."""
    target_database = f"{database_name}_new"
    source_ident = (database_name, table_name)
    target_ident = (target_database, f"rename-{table_name}")

    test_catalog.create_namespace(database_name)
    test_catalog.create_namespace(target_database)
    original = test_catalog.create_table(source_ident, table_schema_nested)
    assert original.name() == source_ident

    test_catalog.rename_table(source_ident, target_ident)

    renamed = test_catalog.load_table(target_ident)
    assert renamed.name() == target_ident
    # The rename is expected to leave the metadata location unchanged.
    assert renamed.metadata_location == original.metadata_location
    with pytest.raises(NoSuchTableError):
        test_catalog.load_table(source_ident)
183+
184+
185+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
    """Drop a freshly created table and expect ``NoSuchTableError`` when loading it afterwards."""
    test_catalog.create_namespace(database_name)
    table_ident = (database_name, table_name)
    created = test_catalog.create_table(table_ident, table_schema_nested)
    assert created.name() == table_ident

    test_catalog.drop_table(table_ident)

    with pytest.raises(NoSuchTableError):
        test_catalog.load_table(table_ident)
195+
196+
197+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
    """Purge a freshly created table and expect ``NoSuchTableError`` when loading it afterwards.

    The test is skipped when ``test_catalog`` is a ``HiveCatalog``.
    """
    if isinstance(test_catalog, HiveCatalog):
        pytest.skip("HiveCatalog does not support purge_table operation yet")

    test_catalog.create_namespace(database_name)
    table_ident = (database_name, table_name)
    test_catalog.create_table(table_ident, table_schema_nested)
    loaded = test_catalog.load_table(table_ident)
    assert loaded.name() == table_ident

    test_catalog.purge_table(table_ident)

    with pytest.raises(NoSuchTableError):
        test_catalog.load_table(table_ident)
211+
212+
213+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
    """Check that ``table_exists`` returns ``True`` for a table that was just created."""
    test_catalog.create_namespace(database_name)
    table_ident = (database_name, table_name)
    test_catalog.create_table(table_ident, table_schema_nested)

    exists = test_catalog.table_exists(table_ident)

    assert exists is True
219+
220+
221+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_namespace(test_catalog: Catalog, database_name: str) -> None:
    """Check that a created namespace shows up in ``list_namespaces``."""
    test_catalog.create_namespace(database_name)

    known_namespaces = test_catalog.list_namespaces()

    assert (database_name,) in known_namespaces
226+
227+
228+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_duplicate_namespace(test_catalog: Catalog, database_name: str) -> None:
    """Expect ``NamespaceAlreadyExistsError`` when the same namespace is created a second time."""
    # The first creation succeeds; repeating it with the same name must fail.
    test_catalog.create_namespace(database_name)

    with pytest.raises(NamespaceAlreadyExistsError):
        test_catalog.create_namespace(database_name)
234+
235+
236+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_namepsace_if_not_exists(test_catalog: Catalog, database_name: str) -> None:
    """Call ``create_namespace_if_not_exists`` on an existing namespace and check that it is still listed."""
    test_catalog.create_namespace(database_name)

    # The namespace already exists at this point.
    test_catalog.create_namespace_if_not_exists(database_name)

    known_namespaces = test_catalog.list_namespaces()
    assert (database_name,) in known_namespaces
242+
243+
244+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_create_namespace_with_comment(test_catalog: Catalog, database_name: str) -> None:
    """Create a namespace with a ``comment`` property and check the value read back by ``load_namespace_properties``."""
    namespace_properties = {"comment": "this is a test description"}
    test_catalog.create_namespace(namespace=database_name, properties=namespace_properties)

    known_namespaces = test_catalog.list_namespaces()
    assert (database_name,) in known_namespaces

    loaded_properties = test_catalog.load_namespace_properties(database_name)
    assert loaded_properties["comment"] == "this is a test description"
255+
256+
257+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_list_namespaces(test_catalog: Catalog, database_list: List[str]) -> None:
    """Create every namespace in ``database_list`` and check that each one is listed.

    Also checks that listing namespaces under the first created namespace
    returns an empty result.
    """
    for namespace_name in database_list:
        test_catalog.create_namespace(namespace_name)

    known_namespaces = test_catalog.list_namespaces()
    for namespace_name in database_list:
        assert (namespace_name,) in known_namespaces

    first_namespace = list(database_list)[0]
    nested_namespaces = test_catalog.list_namespaces(first_namespace)
    assert len(nested_namespaces) == 0
266+
267+
268+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_drop_namespace(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
    """Check that dropping a namespace fails while it holds a table and succeeds once the table is dropped."""
    namespace_entry = (database_name,)
    table_ident = (database_name, table_name)

    test_catalog.create_namespace(database_name)
    assert namespace_entry in test_catalog.list_namespaces()

    # While the namespace still contains a table, the drop must be rejected.
    test_catalog.create_table(table_ident, table_schema_nested)
    with pytest.raises(NamespaceNotEmptyError):
        test_catalog.drop_namespace(database_name)

    # After the table is dropped, the namespace can be dropped as well.
    test_catalog.drop_table(table_ident)
    test_catalog.drop_namespace(database_name)
    assert namespace_entry not in test_catalog.list_namespaces()
279+
280+
281+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_load_namespace_properties(test_catalog: Catalog, database_name: str) -> None:
    """Create a namespace with several properties and check each value read back by ``load_namespace_properties``."""
    expected_properties = {
        "comment": "this is a test description",
        "test_property1": "1",
        "test_property2": "2",
        "test_property3": "3",
    }
    test_catalog.create_namespace(database_name, expected_properties)

    loaded_properties = test_catalog.load_namespace_properties(database_name)

    # Only the keys set by this test are compared.
    for key, expected_value in expected_properties.items():
        assert expected_value == loaded_properties[key]
294+
295+
296+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_namespace_properties(test_catalog: Catalog, database_name: str) -> None:
    """Remove and update namespace properties in one call and check the report returned by the catalog.

    ``should_not_removed`` is requested for removal but is not among the
    properties the namespace is created with, so it is expected under
    ``missing`` rather than ``removed``.
    """
    initial_properties = {
        "comment": "this is a test description",
        "test_property1": "1",
        "test_property2": "2",
        "test_property3": "3",
    }
    removals = {"test_property1", "test_property2", "test_property3", "should_not_removed"}
    updates = {"test_property4": "4", "test_property5": "5", "comment": "updated test description"}

    test_catalog.create_namespace(database_name, initial_properties)
    report = test_catalog.update_namespace_properties(database_name, removals, updates)

    for key in updates:
        assert key in report.updated
    for key in removals:
        expected_bucket = report.missing if key == "should_not_removed" else report.removed
        assert key in expected_bucket

    reloaded_properties = test_catalog.load_namespace_properties(database_name)
    assert "updated test description" == reloaded_properties["comment"]

tests/integration/test_writes/test_writes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1540,7 +1540,7 @@ def test_rest_catalog_with_empty_catalog_name_append_data(session_catalog: Catal
15401540

15411541
@pytest.mark.integration
15421542
def test_table_v1_with_null_nested_namespace(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
1543-
identifier = "default.lower.table_v1_with_null_nested_namespace"
1543+
identifier = "default.table_v1_with_null_nested_namespace"
15441544
tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_null])
15451545
assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}"
15461546

0 commit comments

Comments
 (0)