Skip to content

Commit aa5a136

Browse files
authored
[FEAT]register table using iceberg metadata file via pyiceberg (#711)
1 parent 82df57e commit aa5a136

File tree

3 files changed: 38 additions and 1 deletion

pyiceberg/catalog/glue.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,14 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location:
417417
Raises:
418418
TableAlreadyExistsError: If the table already exists
419419
"""
420-
raise NotImplementedError
420+
database_name, table_name = self.identifier_to_database_and_table(identifier)
421+
properties = EMPTY_DICT
422+
io = self._load_file_io(location=metadata_location)
423+
file = io.new_input(metadata_location)
424+
metadata = FromInputFile.table_metadata(file)
425+
table_input = _construct_table_input(table_name, metadata_location, properties, metadata)
426+
self._create_glue_table(database_name=database_name, table_name=table_name, table_input=table_input)
427+
return self.load_table(identifier=identifier)
421428

422429
def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse:
423430
"""Update the table.

tests/catalog/integration_test_glue.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,3 +570,19 @@ def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_
570570
test_catalog.create_namespace(database_name)
571571
test_catalog.create_table((database_name, table_name), table_schema_nested)
572572
assert test_catalog.table_exists((database_name, table_name)) is True
573+
574+
575+
def test_register_table_with_given_location(
    test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str
) -> None:
    """Register the metadata file of a dropped table under a new table name.

    Creates a table, remembers its metadata location, drops the table, and
    then registers that same metadata location under a different identifier
    in the same database.
    """
    source_identifier = (database_name, table_name)
    target_identifier = (database_name, f"new_{table_name}")

    test_catalog.create_namespace(database_name)
    created = test_catalog.create_table(source_identifier, table_schema_nested)
    metadata_file = created.metadata_location

    # Dropping removes the table from the catalog but keeps the metadata file.
    test_catalog.drop_table(source_identifier)
    assert not test_catalog.table_exists(source_identifier)

    registered = test_catalog.register_table(target_identifier, metadata_file)

    assert registered.identifier == (CATALOG_NAME, *target_identifier)
    assert registered.metadata_location == metadata_file
    assert test_catalog.table_exists(target_identifier)

tests/catalog/test_glue.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,3 +848,17 @@ def test_table_exists(
848848
assert test_catalog.table_exists(identifier) is True
849849
# Act and Assert for a non-existing table
850850
assert test_catalog.table_exists(('non', 'exist')) is False
851+
852+
853+
@mock_aws
def test_register_table_with_given_location(
    _bucket_initialize: None, moto_endpoint_url: str, metadata_location: str, database_name: str, table_name: str
) -> None:
    """Register an existing metadata file as a Glue table and verify the result.

    Checks that the returned table carries the catalog-qualified identifier,
    that it references the metadata file that was passed in, and that the
    catalog reports the table as existing afterwards.
    """
    catalog_name = "glue"
    identifier = (database_name, table_name)
    test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
    test_catalog.create_namespace(namespace=database_name, properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db"})

    table = test_catalog.register_table(identifier, metadata_location)

    assert table.identifier == (catalog_name,) + identifier
    # The registered table must point at exactly the metadata file supplied by
    # the caller; the integration test for register_table checks the same thing.
    assert table.metadata_location == metadata_location
    assert test_catalog.table_exists(identifier) is True

0 commit comments

Comments
 (0)