Skip to content

Commit f5978bb

Browse files
authored
Adds support for creating a GlueCatalog with own client (#1920)
Closes #1910

# Rationale for this change

When working with the GlueCatalog, I may already have a GlueClient that I've instantiated elsewhere and wish to keep using. This change allows passing our own client to the GlueCatalog constructor so that we aren't forced into creating a new client. This is slightly interesting because GlueCatalog is now the only catalog with a different constructor signature. Also, it may be odd for users to pass a client and then find that none of their properties (which may include retry configs) are applied. An alternative to consider is having a `from_client` or `with_client` staticmethod, but I did not see a precedent for that elsewhere. I will leave it to the maintainers to decide which they prefer and will update accordingly. Similarly, I can do the same for DynamoDB 🙂

I've also skipped the event_handler for a user-provided client because I wouldn't want to interfere with their existing event handlers; the param is also optional. Something to consider is using the [unique_id arg](https://github.com/boto/botocore/blob/aaa6690e45c8dabcde3a8d2d1aa34b5fd399fba7/botocore/hooks.py#L89) when registering an event.

> If a ``unique_id`` is given, the handler will not be registered
> if a handler with the ``unique_id`` has already been registered.

# Are these changes tested?

A basic unit test asserts that the client passed in is the client used.

# Are there any user-facing changes?

I believe so, since this is an addition to the public API.
1 parent 0d56a3b commit f5978bb

File tree

2 files changed

+45
-23
lines changed

2 files changed

+45
-23
lines changed

pyiceberg/catalog/glue.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -303,32 +303,46 @@ def add_glue_catalog_id(params: Dict[str, str], **kwargs: Any) -> None:
303303

304304

305305
class GlueCatalog(MetastoreCatalog):
306-
def __init__(self, name: str, **properties: Any):
307-
super().__init__(name, **properties)
306+
glue: GlueClient
308307

309-
retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE)
308+
def __init__(self, name: str, client: Optional[GlueClient] = None, **properties: Any):
309+
"""Glue Catalog.
310310
311-
session = boto3.Session(
312-
profile_name=properties.get(GLUE_PROFILE_NAME),
313-
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION),
314-
botocore_session=properties.get(BOTOCORE_SESSION),
315-
aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
316-
aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
317-
aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
318-
)
319-
self.glue: GlueClient = session.client(
320-
"glue",
321-
endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT),
322-
config=Config(
323-
retries={
324-
"max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES),
325-
"mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE,
326-
}
327-
),
328-
)
311+
You either need to provide a boto3 glue client, or one will be constructed from the properties.
312+
313+
Args:
314+
name: Name to identify the catalog.
315+
client: An optional boto3 glue client.
316+
properties: Properties for glue client construction and configuration.
317+
"""
318+
super().__init__(name, **properties)
319+
320+
if client:
321+
self.glue = client
322+
else:
323+
retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE)
324+
325+
session = boto3.Session(
326+
profile_name=properties.get(GLUE_PROFILE_NAME),
327+
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION),
328+
botocore_session=properties.get(BOTOCORE_SESSION),
329+
aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
330+
aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
331+
aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
332+
)
333+
self.glue: GlueClient = session.client(
334+
"glue",
335+
endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT),
336+
config=Config(
337+
retries={
338+
"max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES),
339+
"mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE,
340+
}
341+
),
342+
)
329343

330-
if glue_catalog_id := properties.get(GLUE_ID):
331-
_register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id)
344+
if glue_catalog_id := properties.get(GLUE_ID):
345+
_register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id)
332346

333347
def _convert_glue_to_iceberg(self, glue_table: TableTypeDef) -> Table:
334348
properties: Properties = glue_table["Parameters"]

tests/catalog/test_glue.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,3 +932,11 @@ def test_glue_endpoint_override(_bucket_initialize: None, moto_endpoint_url: str
932932
catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}", "glue.endpoint": test_endpoint}
933933
)
934934
assert test_catalog.glue.meta.endpoint_url == test_endpoint
935+
936+
937+
@mock_aws
938+
def test_glue_client_override() -> None:
939+
catalog_name = "glue"
940+
test_client = boto3.client("glue", region_name="us-west-2")
941+
test_catalog = GlueCatalog(catalog_name, test_client)
942+
assert test_catalog.glue is test_client

0 commit comments

Comments
 (0)