Skip to content

Commit d0bfb4a

Browse files
authored
Standardize AWS credential names (#922)
* first try * glue + s3 * add dynamo * change from aws. to client. * add missing label * change to PropertyUtil.get_first_property_value * add unit tests for glue and dynamodb * add unit tests for fileio * add doc * revise doc * resolve review comments * adopt kevinjqliu's suggestions
1 parent 4282d2f commit d0bfb4a

File tree

13 files changed

+487
-79
lines changed

13 files changed

+487
-79
lines changed

mkdocs/docs/configuration.md

Lines changed: 139 additions & 55 deletions
Large diffs are not rendered by default.

pyiceberg/catalog/__init__.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
RecursiveDict,
6767
)
6868
from pyiceberg.utils.config import Config, merge_config
69+
from pyiceberg.utils.deprecated import deprecated
6970

7071
if TYPE_CHECKING:
7172
import pyarrow as pa
@@ -100,6 +101,21 @@
100101
re.X,
101102
)
102103

104+
DEPRECATED_PROFILE_NAME = "profile_name"
105+
DEPRECATED_REGION = "region_name"
106+
DEPRECATED_BOTOCORE_SESSION = "botocore_session"
107+
DEPRECATED_ACCESS_KEY_ID = "aws_access_key_id"
108+
DEPRECATED_SECRET_ACCESS_KEY = "aws_secret_access_key"
109+
DEPRECATED_SESSION_TOKEN = "aws_session_token"
110+
DEPRECATED_PROPERTY_NAMES = {
111+
DEPRECATED_PROFILE_NAME,
112+
DEPRECATED_REGION,
113+
DEPRECATED_BOTOCORE_SESSION,
114+
DEPRECATED_ACCESS_KEY_ID,
115+
DEPRECATED_SECRET_ACCESS_KEY,
116+
DEPRECATED_SESSION_TOKEN,
117+
}
118+
103119

104120
class CatalogType(Enum):
105121
REST = "rest"
@@ -692,6 +708,17 @@ def __repr__(self) -> str:
692708

693709

694710
class MetastoreCatalog(Catalog, ABC):
711+
def __init__(self, name: str, **properties: str):
    """Initialize the catalog and flag deprecated AWS credential property names.

    Args:
        name: Catalog name, forwarded to the parent initializer.
        **properties: Catalog configuration properties, forwarded to the parent initializer.
    """
    super().__init__(name, **properties)

    # Emit one deprecation notice per legacy property that is set to a truthy value.
    for property_name in DEPRECATED_PROPERTY_NAMES:
        if self.properties.get(property_name):
            # `deprecated(...)` is used here as a decorator factory: it wraps a no-op
            # lambda which is then called immediately (presumably this is what
            # triggers the warning -- confirm against pyiceberg.utils.deprecated).
            deprecated(
                deprecated_in="0.7.0",
                removed_in="0.8.0",
                # The DynamoDB-specific properties use the "dynamodb." prefix
                # (e.g. "dynamodb.region"), so the hint must name that prefix.
                help_message=f"The property {property_name} is deprecated. Please use properties that start with client., glue., and dynamodb. instead",
            )(lambda: None)()
721+
695722
def create_table_transaction(
696723
self,
697724
identifier: Union[str, Identifier],

pyiceberg/catalog/dynamodb.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@
2929
import boto3
3030

3131
from pyiceberg.catalog import (
32+
DEPRECATED_ACCESS_KEY_ID,
33+
DEPRECATED_BOTOCORE_SESSION,
34+
DEPRECATED_PROFILE_NAME,
35+
DEPRECATED_REGION,
36+
DEPRECATED_SECRET_ACCESS_KEY,
37+
DEPRECATED_SESSION_TOKEN,
3238
ICEBERG,
3339
METADATA_LOCATION,
3440
PREVIOUS_METADATA_LOCATION,
@@ -47,7 +53,7 @@
4753
NoSuchTableError,
4854
TableAlreadyExistsError,
4955
)
50-
from pyiceberg.io import load_file_io
56+
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, load_file_io
5157
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
5258
from pyiceberg.schema import Schema
5359
from pyiceberg.serializers import FromInputFile
@@ -78,17 +84,32 @@
7884
ACTIVE = "ACTIVE"
7985
ITEM = "Item"
8086

87+
DYNAMODB_PROFILE_NAME = "dynamodb.profile-name"
88+
DYNAMODB_REGION = "dynamodb.region"
89+
DYNAMODB_ACCESS_KEY_ID = "dynamodb.access-key-id"
90+
DYNAMODB_SECRET_ACCESS_KEY = "dynamodb.secret-access-key"
91+
DYNAMODB_SESSION_TOKEN = "dynamodb.session-token"
92+
8193

8294
class DynamoDbCatalog(MetastoreCatalog):
8395
def __init__(self, name: str, **properties: str):
8496
super().__init__(name, **properties)
97+
98+
from pyiceberg.table import PropertyUtil
99+
85100
session = boto3.Session(
86-
profile_name=properties.get("profile_name"),
87-
region_name=properties.get("region_name"),
88-
botocore_session=properties.get("botocore_session"),
89-
aws_access_key_id=properties.get("aws_access_key_id"),
90-
aws_secret_access_key=properties.get("aws_secret_access_key"),
91-
aws_session_token=properties.get("aws_session_token"),
101+
profile_name=PropertyUtil.get_first_property_value(properties, DYNAMODB_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
102+
region_name=PropertyUtil.get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION),
103+
botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION),
104+
aws_access_key_id=PropertyUtil.get_first_property_value(
105+
properties, DYNAMODB_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID, DEPRECATED_ACCESS_KEY_ID
106+
),
107+
aws_secret_access_key=PropertyUtil.get_first_property_value(
108+
properties, DYNAMODB_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, DEPRECATED_SECRET_ACCESS_KEY
109+
),
110+
aws_session_token=PropertyUtil.get_first_property_value(
111+
properties, DYNAMODB_SESSION_TOKEN, AWS_SESSION_TOKEN, DEPRECATED_SESSION_TOKEN
112+
),
92113
)
93114
self.dynamodb = session.client(DYNAMODB_CLIENT)
94115
self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, DYNAMODB_TABLE_NAME_DEFAULT)

pyiceberg/catalog/glue.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@
3939
)
4040

4141
from pyiceberg.catalog import (
42+
DEPRECATED_ACCESS_KEY_ID,
43+
DEPRECATED_BOTOCORE_SESSION,
44+
DEPRECATED_PROFILE_NAME,
45+
DEPRECATED_REGION,
46+
DEPRECATED_SECRET_ACCESS_KEY,
47+
DEPRECATED_SESSION_TOKEN,
4248
EXTERNAL_TABLE,
4349
ICEBERG,
4450
LOCATION,
@@ -58,6 +64,7 @@
5864
NoSuchTableError,
5965
TableAlreadyExistsError,
6066
)
67+
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
6168
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
6269
from pyiceberg.schema import Schema, SchemaVisitor, visit
6370
from pyiceberg.serializers import FromInputFile
@@ -117,6 +124,12 @@
117124
ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional"
118125
ICEBERG_FIELD_CURRENT = "iceberg.field.current"
119126

127+
GLUE_PROFILE_NAME = "glue.profile-name"
128+
GLUE_REGION = "glue.region"
129+
GLUE_ACCESS_KEY_ID = "glue.access-key-id"
130+
GLUE_SECRET_ACCESS_KEY = "glue.secret-access-key"
131+
GLUE_SESSION_TOKEN = "glue.session-token"
132+
120133

121134
def _construct_parameters(
122135
metadata_location: str, glue_table: Optional[TableTypeDef] = None, prev_metadata_location: Optional[str] = None
@@ -285,13 +298,21 @@ class GlueCatalog(MetastoreCatalog):
285298
def __init__(self, name: str, **properties: Any):
286299
super().__init__(name, **properties)
287300

301+
from pyiceberg.table import PropertyUtil
302+
288303
session = boto3.Session(
289-
profile_name=properties.get("profile_name"),
290-
region_name=properties.get("region_name"),
291-
botocore_session=properties.get("botocore_session"),
292-
aws_access_key_id=properties.get("aws_access_key_id"),
293-
aws_secret_access_key=properties.get("aws_secret_access_key"),
294-
aws_session_token=properties.get("aws_session_token"),
304+
profile_name=PropertyUtil.get_first_property_value(properties, GLUE_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
305+
region_name=PropertyUtil.get_first_property_value(properties, GLUE_REGION, AWS_REGION, DEPRECATED_REGION),
306+
botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION),
307+
aws_access_key_id=PropertyUtil.get_first_property_value(
308+
properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID, DEPRECATED_ACCESS_KEY_ID
309+
),
310+
aws_secret_access_key=PropertyUtil.get_first_property_value(
311+
properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, DEPRECATED_SECRET_ACCESS_KEY
312+
),
313+
aws_session_token=PropertyUtil.get_first_property_value(
314+
properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN, DEPRECATED_SESSION_TOKEN
315+
),
295316
)
296317
self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT))
297318

pyiceberg/io/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@
4646

4747
logger = logging.getLogger(__name__)
4848

49+
AWS_REGION = "client.region"
50+
AWS_ACCESS_KEY_ID = "client.access-key-id"
51+
AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
52+
AWS_SESSION_TOKEN = "client.session-token"
4953
S3_ENDPOINT = "s3.endpoint"
5054
S3_ACCESS_KEY_ID = "s3.access-key-id"
5155
S3_SECRET_ACCESS_KEY = "s3.secret-access-key"

pyiceberg/io/fsspec.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@
4646
ADLFS_CONNECTION_STRING,
4747
ADLFS_SAS_TOKEN,
4848
ADLFS_TENANT_ID,
49+
AWS_ACCESS_KEY_ID,
50+
AWS_REGION,
51+
AWS_SECRET_ACCESS_KEY,
52+
AWS_SESSION_TOKEN,
4953
GCS_ACCESS,
5054
GCS_CACHE_TIMEOUT,
5155
GCS_CONSISTENCY,
@@ -114,12 +118,14 @@ def _file(_: Properties) -> LocalFileSystem:
114118
def _s3(properties: Properties) -> AbstractFileSystem:
115119
from s3fs import S3FileSystem
116120

121+
from pyiceberg.table import PropertyUtil
122+
117123
client_kwargs = {
118124
"endpoint_url": properties.get(S3_ENDPOINT),
119-
"aws_access_key_id": properties.get(S3_ACCESS_KEY_ID),
120-
"aws_secret_access_key": properties.get(S3_SECRET_ACCESS_KEY),
121-
"aws_session_token": properties.get(S3_SESSION_TOKEN),
122-
"region_name": properties.get(S3_REGION),
125+
"aws_access_key_id": PropertyUtil.get_first_property_value(properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
126+
"aws_secret_access_key": PropertyUtil.get_first_property_value(properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
127+
"aws_session_token": PropertyUtil.get_first_property_value(properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
128+
"region_name": PropertyUtil.get_first_property_value(properties, S3_REGION, AWS_REGION),
123129
}
124130
config_kwargs = {}
125131
register_events: Dict[str, Callable[[Properties], None]] = {}

pyiceberg/io/pyarrow.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@
8787
)
8888
from pyiceberg.expressions.visitors import visit as boolean_expression_visit
8989
from pyiceberg.io import (
90+
AWS_ACCESS_KEY_ID,
91+
AWS_REGION,
92+
AWS_SECRET_ACCESS_KEY,
93+
AWS_SESSION_TOKEN,
9094
GCS_DEFAULT_LOCATION,
9195
GCS_ENDPOINT,
9296
GCS_TOKEN,
@@ -345,12 +349,14 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
345349
if scheme in {"s3", "s3a", "s3n"}:
346350
from pyarrow.fs import S3FileSystem
347351

352+
from pyiceberg.table import PropertyUtil
353+
348354
client_kwargs: Dict[str, Any] = {
349355
"endpoint_override": self.properties.get(S3_ENDPOINT),
350-
"access_key": self.properties.get(S3_ACCESS_KEY_ID),
351-
"secret_key": self.properties.get(S3_SECRET_ACCESS_KEY),
352-
"session_token": self.properties.get(S3_SESSION_TOKEN),
353-
"region": self.properties.get(S3_REGION),
356+
"access_key": PropertyUtil.get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
357+
"secret_key": PropertyUtil.get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
358+
"session_token": PropertyUtil.get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
359+
"region": PropertyUtil.get_first_property_value(self.properties, S3_REGION, AWS_REGION),
354360
}
355361

356362
if proxy_uri := self.properties.get(S3_PROXY_URI):

pyiceberg/table/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,13 @@ def property_as_bool(properties: Dict[str, str], property_name: str, default: bo
253253
return value.lower() == "true"
254254
return default
255255

256+
@staticmethod
def get_first_property_value(properties: Properties, *property_names: str) -> Optional[Any]:
    """Return the first truthy value found under any of the given property names.

    Names are checked in the order given. A name that is missing, or whose
    value is falsy (e.g. ``None`` or an empty string), is skipped.

    Args:
        properties: Mapping to look the names up in.
        *property_names: Candidate keys, highest priority first.

    Returns:
        The first truthy value, or ``None`` when no candidate yields one.
    """
    # Lazy generator: lookups stop as soon as a truthy value is found.
    looked_up = (properties.get(candidate) for candidate in property_names)
    return next((found for found in looked_up if found), None)
262+
256263

257264
class Transaction:
258265
_table: Table

tests/catalog/test_dynamodb.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,13 @@
4242
TableAlreadyExistsError,
4343
)
4444
from pyiceberg.schema import Schema
45-
from tests.conftest import BUCKET_NAME, TABLE_METADATA_LOCATION_REGEX
45+
from pyiceberg.typedef import Properties
46+
from tests.conftest import (
47+
BUCKET_NAME,
48+
DEPRECATED_AWS_SESSION_PROPERTIES,
49+
TABLE_METADATA_LOCATION_REGEX,
50+
UNIFIED_AWS_SESSION_PROPERTIES,
51+
)
4652

4753

4854
@mock_aws
@@ -579,6 +585,60 @@ def test_passing_provided_profile() -> None:
579585
assert test_catalog.dynamodb is mock_session().client()
580586

581587

588+
@mock_aws
def test_passing_glue_session_properties() -> None:
    """Check that ``dynamodb.*`` session properties are the ones handed to ``boto3.Session``.

    Despite "glue" in its name, this test builds a ``DynamoDbCatalog``. The
    unified and deprecated session properties are supplied as well, and the
    test expects the ``dynamodb.*`` values to be used for every session argument.
    """
    session_properties: Properties = {
        "dynamodb.access-key-id": "dynamodb.access-key-id",
        "dynamodb.secret-access-key": "dynamodb.secret-access-key",
        "dynamodb.profile-name": "dynamodb.profile-name",
        "dynamodb.region": "dynamodb.region",
        "dynamodb.session-token": "dynamodb.session-token",
        **UNIFIED_AWS_SESSION_PROPERTIES,
        **DEPRECATED_AWS_SESSION_PROPERTIES,
    }
    expected_session_kwargs = {
        "aws_access_key_id": "dynamodb.access-key-id",
        "aws_secret_access_key": "dynamodb.secret-access-key",
        "aws_session_token": "dynamodb.session-token",
        "region_name": "dynamodb.region",
        "profile_name": "dynamodb.profile-name",
        "botocore_session": None,
    }

    with mock.patch("boto3.Session") as session_factory:
        dynamodb_client = mock.Mock()
        # Report the table as ACTIVE (presumably so catalog construction finds
        # an existing catalog table -- confirm against DynamoDbCatalog).
        dynamodb_client.describe_table.return_value = {"Table": {"TableStatus": "ACTIVE"}}
        session_factory.return_value.client.return_value = dynamodb_client
        test_catalog = DynamoDbCatalog("dynamodb", **session_properties)

        session_factory.assert_called_with(**expected_session_kwargs)
        assert test_catalog.dynamodb is session_factory().client()
615+
616+
617+
@mock_aws
def test_passing_unified_session_properties_to_dynamodb() -> None:
    """Check that ``client.*`` properties are used when no ``dynamodb.*`` equivalent is set.

    Only ``dynamodb.profile-name`` is given as a DynamoDB-specific property.
    The test expects the remaining session arguments to carry the ``client.*``
    values, even though the deprecated properties are supplied too.
    """
    session_properties: Properties = {
        "dynamodb.profile-name": "dynamodb.profile-name",
        **UNIFIED_AWS_SESSION_PROPERTIES,
        **DEPRECATED_AWS_SESSION_PROPERTIES,
    }
    expected_session_kwargs = {
        "aws_access_key_id": "client.access-key-id",
        "aws_secret_access_key": "client.secret-access-key",
        "aws_session_token": "client.session-token",
        "region_name": "client.region",
        "profile_name": "dynamodb.profile-name",
        "botocore_session": None,
    }

    with mock.patch("boto3.Session") as session_factory:
        dynamodb_client = mock.Mock()
        # Report the table as ACTIVE (presumably so catalog construction finds
        # an existing catalog table -- confirm against DynamoDbCatalog).
        dynamodb_client.describe_table.return_value = {"Table": {"TableStatus": "ACTIVE"}}
        session_factory.return_value.client.return_value = dynamodb_client
        test_catalog = DynamoDbCatalog("dynamodb", **session_properties)

        session_factory.assert_called_with(**expected_session_kwargs)
        assert test_catalog.dynamodb is session_factory().client()
640+
641+
582642
@mock_aws
583643
def test_table_exists(
584644
_bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str

0 commit comments

Comments
 (0)