
Commit 68e0470

fix: revert lake formation deprecation (#2194)
* fix: revert LF deprecation
1 parent 0140344 · commit 68e0470

37 files changed: +4635 −2461 lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -224,7 +224,7 @@ To run all database MySQL tests (Using 8 parallel processes):
 
 ``pytest -n 8 tests/unit/test_mysql.py``
 
-To run all tests for all python versions (assuming Amazon QuickSight is activated and the optional stack deployed):
+To run all tests for all python versions (assuming Amazon QuickSight is activated and the optional stacks deployed):
 
 ``./test.sh``
 
```

README.md

Lines changed: 2 additions & 0 deletions
```diff
@@ -150,6 +150,7 @@ The quickest way to get started is to use AWS Glue with Ray. Read our [docs](htt
 - [029 - S3 Select](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/029%20-%20S3%20Select.ipynb)
 - [030 - Data Api](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/030%20-%20Data%20Api.ipynb)
 - [031 - OpenSearch](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/031%20-%20OpenSearch.ipynb)
+- [032 - Lake Formation Governed Tables](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/032%20-%20Lake%20Formation%20Governed%20Tables.ipynb)
 - [033 - Amazon Neptune](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/033%20-%20Amazon%20Neptune.ipynb)
 - [034 - Distributing Calls Using Ray](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/034%20-%20Distributing%20Calls%20using%20Ray.ipynb)
 - [035 - Distributing Calls on Ray Remote Cluster](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/035%20-%20Distributing%20Calls%20on%20Ray%20Remote%20Cluster.ipynb)
@@ -161,6 +162,7 @@ The quickest way to get started is to use AWS Glue with Ray. Read our [docs](htt
 - [Amazon S3](https://aws-sdk-pandas.readthedocs.io/en/3.0.0/api.html#amazon-s3)
 - [AWS Glue Catalog](https://aws-sdk-pandas.readthedocs.io/en/3.0.0/api.html#aws-glue-catalog)
 - [Amazon Athena](https://aws-sdk-pandas.readthedocs.io/en/3.0.0/api.html#amazon-athena)
+- [AWS Lake Formation](https://aws-sdk-pandas.readthedocs.io/en/3.0.0/api.html#aws-lake-formation)
 - [Amazon Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.0.0/api.html#amazon-redshift)
 - [PostgreSQL](https://aws-sdk-pandas.readthedocs.io/en/3.0.0/api.html#postgresql)
 - [MySQL](https://aws-sdk-pandas.readthedocs.io/en/3.0.0/api.html#mysql)
```
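
The restored tutorial and API entries point back at the `awswrangler.lakeformation` module. As a hedged sketch of what this revert re-enables, assuming the 2.x query helpers (`read_sql_query`, `read_sql_table`) return with unchanged signatures and using placeholder database/table names:

```python
import awswrangler as wr

# Sketch only: assumes wr.lakeformation.read_sql_query/read_sql_table are restored
# as in 2.x, and that a Governed table "my_table" exists in database "my_db".
df = wr.lakeformation.read_sql_query(
    sql="SELECT * FROM my_table LIMIT 10;",
    database="my_db",
)

# Point-in-time reads are why transaction_id / query_as_of_time reappear
# throughout this commit (placeholder epoch timestamp below).
df_snapshot = wr.lakeformation.read_sql_table(
    table="my_table",
    database="my_db",
    query_as_of_time="1680000000",
)
```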

awswrangler/__init__.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -17,6 +17,7 @@
     dynamodb,
     emr,
     exceptions,
+    lakeformation,
     mysql,
     neptune,
     opensearch,
@@ -53,6 +54,7 @@
     "s3",
     "sts",
     "redshift",
+    "lakeformation",
     "mysql",
     "neptune",
     "postgresql",
```

awswrangler/_config.py

Lines changed: 20 additions & 0 deletions
```diff
@@ -41,6 +41,7 @@ class _ConfigArg(NamedTuple):
     "max_local_cache_entries": _ConfigArg(dtype=int, nullable=False, parent_parameter_key="athena_cache_settings"),
     "athena_query_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
     "cloudwatch_query_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
+    "lakeformation_query_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
     "s3_block_size": _ConfigArg(dtype=int, nullable=False, enforced=True),
     "workgroup": _ConfigArg(dtype=str, nullable=False, enforced=True),
     "chunksize": _ConfigArg(dtype=int, nullable=False, enforced=True),
@@ -53,6 +54,7 @@ class _ConfigArg(NamedTuple):
     "redshift_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "kms_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "emr_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
+    "lakeformation_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "dynamodb_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "secretsmanager_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "timestream_query_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
@@ -344,6 +346,15 @@ def cloudwatch_query_wait_polling_delay(self) -> float:
     def cloudwatch_query_wait_polling_delay(self, value: float) -> None:
         self._set_config_value(key="cloudwatch_query_wait_polling_delay", value=value)
 
+    @property
+    def lakeformation_query_wait_polling_delay(self) -> float:
+        """Property lakeformation_query_wait_polling_delay."""
+        return cast(float, self["lakeformation_query_wait_polling_delay"])
+
+    @lakeformation_query_wait_polling_delay.setter
+    def lakeformation_query_wait_polling_delay(self, value: float) -> None:
+        self._set_config_value(key="lakeformation_query_wait_polling_delay", value=value)
+
     @property
     def s3_block_size(self) -> int:
         """Property s3_block_size."""
@@ -443,6 +454,15 @@ def emr_endpoint_url(self) -> Optional[str]:
     def emr_endpoint_url(self, value: Optional[str]) -> None:
         self._set_config_value(key="emr_endpoint_url", value=value)
 
+    @property
+    def lakeformation_endpoint_url(self) -> Optional[str]:
+        """Property lakeformation_endpoint_url."""
+        return cast(Optional[str], self["lakeformation_endpoint_url"])
+
+    @lakeformation_endpoint_url.setter
+    def lakeformation_endpoint_url(self, value: Optional[str]) -> None:
+        self._set_config_value(key="lakeformation_endpoint_url", value=value)
+
     @property
     def dynamodb_endpoint_url(self) -> Optional[str]:
         """Property dynamodb_endpoint_url."""
```

awswrangler/_utils.py

Lines changed: 35 additions & 1 deletion
```diff
@@ -50,10 +50,10 @@
     from mypy_boto3_emr.client import EMRClient
     from mypy_boto3_glue import GlueClient
     from mypy_boto3_kms.client import KMSClient
+    from mypy_boto3_lakeformation.client import LakeFormationClient
     from mypy_boto3_logs.client import CloudWatchLogsClient
     from mypy_boto3_opensearch.client import OpenSearchServiceClient
     from mypy_boto3_opensearchserverless.client import OpenSearchServiceServerlessClient
-    from mypy_boto3_opensearchserverless.literals import ServiceName
     from mypy_boto3_quicksight.client import QuickSightClient
     from mypy_boto3_rds_data.client import RDSDataServiceClient
     from mypy_boto3_redshift.client import RedshiftClient
@@ -65,6 +65,28 @@
     from mypy_boto3_timestream_write.client import TimestreamWriteClient
     from typing_extensions import Literal
 
+    ServiceName = Literal[
+        "athena",
+        "dynamodb",
+        "ec2",
+        "emr",
+        "glue",
+        "kms",
+        "lakeformation",
+        "logs",
+        "opensearch",
+        "opensearchserverless",
+        "quicksight",
+        "rds-data",
+        "redshift-data",
+        "redshift",
+        "s3",
+        "secretsmanager",
+        "sts",
+        "timestream-query",
+        "timestream-write",
+    ]
+
 _logger: logging.Logger = logging.getLogger(__name__)
 
 Boto3PrimitivesType = Dict[str, Optional[str]]
@@ -233,6 +255,8 @@ def _get_endpoint_url(service_name: str) -> Optional[str]:
         endpoint_url = _config.config.kms_endpoint_url
     elif service_name == "emr" and _config.config.emr_endpoint_url is not None:
         endpoint_url = _config.config.emr_endpoint_url
+    elif service_name == "lakeformation" and _config.config.lakeformation_endpoint_url is not None:
+        endpoint_url = _config.config.lakeformation_endpoint_url
     elif service_name == "dynamodb" and _config.config.dynamodb_endpoint_url is not None:
         endpoint_url = _config.config.dynamodb_endpoint_url
     elif service_name == "secretsmanager" and _config.config.secretsmanager_endpoint_url is not None:
@@ -255,6 +279,16 @@ def client(
     ...
 
 
+@overload
+def client(
+    service_name: 'Literal["lakeformation"]',
+    session: Optional[boto3.Session] = None,
+    botocore_config: Optional[Config] = None,
+    verify: Optional[Union[str, bool]] = None,
+) -> "LakeFormationClient":
+    ...
+
+
 @overload
 def client(
     service_name: 'Literal["logs"]',
```

awswrangler/athena/_utils.py

Lines changed: 16 additions & 4 deletions
```diff
@@ -31,7 +31,7 @@
 from awswrangler import _data_types, _utils, catalog, exceptions, s3, sts, typing
 from awswrangler._config import apply_configs
 from awswrangler._sql_formatter import _process_sql_params
-from awswrangler.catalog._utils import _catalog_id
+from awswrangler.catalog._utils import _catalog_id, _transaction_id
 
 from ._cache import _cache_manager, _CacheInfo, _check_for_cached_results, _LocalMetadataCacheManager
 
@@ -1034,6 +1034,8 @@ def show_create_table(
 def generate_create_query(
     table: str,
     database: Optional[str] = None,
+    transaction_id: Optional[str] = None,
+    query_as_of_time: Optional[str] = None,
     catalog_id: Optional[str] = None,
     boto3_session: Optional[boto3.Session] = None,
 ) -> str:
@@ -1047,6 +1049,11 @@
         Table name.
     database : str
         Database name.
+    transaction_id: str, optional
+        The ID of the transaction.
+    query_as_of_time: str, optional
+        The time as of when to read the table contents. Must be a valid Unix epoch timestamp.
+        Cannot be specified alongside transaction_id.
     catalog_id : str, optional
         The ID of the Data Catalog from which to retrieve Databases.
         If none is provided, the AWS account ID is used by default.
@@ -1084,9 +1091,14 @@ def parse_properties(parameters: Dict[str, str]) -> str:
         return ", \n".join(properties_str)
 
     client_glue = _utils.client(service_name="glue", session=boto3_session)
-    table_detail = client_glue.get_table(**_catalog_id(catalog_id=catalog_id, DatabaseName=database, Name=table))[
-        "Table"
-    ]
+    table_detail = client_glue.get_table(
+        **_catalog_id(
+            catalog_id=catalog_id,
+            **_transaction_id(
+                transaction_id=transaction_id, query_as_of_time=query_as_of_time, DatabaseName=database, Name=table
+            ),
+        )
+    )["Table"]
     if table_detail["TableType"] == "VIRTUAL_VIEW":
         glue_base64_query: str = table_detail["ViewOriginalText"].replace("/* Presto View: ", "").replace(" */", "")
         glue_query: str = json.loads(base64.b64decode(glue_base64_query))["originalSql"]
```
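
With `transaction_id` and `query_as_of_time` restored, `generate_create_query` can render the DDL of a table as it looked inside a transaction or at a point in time. A hedged usage sketch (database, table, and the epoch timestamp are placeholders; per the docstring above, the two parameters are mutually exclusive):

```python
import awswrangler as wr

# Point-in-time DDL for a Governed table (all names/values are placeholders).
ddl = wr.athena.generate_create_query(
    table="my_governed_table",
    database="my_db",
    query_as_of_time="1680000000",  # cannot be combined with transaction_id
)
print(ddl)
```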

awswrangler/catalog/_add.py

Lines changed: 14 additions & 3 deletions
```diff
@@ -14,7 +14,7 @@
     _parquet_partition_definition,
     _update_table_definition,
 )
-from awswrangler.catalog._utils import _catalog_id, sanitize_table_name
+from awswrangler.catalog._utils import _catalog_id, _transaction_id, sanitize_table_name
 
 _logger: logging.Logger = logging.getLogger(__name__)
 
@@ -301,6 +301,7 @@ def add_column(
     column_name: str,
     column_type: str = "string",
     column_comment: Optional[str] = None,
+    transaction_id: Optional[str] = None,
     boto3_session: Optional[boto3.Session] = None,
     catalog_id: Optional[str] = None,
 ) -> None:
@@ -318,6 +319,8 @@ def add_column(
         Column type.
     column_comment : str
         Column Comment
+    transaction_id: str, optional
+        The ID of the transaction (i.e. used with GOVERNED tables).
     boto3_session : boto3.Session(), optional
         Boto3 Session. The default boto3 session will be used if boto3_session receive None.
     catalog_id : str, optional
@@ -341,13 +344,21 @@ def add_column(
     """
     if _check_column_type(column_type):
         client_glue = _utils.client(service_name="glue", session=boto3_session)
-        table_res = client_glue.get_table(**_catalog_id(catalog_id=catalog_id, DatabaseName=database, Name=table))
+        table_res = client_glue.get_table(
+            **_catalog_id(
+                catalog_id=catalog_id,
+                **_transaction_id(transaction_id=transaction_id, DatabaseName=database, Name=table),
+            )
+        )
         table_input: Dict[str, Any] = _update_table_definition(table_res)
         table_input["StorageDescriptor"]["Columns"].append(
             {"Name": column_name, "Type": column_type, "Comment": column_comment}
         )
         res: Dict[str, Any] = client_glue.update_table(
-            **_catalog_id(catalog_id=catalog_id, DatabaseName=database, TableInput=table_input)
+            **_catalog_id(
+                catalog_id=catalog_id,
+                **_transaction_id(transaction_id=transaction_id, DatabaseName=database, TableInput=table_input),
+            )
         )
         if ("Errors" in res) and res["Errors"]:
             for error in res["Errors"]:
```
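
The `_transaction_id` plumbing lets `add_column` run inside a Lake Formation transaction when the target is a GOVERNED table. A sketch of the intended call pattern, assuming the reverted `wr.lakeformation.start_transaction` / `commit_transaction` / `cancel_transaction` helpers return with their 2.x behaviour (all names and values are placeholders):

```python
import awswrangler as wr

# Assumption: the 2.x transaction helpers are restored by this revert.
transaction_id = wr.lakeformation.start_transaction(read_only=False)
try:
    wr.catalog.add_column(
        database="my_db",
        table="my_governed_table",
        column_name="ingest_date",
        column_type="date",
        column_comment="Ingestion date",
        transaction_id=transaction_id,
    )
    wr.lakeformation.commit_transaction(transaction_id=transaction_id)
except Exception:
    wr.lakeformation.cancel_transaction(transaction_id=transaction_id)
    raise
```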
