Skip to content

Commit e33bb34

Browse files
authored
Use botocore for s3 kwargs (#832)
* Use botocore for s3 kwargs
* Remove docs on valid s3 kwargs
1 parent 034f572 commit e33bb34

File tree

12 files changed

+39
-117
lines changed

12 files changed

+39
-117
lines changed

awswrangler/athena/_read.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ def read_sql_query(
779779
The dict needs to contain the information in the form {'name': 'value'} and the SQL query needs to contain
780780
`:name;`. Note that for varchar columns and similar, you must surround the value in single quotes.
781781
s3_additional_kwargs : Optional[Dict[str, Any]]
782-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
782+
Forwarded to botocore requests.
783783
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
784784
785785
Returns
@@ -1043,7 +1043,7 @@ def read_sql_table(
10431043
data_source : str, optional
10441044
Data Source / Catalog name. If None, 'AwsDataCatalog' will be used by default.
10451045
s3_additional_kwargs : Optional[Dict[str, Any]]
1046-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
1046+
Forwarded to botocore requests.
10471047
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
10481048
10491049
Returns

awswrangler/athena/_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ def describe_table(
595595
kms_key : str, optional
596596
For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID.
597597
s3_additional_kwargs : Optional[Dict[str, Any]]
598-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
598+
Forwarded to botocore requests.
599599
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
600600
boto3_session : boto3.Session(), optional
601601
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
@@ -666,7 +666,7 @@ def show_create_table(
666666
kms_key : str, optional
667667
For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID.
668668
s3_additional_kwargs : Optional[Dict[str, Any]]
669-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
669+
Forwarded to botocore requests.
670670
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
671671
boto3_session : boto3.Session(), optional
672672
Boto3 Session. The default boto3 session will be used if boto3_session receives None.

awswrangler/mysql.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
_logger: logging.Logger = logging.getLogger(__name__)
1919

2020

21-
def _validate_connection(con: pymysql.connections.Connection) -> None:
21+
def _validate_connection(con: "pymysql.connections.Connection[Any]") -> None:
2222
if not isinstance(con, pymysql.connections.Connection):
2323
raise exceptions.InvalidConnection(
2424
"Invalid 'conn' argument, please pass a "
@@ -77,7 +77,7 @@ def connect(
7777
read_timeout: Optional[int] = None,
7878
write_timeout: Optional[int] = None,
7979
connect_timeout: int = 10,
80-
) -> pymysql.connections.Connection:
80+
) -> "pymysql.connections.Connection[Any]":
8181
"""Return a pymysql connection from a Glue Catalog Connection or Secrets Manager.
8282
8383
https://pymysql.readthedocs.io
@@ -150,7 +150,7 @@ def connect(
150150

151151
def read_sql_query(
152152
sql: str,
153-
con: pymysql.connections.Connection,
153+
con: "pymysql.connections.Connection[Any]",
154154
index_col: Optional[Union[str, List[str]]] = None,
155155
params: Optional[Union[List[Any], Tuple[Any, ...], Dict[Any, Any]]] = None,
156156
chunksize: Optional[int] = None,
@@ -206,7 +206,7 @@ def read_sql_query(
206206

207207
def read_sql_table(
208208
table: str,
209-
con: pymysql.connections.Connection,
209+
con: "pymysql.connections.Connection[Any]",
210210
schema: Optional[str] = None,
211211
index_col: Optional[Union[str, List[str]]] = None,
212212
params: Optional[Union[List[Any], Tuple[Any, ...], Dict[Any, Any]]] = None,
@@ -268,7 +268,7 @@ def read_sql_table(
268268
@apply_configs
269269
def to_sql(
270270
df: pd.DataFrame,
271-
con: pymysql.connections.Connection,
271+
con: "pymysql.connections.Connection[Any]",
272272
table: str,
273273
schema: str,
274274
mode: str = "append",

awswrangler/redshift.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,9 +1253,7 @@ def copy_from_files( # pylint: disable=too-many-locals,too-many-arguments
12531253
boto3_session : boto3.Session(), optional
12541254
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
12551255
s3_additional_kwargs:
1256-
Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
1257-
"SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
1258-
"RequestPayer", "ExpectedBucketOwner".
1256+
Forwarded to botocore requests.
12591257
e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
12601258
12611259
Returns
@@ -1452,9 +1450,7 @@ def copy( # pylint: disable=too-many-arguments
14521450
boto3_session : boto3.Session(), optional
14531451
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
14541452
s3_additional_kwargs:
1455-
Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
1456-
"SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
1457-
"RequestPayer", "ExpectedBucketOwner".
1453+
Forwarded to botocore requests.
14581454
e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
14591455
max_rows_by_file : int
14601456
Max number of rows in each file.

awswrangler/s3/_copy.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,7 @@ def merge_datasets(
8585
boto3_session : boto3.Session(), optional
8686
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
8787
s3_additional_kwargs : Optional[Dict[str, Any]]
88-
Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
89-
"SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
90-
"RequestPayer", "ExpectedBucketOwner".
88+
Forwarded to botocore requests.
9189
e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
9290
9391
Returns
@@ -189,9 +187,7 @@ def copy_objects(
189187
boto3_session : boto3.Session(), optional
190188
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
191189
s3_additional_kwargs : Optional[Dict[str, Any]]
192-
Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
193-
"SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
194-
"RequestPayer", "ExpectedBucketOwner".
190+
Forwarded to botocore requests.
195191
e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
196192
197193
Returns

awswrangler/s3/_delete.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def delete_objects(
121121
Filter the s3 files by the Last modified date of the object.
122122
The filter is applied only after listing all s3 files.
123123
s3_additional_kwargs : Optional[Dict[str, Any]]
124-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
124+
Forwarded to botocore requests.
125125
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
126126
boto3_session : boto3.Session(), optional
127127
Boto3 Session. The default boto3 session will be used if boto3_session receives None.

awswrangler/s3/_describe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def describe_objects(
100100
Filter the s3 files by the Last modified date of the object.
101101
The filter is applied only after listing all s3 files.
102102
s3_additional_kwargs : Optional[Dict[str, Any]]
103-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
103+
Forwarded to botocore requests.
104104
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
105105
boto3_session : boto3.Session(), optional
106106
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
@@ -197,7 +197,7 @@ def size_objects(
197197
True to enable concurrent requests, False to disable multiple threads.
198198
If enabled, os.cpu_count() will be used as the max number of threads.
199199
s3_additional_kwargs : Optional[Dict[str, Any]]
200-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
200+
Forwarded to botocore requests.
201201
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
202202
boto3_session : boto3.Session(), optional
203203
Boto3 Session. The default boto3 session will be used if boto3_session receives None.

awswrangler/s3/_fs.py

Lines changed: 13 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
import socket
99
from contextlib import contextmanager
1010
from errno import ESPIPE
11-
from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Set, Tuple, Union, cast
11+
from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Tuple, Union, cast
1212

1313
import boto3
1414
from botocore.exceptions import ReadTimeoutError
15+
from botocore.loaders import Loader
16+
from botocore.model import ServiceModel
1517

1618
from awswrangler import _utils, exceptions
1719
from awswrangler._config import apply_configs
@@ -24,84 +26,20 @@
2426
_MIN_WRITE_BLOCK: int = 5_242_880 # 5 MB (5 * 2**20)
2527
_MIN_PARALLEL_READ_BLOCK: int = 5_242_880 # 5 MB (5 * 2**20)
2628

27-
BOTOCORE_ACCEPTED_KWARGS: Dict[str, Set[str]] = {
28-
"get_object": {
29-
"SSECustomerAlgorithm",
30-
"SSECustomerKey",
31-
"RequestPayer",
32-
"ExpectedBucketOwner",
33-
"VersionId",
34-
},
35-
"copy_object": {
36-
"ACL",
37-
"Metadata",
38-
"ServerSideEncryption",
39-
"StorageClass",
40-
"SSECustomerAlgorithm",
41-
"SSECustomerKey",
42-
"SSEKMSKeyId",
43-
"SSEKMSEncryptionContext",
44-
"Tagging",
45-
"RequestPayer",
46-
"ExpectedBucketOwner",
47-
"CopySource",
48-
},
49-
"create_multipart_upload": {
50-
"ACL",
51-
"Metadata",
52-
"ServerSideEncryption",
53-
"StorageClass",
54-
"SSECustomerAlgorithm",
55-
"SSECustomerKey",
56-
"SSEKMSKeyId",
57-
"SSEKMSEncryptionContext",
58-
"Tagging",
59-
"RequestPayer",
60-
"ExpectedBucketOwner",
61-
},
62-
"upload_part": {
63-
"SSECustomerAlgorithm",
64-
"SSECustomerKey",
65-
"RequestPayer",
66-
"ExpectedBucketOwner",
67-
},
68-
"complete_multipart_upload": {
69-
"RequestPayer",
70-
"ExpectedBucketOwner",
71-
},
72-
"put_object": {
73-
"ACL",
74-
"Metadata",
75-
"ServerSideEncryption",
76-
"StorageClass",
77-
"SSECustomerAlgorithm",
78-
"SSECustomerKey",
79-
"SSEKMSKeyId",
80-
"SSEKMSEncryptionContext",
81-
"Tagging",
82-
"RequestPayer",
83-
"ExpectedBucketOwner",
84-
},
85-
"list_objects_v2": {
86-
"RequestPayer",
87-
"ExpectedBucketOwner",
88-
},
89-
"delete_objects": {
90-
"RequestPayer",
91-
"ExpectedBucketOwner",
92-
"Objects",
93-
},
94-
"head_object": {
95-
"RequestPayer",
96-
"ExpectedBucketOwner",
97-
"VersionId",
98-
},
99-
}
29+
_BOTOCORE_LOADER = Loader()
30+
_S3_JSON_MODEL = _BOTOCORE_LOADER.load_service_model(service_name="s3", type_name="service-2")
31+
_S3_SERVICE_MODEL = ServiceModel(_S3_JSON_MODEL, service_name="s3")
32+
33+
34+
def _snake_to_camel_case(s: str) -> str:
35+
return "".join(c.title() for c in s.split("_"))
10036

10137

10238
def get_botocore_valid_kwargs(function_name: str, s3_additional_kwargs: Dict[str, Any]) -> Dict[str, Any]:
10339
"""Filter and keep only the valid botocore key arguments."""
104-
return {k: v for k, v in s3_additional_kwargs.items() if k in BOTOCORE_ACCEPTED_KWARGS[function_name]}
40+
s3_operation_model = _S3_SERVICE_MODEL.operation_model(_snake_to_camel_case(function_name))
41+
allowed_kwargs = s3_operation_model.input_shape.members.keys() # pylint: disable=E1101
42+
return {k: v for k, v in s3_additional_kwargs.items() if k in allowed_kwargs}
10543

10644

10745
def _fetch_range(

awswrangler/s3/_list.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,12 @@ def does_object_exist(
149149
path: str
150150
S3 path (e.g. s3://bucket/key).
151151
s3_additional_kwargs : Optional[Dict[str, Any]]
152-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
152+
Forwarded to botocore requests.
153153
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
154154
boto3_session : boto3.Session(), optional
155155
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
156+
version_id: str, optional
157+
Specific version of the object that should exist.
156158
157159
Returns
158160
-------
@@ -216,7 +218,7 @@ def list_directories(
216218
path : str
217219
S3 path (e.g. s3://bucket/prefix).
218220
s3_additional_kwargs : Optional[Dict[str, Any]]
219-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
221+
Forwarded to botocore requests.
220222
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
221223
boto3_session : boto3.Session(), optional
222224
Boto3 Session. The default boto3 session will be used if boto3_session receives None.
@@ -286,7 +288,7 @@ def list_objects(
286288
ignore_empty: bool
287289
Ignore files with 0 bytes.
288290
s3_additional_kwargs : Optional[Dict[str, Any]]
289-
Forward to botocore requests. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
291+
Forwarded to botocore requests.
290292
e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
291293
boto3_session : boto3.Session(), optional
292294
Boto3 Session. The default boto3 session will be used if boto3_session receives None.

awswrangler/s3/_write_excel.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@ def to_excel(
4646
boto3_session : boto3.Session(), optional
4747
Boto3 Session. The default boto3 Session will be used if boto3_session receives None.
4848
s3_additional_kwargs : Optional[Dict[str, Any]]
49-
Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
50-
"SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
51-
"RequestPayer", "ExpectedBucketOwner".
49+
Forwarded to botocore requests.
5250
e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
5351
use_threads : bool
5452
True to enable concurrent requests, False to disable multiple threads.

0 commit comments

Comments
 (0)