Skip to content

Commit 1827528

Browse files
committed
Fix conflicts with main
2 parents a07c073 + 9ec082c commit 1827528

File tree

5 files changed

+91
-5
lines changed

5 files changed

+91
-5
lines changed

awswrangler/athena/_read.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,8 @@ def read_sql_table(
995995
table: str,
996996
database: str,
997997
ctas_approach: bool = True,
998+
unload_approach: bool = False,
999+
unload_parameters: Optional[Dict[str, Any]] = None,
9981000
categories: Optional[List[str]] = None,
9991001
chunksize: Optional[Union[int, bool]] = None,
10001002
s3_output: Optional[str] = None,
@@ -1116,6 +1118,11 @@ def read_sql_table(
11161118
ctas_approach: bool
11171119
Wraps the query using a CTAS, and read the resulted parquet data on S3.
11181120
If false, read the regular CSV on S3.
1121+
unload_approach: bool
1122+
Wraps the query using UNLOAD, and read the results from S3.
1123+
Only PARQUET format is supported.
1124+
unload_parameters : Optional[Dict[str, Any]]
1125+
Params of the UNLOAD such as format, compression, field_delimiter, and partitioned_by.
11191126
categories: List[str], optional
11201127
List of columns names that should be returned as pandas.Categorical.
11211128
Recommended for memory restricted environments.
@@ -1203,6 +1210,8 @@ def read_sql_table(
12031210
database=database,
12041211
data_source=data_source,
12051212
ctas_approach=ctas_approach,
1213+
unload_approach=unload_approach,
1214+
unload_parameters=unload_parameters,
12061215
categories=categories,
12071216
chunksize=chunksize,
12081217
s3_output=s3_output,

awswrangler/catalog/_create.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def _create_table( # pylint: disable=too-many-branches,too-many-statements,too-
4444
projection_values: Optional[Dict[str, str]],
4545
projection_intervals: Optional[Dict[str, str]],
4646
projection_digits: Optional[Dict[str, str]],
47+
projection_formats: Optional[Dict[str, str]],
4748
projection_storage_location_template: Optional[str],
4849
catalog_id: Optional[str],
4950
) -> None:
@@ -67,11 +68,13 @@ def _create_table( # pylint: disable=too-many-branches,too-many-statements,too-
6768
projection_values = projection_values if projection_values else {}
6869
projection_intervals = projection_intervals if projection_intervals else {}
6970
projection_digits = projection_digits if projection_digits else {}
71+
projection_formats = projection_formats if projection_formats else {}
7072
projection_types = {sanitize_column_name(k): v for k, v in projection_types.items()}
7173
projection_ranges = {sanitize_column_name(k): v for k, v in projection_ranges.items()}
7274
projection_values = {sanitize_column_name(k): v for k, v in projection_values.items()}
7375
projection_intervals = {sanitize_column_name(k): v for k, v in projection_intervals.items()}
7476
projection_digits = {sanitize_column_name(k): v for k, v in projection_digits.items()}
77+
projection_formats = {sanitize_column_name(k): v for k, v in projection_formats.items()}
7578
for k, v in projection_types.items():
7679
dtype: Optional[str] = partitions_types.get(k)
7780
if dtype is None and projection_storage_location_template is None:
@@ -98,6 +101,10 @@ def _create_table( # pylint: disable=too-many-branches,too-many-statements,too-
98101
mode = _update_if_necessary(
99102
dic=table_input["Parameters"], key=f"projection.{k}.digits", value=str(v), mode=mode
100103
)
104+
for k, v in projection_formats.items():
105+
mode = _update_if_necessary(
106+
dic=table_input["Parameters"], key=f"projection.{k}.format", value=str(v), mode=mode
107+
)
101108
mode = _update_if_necessary(
102109
table_input["Parameters"],
103110
key="storage.location.template",
@@ -266,6 +273,7 @@ def _create_parquet_table(
266273
projection_values: Optional[Dict[str, str]],
267274
projection_intervals: Optional[Dict[str, str]],
268275
projection_digits: Optional[Dict[str, str]],
276+
projection_formats: Optional[Dict[str, str]],
269277
projection_storage_location_template: Optional[str],
270278
boto3_session: Optional[boto3.Session],
271279
catalog_table_input: Optional[Dict[str, Any]],
@@ -318,6 +326,7 @@ def _create_parquet_table(
318326
projection_values=projection_values,
319327
projection_intervals=projection_intervals,
320328
projection_digits=projection_digits,
329+
projection_formats=projection_formats,
321330
projection_storage_location_template=projection_storage_location_template,
322331
catalog_id=catalog_id,
323332
)
@@ -350,6 +359,7 @@ def _create_csv_table( # pylint: disable=too-many-arguments,too-many-locals
350359
projection_values: Optional[Dict[str, str]],
351360
projection_intervals: Optional[Dict[str, str]],
352361
projection_digits: Optional[Dict[str, str]],
362+
projection_formats: Optional[Dict[str, str]],
353363
projection_storage_location_template: Optional[str],
354364
catalog_table_input: Optional[Dict[str, Any]],
355365
catalog_id: Optional[str],
@@ -398,6 +408,7 @@ def _create_csv_table( # pylint: disable=too-many-arguments,too-many-locals
398408
projection_values=projection_values,
399409
projection_intervals=projection_intervals,
400410
projection_digits=projection_digits,
411+
projection_formats=projection_formats,
401412
projection_storage_location_template=projection_storage_location_template,
402413
catalog_id=catalog_id,
403414
)
@@ -428,6 +439,7 @@ def _create_json_table( # pylint: disable=too-many-arguments
428439
projection_values: Optional[Dict[str, str]],
429440
projection_intervals: Optional[Dict[str, str]],
430441
projection_digits: Optional[Dict[str, str]],
442+
projection_formats: Optional[Dict[str, str]],
431443
projection_storage_location_template: Optional[str],
432444
catalog_table_input: Optional[Dict[str, Any]],
433445
catalog_id: Optional[str],
@@ -474,6 +486,7 @@ def _create_json_table( # pylint: disable=too-many-arguments
474486
projection_values=projection_values,
475487
projection_intervals=projection_intervals,
476488
projection_digits=projection_digits,
489+
projection_formats=projection_formats,
477490
projection_storage_location_template=projection_storage_location_template,
478491
catalog_id=catalog_id,
479492
)
@@ -676,6 +689,7 @@ def create_parquet_table(
676689
projection_values: Optional[Dict[str, str]] = None,
677690
projection_intervals: Optional[Dict[str, str]] = None,
678691
projection_digits: Optional[Dict[str, str]] = None,
692+
projection_formats: Optional[Dict[str, str]] = None,
679693
projection_storage_location_template: Optional[str] = None,
680694
boto3_session: Optional[boto3.Session] = None,
681695
) -> None:
@@ -741,6 +755,10 @@ def create_parquet_table(
741755
Dictionary of partitions names and Athena projections digits.
742756
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
743757
(e.g. {'col_name': '1', 'col2_name': '2'})
758+
projection_formats: Optional[Dict[str, str]]
759+
Dictionary of partitions names and Athena projections formats.
760+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
761+
(e.g. {'col_date': 'yyyy-MM-dd', 'col2_timestamp': 'yyyy-MM-dd HH:mm:ss'})
744762
projection_storage_location_template: Optional[str]
745763
Value which is allows Athena to properly map partition values if the S3 file locations do not follow
746764
a typical `.../column=value/...` pattern.
@@ -796,14 +814,15 @@ def create_parquet_table(
796814
projection_values=projection_values,
797815
projection_intervals=projection_intervals,
798816
projection_digits=projection_digits,
817+
projection_formats=projection_formats,
799818
projection_storage_location_template=projection_storage_location_template,
800819
boto3_session=boto3_session,
801820
catalog_table_input=catalog_table_input,
802821
)
803822

804823

805824
@apply_configs
806-
def create_csv_table( # pylint: disable=too-many-arguments
825+
def create_csv_table( # pylint: disable=too-many-arguments,too-many-locals
807826
database: str,
808827
table: str,
809828
path: str,
@@ -830,6 +849,7 @@ def create_csv_table( # pylint: disable=too-many-arguments
830849
projection_values: Optional[Dict[str, str]] = None,
831850
projection_intervals: Optional[Dict[str, str]] = None,
832851
projection_digits: Optional[Dict[str, str]] = None,
852+
projection_formats: Optional[Dict[str, str]] = None,
833853
projection_storage_location_template: Optional[str] = None,
834854
catalog_id: Optional[str] = None,
835855
) -> None:
@@ -908,6 +928,10 @@ def create_csv_table( # pylint: disable=too-many-arguments
908928
Dictionary of partitions names and Athena projections digits.
909929
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
910930
(e.g. {'col_name': '1', 'col2_name': '2'})
931+
projection_formats: Optional[Dict[str, str]]
932+
Dictionary of partitions names and Athena projections formats.
933+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
934+
(e.g. {'col_date': 'yyyy-MM-dd', 'col2_timestamp': 'yyyy-MM-dd HH:mm:ss'})
911935
projection_storage_location_template: Optional[str]
912936
Value which is allows Athena to properly map partition values if the S3 file locations do not follow
913937
a typical `.../column=value/...` pattern.
@@ -967,6 +991,7 @@ def create_csv_table( # pylint: disable=too-many-arguments
967991
projection_values=projection_values,
968992
projection_intervals=projection_intervals,
969993
projection_digits=projection_digits,
994+
projection_formats=projection_formats,
970995
projection_storage_location_template=projection_storage_location_template,
971996
boto3_session=boto3_session,
972997
catalog_table_input=catalog_table_input,
@@ -1003,6 +1028,7 @@ def create_json_table( # pylint: disable=too-many-arguments
10031028
projection_values: Optional[Dict[str, str]] = None,
10041029
projection_intervals: Optional[Dict[str, str]] = None,
10051030
projection_digits: Optional[Dict[str, str]] = None,
1031+
projection_formats: Optional[Dict[str, str]] = None,
10061032
projection_storage_location_template: Optional[str] = None,
10071033
catalog_id: Optional[str] = None,
10081034
) -> None:
@@ -1077,6 +1103,10 @@ def create_json_table( # pylint: disable=too-many-arguments
10771103
Dictionary of partitions names and Athena projections digits.
10781104
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
10791105
(e.g. {'col_name': '1', 'col2_name': '2'})
1106+
projection_formats: Optional[Dict[str, str]]
1107+
Dictionary of partitions names and Athena projections formats.
1108+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
1109+
(e.g. {'col_date': 'yyyy-MM-dd', 'col2_timestamp': 'yyyy-MM-dd HH:mm:ss'})
10801110
projection_storage_location_template: Optional[str]
10811111
Value which is allows Athena to properly map partition values if the S3 file locations do not follow
10821112
a typical `.../column=value/...` pattern.
@@ -1135,6 +1165,7 @@ def create_json_table( # pylint: disable=too-many-arguments
11351165
projection_values=projection_values,
11361166
projection_intervals=projection_intervals,
11371167
projection_digits=projection_digits,
1168+
projection_formats=projection_formats,
11381169
projection_storage_location_template=projection_storage_location_template,
11391170
boto3_session=boto3_session,
11401171
catalog_table_input=catalog_table_input,

awswrangler/s3/_write_parquet.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,8 @@ def to_parquet( # pylint: disable=too-many-arguments,too-many-locals,too-many-b
301301
projection_values: Optional[Dict[str, str]] = None,
302302
projection_intervals: Optional[Dict[str, str]] = None,
303303
projection_digits: Optional[Dict[str, str]] = None,
304+
projection_formats: Optional[Dict[str, str]] = None,
305+
projection_storage_location_template: Optional[str] = None,
304306
catalog_id: Optional[str] = None,
305307
) -> Dict[str, Union[List[str], Dict[str, List[str]]]]:
306308
"""Write Parquet file or dataset on Amazon S3.
@@ -432,6 +434,15 @@ def to_parquet( # pylint: disable=too-many-arguments,too-many-locals,too-many-b
432434
Dictionary of partitions names and Athena projections digits.
433435
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
434436
(e.g. {'col_name': '1', 'col2_name': '2'})
437+
projection_formats: Optional[Dict[str, str]]
438+
Dictionary of partitions names and Athena projections formats.
439+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
440+
(e.g. {'col_date': 'yyyy-MM-dd', 'col2_timestamp': 'yyyy-MM-dd HH:mm:ss'})
441+
projection_storage_location_template: Optional[str]
442+
Value which is allows Athena to properly map partition values if the S3 file locations do not follow
443+
a typical `.../column=value/...` pattern.
444+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-setting-up.html
445+
(e.g. s3://bucket/table_root/a=${a}/${b}/some_static_subdirectory/${c}/)
435446
catalog_id : str, optional
436447
The ID of the Data Catalog from which to retrieve Databases.
437448
If none is provided, the AWS account ID is used by default.
@@ -774,7 +785,7 @@ def to_parquet( # pylint: disable=too-many-arguments,too-many-locals,too-many-b
774785

775786

776787
@apply_configs
777-
def store_parquet_metadata( # pylint: disable=too-many-arguments
788+
def store_parquet_metadata( # pylint: disable=too-many-arguments,too-many-locals
778789
path: str,
779790
database: str,
780791
table: str,
@@ -799,6 +810,8 @@ def store_parquet_metadata( # pylint: disable=too-many-arguments
799810
projection_values: Optional[Dict[str, str]] = None,
800811
projection_intervals: Optional[Dict[str, str]] = None,
801812
projection_digits: Optional[Dict[str, str]] = None,
813+
projection_formats: Optional[Dict[str, str]] = None,
814+
projection_storage_location_template: Optional[str] = None,
802815
s3_additional_kwargs: Optional[Dict[str, Any]] = None,
803816
boto3_session: Optional[boto3.Session] = None,
804817
) -> Tuple[Dict[str, str], Optional[Dict[str, str]], Optional[Dict[str, List[str]]]]:
@@ -895,6 +908,15 @@ def store_parquet_metadata( # pylint: disable=too-many-arguments
895908
Dictionary of partitions names and Athena projections digits.
896909
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
897910
(e.g. {'col_name': '1', 'col2_name': '2'})
911+
projection_formats: Optional[Dict[str, str]]
912+
Dictionary of partitions names and Athena projections formats.
913+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
914+
(e.g. {'col_date': 'yyyy-MM-dd', 'col2_timestamp': 'yyyy-MM-dd HH:mm:ss'})
915+
projection_storage_location_template: Optional[str]
916+
Value which is allows Athena to properly map partition values if the S3 file locations do not follow
917+
a typical `.../column=value/...` pattern.
918+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-setting-up.html
919+
(e.g. s3://bucket/table_root/a=${a}/${b}/some_static_subdirectory/${c}/)
898920
s3_additional_kwargs : Optional[Dict[str, Any]]
899921
Forwarded to botocore requests.
900922
e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
@@ -963,6 +985,8 @@ def store_parquet_metadata( # pylint: disable=too-many-arguments
963985
projection_values=projection_values,
964986
projection_intervals=projection_intervals,
965987
projection_digits=projection_digits,
988+
projection_formats=projection_formats,
989+
projection_storage_location_template=projection_storage_location_template,
966990
boto3_session=session,
967991
catalog_id=catalog_id,
968992
)

awswrangler/s3/_write_text.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
113113
projection_values: Optional[Dict[str, str]] = None,
114114
projection_intervals: Optional[Dict[str, str]] = None,
115115
projection_digits: Optional[Dict[str, str]] = None,
116+
projection_formats: Optional[Dict[str, str]] = None,
117+
projection_storage_location_template: Optional[str] = None,
116118
catalog_id: Optional[str] = None,
117119
**pandas_kwargs: Any,
118120
) -> Dict[str, Union[List[str], Dict[str, List[str]]]]:
@@ -242,6 +244,15 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
242244
Dictionary of partitions names and Athena projections digits.
243245
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
244246
(e.g. {'col_name': '1', 'col2_name': '2'})
247+
projection_formats: Optional[Dict[str, str]]
248+
Dictionary of partitions names and Athena projections formats.
249+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
250+
(e.g. {'col_date': 'yyyy-MM-dd', 'col2_timestamp': 'yyyy-MM-dd HH:mm:ss'})
251+
projection_storage_location_template: Optional[str]
252+
Value which is allows Athena to properly map partition values if the S3 file locations do not follow
253+
a typical `.../column=value/...` pattern.
254+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-setting-up.html
255+
(e.g. s3://bucket/table_root/a=${a}/${b}/some_static_subdirectory/${c}/)
245256
catalog_id : str, optional
246257
The ID of the Data Catalog from which to retrieve Databases.
247258
If none is provided, the AWS account ID is used by default.
@@ -679,6 +690,8 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
679690
projection_values: Optional[Dict[str, str]] = None,
680691
projection_intervals: Optional[Dict[str, str]] = None,
681692
projection_digits: Optional[Dict[str, str]] = None,
693+
projection_formats: Optional[Dict[str, str]] = None,
694+
projection_storage_location_template: Optional[str] = None,
682695
catalog_id: Optional[str] = None,
683696
**pandas_kwargs: Any,
684697
) -> Union[List[str], Dict[str, Union[List[str], Dict[str, List[str]]]]]:
@@ -791,6 +804,15 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
791804
Dictionary of partitions names and Athena projections digits.
792805
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
793806
(e.g. {'col_name': '1', 'col2_name': '2'})
807+
projection_formats: Optional[Dict[str, str]]
808+
Dictionary of partitions names and Athena projections formats.
809+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html
810+
(e.g. {'col_date': 'yyyy-MM-dd', 'col2_timestamp': 'yyyy-MM-dd HH:mm:ss'})
811+
projection_storage_location_template: Optional[str]
812+
Value which is allows Athena to properly map partition values if the S3 file locations do not follow
813+
a typical `.../column=value/...` pattern.
814+
https://docs.aws.amazon.com/athena/latest/ug/partition-projection-setting-up.html
815+
(e.g. s3://bucket/table_root/a=${a}/${b}/some_static_subdirectory/${c}/)
794816
catalog_id : str, optional
795817
The ID of the Data Catalog from which to retrieve Databases.
796818
If none is provided, the AWS account ID is used by default.

tests/unit/test_athena_parquet.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -798,10 +798,10 @@ def test_read_sql_query_unload(path, glue_table, glue_database, file_format):
798798
table=glue_table,
799799
database=glue_database,
800800
)
801-
df_out = wr.athena.read_sql_query(
802-
sql=f"SELECT * FROM {glue_database}.{glue_table}",
803-
s3_output=f"{path}unload/",
801+
df_out = wr.athena.read_sql_table(
802+
table=glue_table,
804803
database=glue_database,
804+
s3_output=f"{path}unload/",
805805
ctas_approach=False,
806806
unload_approach=True,
807807
unload_parameters={"file_format": file_format},

0 commit comments

Comments
 (0)