Skip to content

Commit ed87ba6

Browse files
committed
Defining ctas_approach False by default
1 parent eae3f91 commit ed87ba6

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

awswrangler/pandas.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -499,17 +499,17 @@ def read_sql_athena(self,
499499
workgroup: Optional[str] = None,
500500
encryption: Optional[str] = None,
501501
kms_key: Optional[str] = None,
502-
ctas_approach: bool = True,
502+
ctas_approach: bool = False,
503503
procs_cpu_bound: Optional[int] = None,
504504
max_result_size: Optional[int] = None):
505505
"""
506506
Executes any SQL query on AWS Athena and returns a DataFrame of the result.
507507
There are two approaches to be defined through ctas_approach parameter:
508-
1 - ctas_approach True (Default):
508+
1 - ctas_approach True (For Huge results):
509509
Wrap the query with a CTAS and then reads the table data as parquet directly from s3.
510510
PROS: Faster and handles nested types better
511-
CONS: Can't use max_result_size.
512-
2 - ctas_approach False:
511+
CONS: Can't use max_result_size and must have create and drop table permissions
512+
2 - ctas_approach False (Default):
513513
Does a regular query on Athena and parse the regular CSV result on s3
514514
PROS: Accepts max_result_size.
515515
CONS: Slower (but still faster than other libraries that use the Athena API) and does not handle nested types as well

testing/test_awswrangler/test_pandas.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ def test_etl_complex_ctas(session, bucket, database):
434434
mode="overwrite",
435435
procs_cpu_bound=1)
436436
sleep(1)
437-
df = session.pandas.read_sql_athena(sql="select * from test", database=database)
437+
df = session.pandas.read_sql_athena(ctas_approach=True, sql="select * from test", database=database)
438438
for row in df.itertuples():
439439
assert isinstance(row.my_timestamp, datetime)
440440
assert isinstance(row.my_date, date)
@@ -731,7 +731,7 @@ def test_to_parquet_with_cast_null(
731731
assert len(list(dataframe.columns)) == len(list(dataframe2.columns))
732732

733733

734-
def test_read_sql_athena_with_time_zone(session, bucket, database):
734+
def test_read_sql_athena_with_time_zone(session, database):
735735
query = "select current_timestamp as value, typeof(current_timestamp) as type"
736736
dataframe = session.pandas.read_sql_athena(ctas_approach=False, sql=query, database=database)
737737
assert len(dataframe.index) == 1
@@ -1507,6 +1507,6 @@ def test_read_sql_athena_ctas(session, bucket, database):
15071507
preserve_index=False,
15081508
procs_cpu_bound=4,
15091509
partition_cols=["partition"])
1510-
df2 = session.pandas.read_sql_athena(sql="select * from test", database=database)
1510+
df2 = session.pandas.read_sql_athena(ctas_approach=True, sql="select * from test", database=database)
15111511
assert len(list(df.columns)) == len(list(df2.columns))
15121512
assert len(df.index) == len(df2.index)

0 commit comments

Comments
 (0)