Skip to content

Commit b8335e0

Browse files
committed
Handling and documenting ctas_approach for custom data sources. #392
1 parent 35c8c03 commit b8335e0

File tree

4 files changed

+18
-1
lines changed

4 files changed

+18
-1
lines changed

awswrangler/athena/_read.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ def read_sql_query(
561561
- Does not support columns with repeated names.
562562
- Does not support columns with undefined data types.
563563
- A temporary table will be created and then deleted immediately.
564+
- Does not support custom data_source/catalog_id.
564565
565566
**2** - ctas_approach=False:
566567
@@ -571,6 +572,7 @@ def read_sql_query(
571572
- Faster for small result sizes (less latency).
572573
- Does not require create/delete table permissions on Glue
573574
- Supports timestamp with time zone.
575+
- Supports custom data_source/catalog_id.
574576
575577
CONS:
576578
@@ -685,6 +687,12 @@ def read_sql_query(
685687
>>> scanned_bytes = df.query_metadata["Statistics"]["DataScannedInBytes"]
686688
687689
"""
690+
if ctas_approach and data_source not in (None, "AwsDataCatalog"):
691+
raise exceptions.InvalidArgumentCombination("Queries with ctas_approach=True (default) does not support "
692+
"data_source values different than None and 'AwsDataCatalog'. "
693+
"Please check the related tutorial for more details "
694+
"(https://github.com/awslabs/aws-data-wrangler/blob/master/"
695+
"tutorials/006%20-%20Amazon%20Athena.ipynb)")
688696
session: boto3.Session = _utils.ensure_session(session=boto3_session)
689697

690698
cache_info: _CacheInfo = _check_for_cached_results(

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,6 @@ sphinx==3.2.1
1818
sphinx_bootstrap_theme==0.7.1
1919
moto==1.3.16
2020
jupyterlab==2.2.8
21+
jupyter==1.0.0
2122
s3fs==0.4.2
2223
-e .

tests/test_athena.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,3 +788,9 @@ def test_athena_nan_inf(glue_database, ctas_approach, data_source):
788788
assert df.inf.iloc[0] == np.PINF
789789
assert df.inf_n.iloc[0] == np.NINF
790790
assert df.regular.iloc[0] == 1.2
791+
792+
793+
def test_athena_ctas_data_source(glue_database):
794+
sql = "SELECT nan() AS nan, infinity() as inf, -infinity() as inf_n, 1.2 as regular"
795+
with pytest.raises(wr.exceptions.InvalidArgumentCombination):
796+
wr.athena.read_sql_query(sql, glue_database, True, data_source="foo")

tutorials/006 - Amazon Athena.ipynb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
" - Does not support columns with repeated names.\n",
2424
" - Does not support columns with undefined data types.\n",
2525
" - A temporary table will be created and then deleted immediately.\n",
26+
" - Does not support custom data_source/catalog_id.\n",
2627
"\n",
2728
"\n",
2829
"- **ctas_approach=False**\n",
@@ -33,6 +34,7 @@
3334
" - Faster for small result sizes (less latency).\n",
3435
" - Does not require create/delete table permissions on Glue\n",
3536
" - Supports timestamp with time zone.\n",
37+
" - Supports custom data_source/catalog_id.\n",
3638
" * `CONS`:\n",
3739
" - Slower (But still faster than other libraries that use the regular Athena API)\n",
3840
" - Does not handle nested types at all."
@@ -1187,4 +1189,4 @@
11871189
},
11881190
"nbformat": 4,
11891191
"nbformat_minor": 4
1190-
}
1192+
}

0 commit comments

Comments
 (0)