Skip to content

Commit e75dfa9

Browse files
authored
Merge pull request #282 from awslabs/quicksight
QuickSight
2 parents b3837c6 + 7f84c9e commit e75dfa9

File tree

17 files changed

+3341
-53
lines changed

17 files changed

+3341
-53
lines changed

README.md

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,30 @@ df = wr.s3.read_parquet("s3://bucket/dataset/", dataset=True)
4444
df = wr.athena.read_sql_query("SELECT * FROM my_table", database="my_db")
4545

4646
# Getting Redshift connection (SQLAlchemy) from Glue Catalog Connections
47-
engine = wr.catalog.get_engine("my-redshift-connection")
48-
4947
# Retrieving the data from Amazon Redshift Spectrum
48+
engine = wr.catalog.get_engine("my-redshift-connection")
5049
df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine)
50+
51+
# Creating QuickSight Data Source and Dataset to reflect our new table
52+
wr.quicksight.create_athena_data_source("athena-source", allowed_to_manage=["username"])
53+
wr.quicksight.create_athena_dataset(
54+
name="my-dataset",
55+
database="my_db",
56+
table="my_table",
57+
data_source_name="athena-source",
58+
allowed_to_manage=["username"]
59+
)
60+
61+
# Getting MySQL connection (SQLAlchemy) from Glue Catalog Connections
62+
# Load the data into MySQL
63+
engine = wr.catalog.get_engine("my-mysql-connection")
64+
wr.db.to_sql(df, engine, schema="test", name="my_table")
65+
66+
# Getting PostgreSQL connection (SQLAlchemy) from Glue Catalog Connections
67+
# Load the data into PostgreSQL
68+
engine = wr.catalog.get_engine("my-postgresql-connection")
69+
wr.db.to_sql(df, engine, schema="test", name="my_table")
70+
5171
```
5272

5373
## [Read The Docs](https://aws-data-wrangler.readthedocs.io/)
@@ -80,13 +100,15 @@ df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine)
80100
- [015 - EMR](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/015%20-%20EMR.ipynb)
81101
- [016 - EMR & Docker](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/016%20-%20EMR%20%26%20Docker.ipynb)
82102
- [017 - Partition Projection](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/017%20-%20Partition%20Projection.ipynb)
103+
- [018 - QuickSight](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/018%20-%20QuickSight.ipynb)
83104
- [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/latest/api.html)
84105
- [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-s3)
85106
- [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#aws-glue-catalog)
86107
- [Amazon Athena](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-athena)
87108
- [Databases (Redshift, PostgreSQL, MySQL)](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#databases-redshift-postgresql-mysql)
88109
- [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#emr-cluster)
89110
- [CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#cloudwatch-logs)
111+
- [QuickSight](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#quicksight)
90112
- [**License**](https://github.com/awslabs/aws-data-wrangler/blob/master/LICENSE)
91113
- [**Contributing**](https://github.com/awslabs/aws-data-wrangler/blob/master/CONTRIBUTING.md)
92114
- [**Legacy Docs** (pre-1.0.0)](https://aws-data-wrangler.readthedocs.io/en/legacy/)

awswrangler/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import logging
99

10-
from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, s3 # noqa
10+
from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, quicksight, s3 # noqa
1111
from awswrangler.__metadata__ import __description__, __license__, __title__, __version__ # noqa
1212
from awswrangler._utils import get_account_id # noqa
1313

awswrangler/_data_types.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,34 @@ def athena2redshift( # pylint: disable=too-many-branches,too-many-return-statem
114114
raise exceptions.UnsupportedType(f"Unsupported Athena type: {dtype}") # pragma: no cover
115115

116116

117+
def athena2quicksight(dtype: str) -> str:
    """Athena to QuickSight data types conversion.

    Parameters
    ----------
    dtype : str
        Athena data type name, case-insensitive (e.g. "bigint", "varchar", "decimal(10,2)").

    Returns
    -------
    str
        Equivalent QuickSight type: "INTEGER", "DECIMAL", "BOOLEAN", "STRING", "DATETIME" or "BIT".

    Raises
    ------
    exceptions.UnsupportedType
        If the Athena type has no QuickSight equivalent.
    """
    dtype = dtype.lower()
    if dtype in ("smallint", "int", "integer", "bigint"):
        return "INTEGER"
    # All Athena decimal types (including parameterized "decimal(p,s)") map to DECIMAL.
    if dtype in ("float", "double") or dtype.startswith("decimal"):
        return "DECIMAL"
    if dtype in ("boolean", "bool"):
        return "BOOLEAN"
    if dtype in ("string", "char", "varchar"):
        return "STRING"
    if dtype in ("timestamp", "date"):
        return "DATETIME"
    # BUG FIX: the original tested `dtype in ("binary" or "varbinary")`, which
    # evaluates to `dtype in "binary"` (a substring check on one string), so
    # "varbinary" never matched and raised UnsupportedType. Use a tuple.
    if dtype in ("binary", "varbinary"):
        return "BIT"
    raise exceptions.UnsupportedType(f"Unsupported Athena type: {dtype}")  # pragma: no cover
143+
144+
117145
def pyarrow2athena(dtype: pa.DataType) -> str: # pylint: disable=too-many-branches,too-many-return-statements
118146
"""Pyarrow to Athena data types conversion."""
119147
if pa.types.is_int8(dtype):

awswrangler/catalog.py

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -439,8 +439,9 @@ def get_table_types(
439439
dtypes: Dict[str, str] = {}
440440
for col in response["Table"]["StorageDescriptor"]["Columns"]:
441441
dtypes[col["Name"]] = col["Type"]
442-
for par in response["Table"]["PartitionKeys"]:
443-
dtypes[par["Name"]] = par["Type"]
442+
if "PartitionKeys" in response["Table"]:
443+
for par in response["Table"]["PartitionKeys"]:
444+
dtypes[par["Name"]] = par["Type"]
444445
return dtypes
445446

446447

@@ -527,6 +528,11 @@ def get_tables(
527528
) -> Iterator[Dict[str, Any]]:
528529
"""Get an iterator of tables.
529530
531+
Note
532+
----
533+
Please do not filter using name_contains and name_prefix/name_suffix at the same time.
534+
Only name_prefix and name_suffix can be combined together.
535+
530536
Parameters
531537
----------
532538
catalog_id : str, optional
@@ -560,15 +566,17 @@ def get_tables(
560566
if catalog_id is not None:
561567
args["CatalogId"] = catalog_id
562568
if (name_prefix is not None) and (name_suffix is not None) and (name_contains is not None):
563-
args["Expression"] = f"{name_prefix}.*{name_contains}.*{name_suffix}"
569+
raise exceptions.InvalidArgumentCombination("Please, does not filter using name_contains and "
570+
"name_prefix/name_suffix at the same time. Only "
571+
"name_prefix and name_suffix can be combined together.")
564572
elif (name_prefix is not None) and (name_suffix is not None):
565-
args["Expression"] = f"{name_prefix}.*{name_suffix}"
573+
args["Expression"] = f"{name_prefix}*{name_suffix}"
566574
elif name_contains is not None:
567-
args["Expression"] = f".*{name_contains}.*"
575+
args["Expression"] = f"*{name_contains}*"
568576
elif name_prefix is not None:
569-
args["Expression"] = f"{name_prefix}.*"
577+
args["Expression"] = f"{name_prefix}*"
570578
elif name_suffix is not None:
571-
args["Expression"] = f".*{name_suffix}"
579+
args["Expression"] = f"*{name_suffix}"
572580
if database is not None:
573581
dbs: List[str] = [database]
574582
else:
@@ -647,15 +655,21 @@ def tables(
647655
tbls = tbls[:limit]
648656

649657
df_dict: Dict[str, List] = {"Database": [], "Table": [], "Description": [], "Columns": [], "Partitions": []}
650-
for table in tbls:
651-
df_dict["Database"].append(table["DatabaseName"])
652-
df_dict["Table"].append(table["Name"])
653-
if "Description" in table:
654-
df_dict["Description"].append(table["Description"])
658+
for tbl in tbls:
659+
df_dict["Database"].append(tbl["DatabaseName"])
660+
df_dict["Table"].append(tbl["Name"])
661+
if "Description" in tbl:
662+
df_dict["Description"].append(tbl["Description"])
655663
else:
656664
df_dict["Description"].append("")
657-
df_dict["Columns"].append(", ".join([x["Name"] for x in table["StorageDescriptor"]["Columns"]]))
658-
df_dict["Partitions"].append(", ".join([x["Name"] for x in table["PartitionKeys"]]))
665+
if "Columns" in tbl["StorageDescriptor"]:
666+
df_dict["Columns"].append(", ".join([x["Name"] for x in tbl["StorageDescriptor"]["Columns"]]))
667+
else:
668+
df_dict["Columns"].append("")
669+
if "PartitionKeys" in tbl:
670+
df_dict["Partitions"].append(", ".join([x["Name"] for x in tbl["PartitionKeys"]]))
671+
else:
672+
df_dict["Partitions"].append("")
659673
return pd.DataFrame(data=df_dict)
660674

661675

@@ -771,14 +785,15 @@ def table(
771785
df_dict["Comment"].append(col["Comment"])
772786
else:
773787
df_dict["Comment"].append("")
774-
for col in tbl["PartitionKeys"]:
775-
df_dict["Column Name"].append(col["Name"])
776-
df_dict["Type"].append(col["Type"])
777-
df_dict["Partition"].append(True)
778-
if "Comment" in col:
779-
df_dict["Comment"].append(col["Comment"])
780-
else:
781-
df_dict["Comment"].append("")
788+
if "PartitionKeys" in tbl:
789+
for col in tbl["PartitionKeys"]:
790+
df_dict["Column Name"].append(col["Name"])
791+
df_dict["Type"].append(col["Type"])
792+
df_dict["Partition"].append(True)
793+
if "Comment" in col:
794+
df_dict["Comment"].append(col["Comment"])
795+
else:
796+
df_dict["Comment"].append("")
782797
return pd.DataFrame(data=df_dict)
783798

784799

@@ -1692,8 +1707,9 @@ def get_columns_comments(
16921707
comments: Dict[str, str] = {}
16931708
for c in response["Table"]["StorageDescriptor"]["Columns"]:
16941709
comments[c["Name"]] = c["Comment"]
1695-
for p in response["Table"]["PartitionKeys"]:
1696-
comments[p["Name"]] = p["Comment"]
1710+
if "PartitionKeys" in response["Table"]:
1711+
for p in response["Table"]["PartitionKeys"]:
1712+
comments[p["Name"]] = p["Comment"]
16971713
return comments
16981714

16991715

awswrangler/quicksight/__init__.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""Amazon QuickSight Module."""
2+
3+
from awswrangler.quicksight._cancel import cancel_ingestion # noqa
4+
from awswrangler.quicksight._create import create_athena_data_source, create_athena_dataset, create_ingestion # noqa
5+
from awswrangler.quicksight._delete import ( # noqa
6+
delete_all_dashboards,
7+
delete_all_data_sources,
8+
delete_all_datasets,
9+
delete_all_templates,
10+
delete_dashboard,
11+
delete_data_source,
12+
delete_dataset,
13+
delete_template,
14+
)
15+
from awswrangler.quicksight._describe import ( # noqa
16+
describe_dashboard,
17+
describe_data_source,
18+
describe_data_source_permissions,
19+
describe_dataset,
20+
describe_ingestion,
21+
)
22+
from awswrangler.quicksight._get_list import ( # noqa
23+
get_dashboard_id,
24+
get_dashboard_ids,
25+
get_data_source_arn,
26+
get_data_source_arns,
27+
get_data_source_id,
28+
get_data_source_ids,
29+
get_dataset_id,
30+
get_dataset_ids,
31+
get_template_id,
32+
get_template_ids,
33+
list_dashboards,
34+
list_data_sources,
35+
list_datasets,
36+
list_group_memberships,
37+
list_groups,
38+
list_iam_policy_assignments,
39+
list_iam_policy_assignments_for_user,
40+
list_ingestions,
41+
list_templates,
42+
list_users,
43+
)

awswrangler/quicksight/_cancel.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""Amazon QuickSight Cancel Module."""
2+
3+
import logging
4+
from typing import Optional
5+
6+
import boto3 # type: ignore
7+
8+
from awswrangler import _utils, exceptions
9+
from awswrangler.quicksight._get_list import get_dataset_id
10+
11+
_logger: logging.Logger = logging.getLogger(__name__)
12+
13+
14+
def cancel_ingestion(
    ingestion_id: Optional[str] = None,  # annotation fixed: default is None, so the type must be Optional[str]
    dataset_name: Optional[str] = None,
    dataset_id: Optional[str] = None,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> None:
    """Cancel an ongoing ingestion of data into SPICE.

    Note
    ----
    You must pass a not None value for ``dataset_name`` or ``dataset_id`` argument.

    Parameters
    ----------
    ingestion_id : str, optional
        Ingestion ID.
    dataset_name : str, optional
        Dataset name.
    dataset_id : str, optional
        Dataset ID.
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    None
        None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.quicksight.cancel_ingestion(ingestion_id="...", dataset_name="...")
    """
    if (dataset_name is None) and (dataset_id is None):
        # Error message fixed to reference the actual argument name (was "name").
        raise exceptions.InvalidArgument("You must pass a not None dataset_name or dataset_id argument.")
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    # Resolve the dataset ID from its name only when the ID was not given explicitly.
    if (dataset_id is None) and (dataset_name is not None):
        dataset_id = get_dataset_id(name=dataset_name, account_id=account_id, boto3_session=session)
    client: boto3.client = _utils.client(service_name="quicksight", session=session)
    client.cancel_ingestion(IngestionId=ingestion_id, AwsAccountId=account_id, DataSetId=dataset_id)

0 commit comments

Comments
 (0)