Commit 01b7bef

Merge pull request #284 from awslabs/dev
Bumping version to 1.5.0
2 parents 0dcda71 + 35f1675 · commit 01b7bef

39 files changed: +6,793 additions, −3,106 deletions

README.md

Lines changed: 23 additions & 5 deletions
@@ -3,13 +3,13 @@

 ![AWS Data Wrangler](docs/source/_static/logo2.png?raw=true "AWS Data Wrangler")

-[![Release](https://img.shields.io/badge/release-1.4.0-brightgreen.svg)](https://pypi.org/project/awswrangler/)
+[![Release](https://img.shields.io/badge/release-1.5.0-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 [![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8-brightgreen.svg)](https://anaconda.org/conda-forge/awswrangler)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)

 [![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
-[![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen.svg)](https://pypi.org/project/awswrangler/)
+[![Coverage](https://img.shields.io/badge/coverage-90%25-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 ![Static Checking](https://github.com/awslabs/aws-data-wrangler/workflows/Static%20Checking/badge.svg?branch=master)
 [![Documentation Status](https://readthedocs.org/projects/aws-data-wrangler/badge/?version=latest)](https://aws-data-wrangler.readthedocs.io/?badge=latest)

@@ -43,11 +43,27 @@ df = wr.s3.read_parquet("s3://bucket/dataset/", dataset=True)
 # Retrieving the data from Amazon Athena
 df = wr.athena.read_sql_query("SELECT * FROM my_table", database="my_db")

-# Getting Redshift connection (SQLAlchemy) from Glue Catalog Connections
+# Get Redshift connection (SQLAlchemy) from Glue Catalog and retrieve data from Redshift Spectrum
 engine = wr.catalog.get_engine("my-redshift-connection")
-
-# Retrieving the data from Amazon Redshift Spectrum
 df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine)
+
+# Creating QuickSight Data Source and Dataset to reflect our new table
+wr.quicksight.create_athena_data_source("athena-source", allowed_to_manage=["username"])
+wr.quicksight.create_athena_dataset(
+    name="my-dataset",
+    database="my_db",
+    table="my_table",
+    data_source_name="athena-source",
+    allowed_to_manage=["username"]
+)
+
+# Get MySQL connection (SQLAlchemy) from Glue Catalog and LOAD the data into MySQL
+engine = wr.catalog.get_engine("my-mysql-connection")
+wr.db.to_sql(df, engine, schema="test", name="my_table")
+
+# Get PostgreSQL connection (SQLAlchemy) from Glue Catalog and LOAD the data into PostgreSQL
+engine = wr.catalog.get_engine("my-postgresql-connection")
+wr.db.to_sql(df, engine, schema="test", name="my_table")
 ```

 ## [Read The Docs](https://aws-data-wrangler.readthedocs.io/)
@@ -80,13 +96,15 @@ df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine)
   - [015 - EMR](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/015%20-%20EMR.ipynb)
   - [016 - EMR & Docker](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/016%20-%20EMR%20%26%20Docker.ipynb)
   - [017 - Partition Projection](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/017%20-%20Partition%20Projection.ipynb)
+  - [018 - QuickSight](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/018%20-%20QuickSight.ipynb)
 - [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/latest/api.html)
   - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-s3)
   - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#aws-glue-catalog)
   - [Amazon Athena](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-athena)
   - [Databases (Redshift, PostgreSQL, MySQL)](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#databases-redshift-postgresql-mysql)
   - [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#emr-cluster)
   - [CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#cloudwatch-logs)
+  - [QuickSight](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#quicksight)
 - [**License**](https://github.com/awslabs/aws-data-wrangler/blob/master/LICENSE)
 - [**Contributing**](https://github.com/awslabs/aws-data-wrangler/blob/master/CONTRIBUTING.md)
 - [**Legacy Docs** (pre-1.0.0)](https://aws-data-wrangler.readthedocs.io/en/legacy/)

awswrangler/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -5,10 +5,10 @@

 """

-import logging
+import logging as _logging

-from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, s3  # noqa
+from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, quicksight, s3  # noqa
 from awswrangler.__metadata__ import __description__, __license__, __title__, __version__  # noqa
 from awswrangler._utils import get_account_id  # noqa

-logging.getLogger("awswrangler").addHandler(logging.NullHandler())
+_logging.getLogger("awswrangler").addHandler(_logging.NullHandler())
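
With `logging` imported as `_logging`, the package no longer exposes a top-level `logging` attribute; applications that want to see the library's log output keep using the standard library directly. A small sketch, using only the stdlib `logging` API:

```python
import logging

import awswrangler as wr  # attaches a NullHandler to the "awswrangler" logger on import

# Opt in to the library's internal logging explicitly
logging.basicConfig(level=logging.INFO, format="[%(name)s][%(funcName)s] %(message)s")
logging.getLogger("awswrangler").setLevel(logging.DEBUG)
```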

awswrangler/__metadata__.py

Lines changed: 1 addition & 1 deletion
@@ -7,5 +7,5 @@

 __title__ = "awswrangler"
 __description__ = "Pandas on AWS."
-__version__ = "1.4.0"
+__version__ = "1.5.0"
 __license__ = "Apache License 2.0"
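
Since `__init__.py` re-exports these metadata fields (see the diff above), the bump can be checked from an interpreter:

```python
import awswrangler as wr

print(wr.__title__, wr.__version__)  # expected: awswrangler 1.5.0
```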

awswrangler/_data_types.py

Lines changed: 28 additions & 0 deletions
@@ -114,6 +114,34 @@ def athena2redshift(  # pylint: disable=too-many-branches,too-many-return-statements
     raise exceptions.UnsupportedType(f"Unsupported Athena type: {dtype}")  # pragma: no cover


+def athena2quicksight(dtype: str) -> str:  # pylint: disable=too-many-branches,too-many-return-statements
+    """Athena to Quicksight data types conversion."""
+    dtype = dtype.lower()
+    if dtype == "smallint":
+        return "INTEGER"
+    if dtype in ("int", "integer"):
+        return "INTEGER"
+    if dtype == "bigint":
+        return "INTEGER"
+    if dtype == "float":
+        return "DECIMAL"
+    if dtype == "double":
+        return "DECIMAL"
+    if dtype in ("boolean", "bool"):
+        return "BOOLEAN"
+    if dtype in ("string", "char", "varchar"):
+        return "STRING"
+    if dtype == "timestamp":
+        return "DATETIME"
+    if dtype == "date":
+        return "DATETIME"
+    if dtype.startswith("decimal"):
+        return "DECIMAL"
+    if dtype in ("binary", "varbinary"):
+        return "BIT"
+    raise exceptions.UnsupportedType(f"Unsupported Athena type: {dtype}")  # pragma: no cover
+
+
 def pyarrow2athena(dtype: pa.DataType) -> str:  # pylint: disable=too-many-branches,too-many-return-statements
     """Pyarrow to Athena data types conversion."""
     if pa.types.is_int8(dtype):
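
A hedged sketch of what the new mapping returns for a few representative Athena types; `athena2quicksight` lives in a private module, so this is illustrative rather than a public API, and the expected values simply mirror the branches added above:

```python
from awswrangler._data_types import athena2quicksight  # private helper added in this commit

# Athena/Glue engine types collapse onto QuickSight's input data types
assert athena2quicksight("bigint") == "INTEGER"
assert athena2quicksight("double") == "DECIMAL"
assert athena2quicksight("varchar") == "STRING"
assert athena2quicksight("timestamp") == "DATETIME"
assert athena2quicksight("decimal(10,2)") == "DECIMAL"
```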

awswrangler/_utils.py

Lines changed: 33 additions & 3 deletions
@@ -1,10 +1,11 @@
 """Internal (private) Utilities Module."""

+import copy
 import logging
 import math
 import os
 import random
-from typing import Any, Dict, Generator, List, Optional, Tuple
+from typing import Any, Dict, Generator, List, Optional, Tuple, Union

 import boto3  # type: ignore
 import botocore.config  # type: ignore
@@ -17,8 +18,10 @@
 _logger: logging.Logger = logging.getLogger(__name__)


-def ensure_session(session: Optional[boto3.Session] = None) -> boto3.Session:
+def ensure_session(session: Optional[Union[boto3.Session, Dict[str, Optional[str]]]] = None) -> boto3.Session:
     """Ensure that a valid boto3.Session will be returned."""
+    if isinstance(session, dict):  # Primitives received
+        return boto3_from_primitives(primitives=session)
     if session is not None:
         return session
     # Ensure the boto3's default session is used so that its parameters can be
@@ -28,6 +31,30 @@ def ensure_session(session: Optional[boto3.Session] = None) -> boto3.Session:
     return boto3.Session()  # pragma: no cover


+def boto3_to_primitives(boto3_session: Optional[boto3.Session] = None) -> Dict[str, Optional[str]]:
+    """Convert Boto3 Session to Python primitives."""
+    _boto3_session: boto3.Session = ensure_session(session=boto3_session)
+    credentials = _boto3_session.get_credentials()
+    return {
+        "aws_access_key_id": getattr(credentials, "access_key", None),
+        "aws_secret_access_key": getattr(credentials, "secret_key", None),
+        "aws_session_token": getattr(credentials, "token", None),
+        "region_name": _boto3_session.region_name,
+        "profile_name": _boto3_session.profile_name,
+    }
+
+
+def boto3_from_primitives(primitives: Dict[str, Optional[str]] = None) -> boto3.Session:
+    """Convert Python primitives to Boto3 Session."""
+    if primitives is None:
+        return boto3.DEFAULT_SESSION  # pragma: no cover
+    _primitives: Dict[str, Optional[str]] = copy.deepcopy(primitives)
+    profile_name: Optional[str] = _primitives.get("profile_name", None)
+    _primitives["profile_name"] = None if profile_name in (None, "default") else profile_name
+    args: Dict[str, str] = {k: v for k, v in _primitives.items() if v is not None}
+    return boto3.Session(**args)
+
+
 def client(service_name: str, session: Optional[boto3.Session] = None) -> boto3.client:
     """Create a valid boto3.client."""
     return ensure_session(session=session).client(
@@ -63,6 +90,8 @@ def parse_path(path: str) -> Tuple[str, str]:
     >>> bucket, key = parse_path('s3://bucket/key')

     """
+    if path.startswith("s3://") is False:
+        raise exceptions.InvalidArgumentValue(f"'{path}' is not a valid path. It MUST start with 's3://'")
     parts = path.replace("s3://", "").split("/", 1)
     bucket: str = parts[0]
     key: str = ""
@@ -139,7 +168,8 @@ def chunkify(lst: List[Any], num_chunks: int = 1, max_length: Optional[int] = None


 def get_fs(
-    session: Optional[boto3.Session] = None, s3_additional_kwargs: Optional[Dict[str, str]] = None
+    session: Optional[Union[boto3.Session, Dict[str, Optional[str]]]] = None,
+    s3_additional_kwargs: Optional[Dict[str, str]] = None,
 ) -> s3fs.S3FileSystem:
     """Build a S3FileSystem from a given boto3 session."""
     fs: s3fs.S3FileSystem = s3fs.S3FileSystem(
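
The two new helpers turn a `boto3.Session` into plain string values and back, and `ensure_session`/`get_fs` now accept that dict form directly. A minimal round-trip sketch; these are private APIs, shown only to illustrate the shape of the dict:

```python
import boto3

from awswrangler._utils import boto3_from_primitives, boto3_to_primitives, ensure_session

# Flatten the current session into plain strings (keys, token, region, profile)
primitives = boto3_to_primitives(boto3.Session())

# Rebuild an equivalent session from the primitives, e.g. on the other side of a process boundary
session = boto3_from_primitives(primitives)

# ensure_session now also accepts the dict form directly
same_session = ensure_session(session=primitives)
print(session.region_name, same_session.region_name)
```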
