Skip to content

Commit 6066d19

Browse files
committed
Improving pandas to redshift implementation and documentation.
1 parent b7160f9 commit 6066d19

File tree

5 files changed

+37
-17
lines changed

5 files changed

+37
-17
lines changed

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
55
---
66

7-
*Contents:* **[Use Cases](#Use-Cases)** | **[Installation](#Installation)** | **[Examples](#Examples)**
7+
*Contents:* **[Use Cases](#Use-Cases)** | **[Installation](#Installation)** | **[Examples](#Examples)** | **[Diving Deep](#Diving-Deep)**
88

99
---
1010

@@ -95,3 +95,9 @@ session.spark.to_redshift(
9595
mode="append",
9696
)
9797
```
98+
99+
## Diving Deep
100+
101+
### Pandas to Redshift Flow
102+
103+
![Pandas to Redshift Flow](docs/pandas_to_redshift/pandas-to-redshift-flow.jpg?raw=true "Pandas to Redshift Flow")

awswrangler/pandas.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -763,15 +763,29 @@ def to_redshift(
763763
preserve_index=False,
764764
mode="append",
765765
):
766+
"""
767+
Load Pandas Dataframe as a Table on Amazon Redshift
768+
:param dataframe: Pandas Dataframe
769+
:param path: S3 path to write temporary files (E.g. s3://BUCKET_NAME/ANY_NAME/)
770+
:param connection: A PEP 249 compatible connection (Can be generated with Redshift.generate_connection())
771+
:param schema: The Redshift Schema for the table
772+
:param table: The name of the desired Redshift table
773+
:param iam_role: AWS IAM role with the related permissions
774+
:param preserve_index: Should we preserve the Dataframe index?
775+
:param mode: append or overwrite
776+
:return: None
777+
"""
778+
if path[-1] != "/":
779+
path += "/"
766780
self._session.s3.delete_objects(path=path)
767-
num_slices = self._session.redshift.get_number_of_slices(
768-
redshift_conn=connection)
769-
logger.debug(f"Number of slices on Redshift: {num_slices}")
770781
num_rows = len(dataframe.index)
771782
logger.info(f"Number of rows: {num_rows}")
772783
if num_rows < MIN_NUMBER_OF_ROWS_TO_DISTRIBUTE:
773784
num_partitions = 1
774785
else:
786+
num_slices = self._session.redshift.get_number_of_slices(
787+
redshift_conn=connection)
788+
logger.debug(f"Number of slices on Redshift: {num_slices}")
775789
num_partitions = num_slices
776790
logger.debug(f"Number of partitions calculated: {num_partitions}")
777791
objects_paths = self.to_parquet(
@@ -781,8 +795,6 @@ def to_redshift(
781795
mode="append",
782796
procs_cpu_bound=num_partitions,
783797
)
784-
if path[-1] != "/":
785-
path += "/"
786798
manifest_path = f"{path}manifest.json"
787799
self._session.redshift.write_load_manifest(manifest_path=manifest_path,
788800
objects_paths=objects_paths)

awswrangler/redshift.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,17 @@ def load_table(
9494
if mode == "overwrite":
9595
cursor.execute("-- AWS DATA WRANGLER\n"
9696
f"DROP TABLE IF EXISTS {schema_name}.{table_name}")
97-
schema = Redshift._get_redshift_schema(
98-
dataframe=dataframe,
99-
dataframe_type=dataframe_type,
100-
preserve_index=preserve_index,
101-
)
102-
cols_str = "".join([f"{col[0]} {col[1]},\n" for col in schema])[:-2]
103-
sql = (
104-
"-- AWS DATA WRANGLER\n"
105-
f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} (\n{cols_str}"
106-
") DISTSTYLE AUTO")
107-
cursor.execute(sql)
97+
schema = Redshift._get_redshift_schema(
98+
dataframe=dataframe,
99+
dataframe_type=dataframe_type,
100+
preserve_index=preserve_index,
101+
)
102+
cols_str = "".join([f"{col[0]} {col[1]},\n" for col in schema])[:-2]
103+
sql = (
104+
"-- AWS DATA WRANGLER\n"
105+
f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} (\n{cols_str}"
106+
") DISTSTYLE AUTO")
107+
cursor.execute(sql)
108108
sql = ("-- AWS DATA WRANGLER\n"
109109
f"COPY {schema_name}.{table_name} FROM '{manifest_path}'\n"
110110
f"IAM_ROLE '{iam_role}'\n"
103 KB
Loading
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<mxfile modified="2019-08-10T18:02:27.726Z" host="www.draw.io" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36" etag="BfaHWruCsNSngJoDnESY" version="11.1.4" type="google"><diagram id="uhJXyVCDerINJ9iByMH9" name="Page-1">7Vxbc5u6Gv01mTnnwRkkAcaPSZzsZqZtMnE6bZ8yMsg2LUYuyE18fv2RhMRNOKWNAWc3fWiMEEJIa32XJcEJulg//ZPgzeoDDUh0Aq3g6QRNTyAEACD+R5TsspKxC7OCZRIGqlJRMAv/R1ShpUq3YUDSSkVGacTCTbXQp3FMfFYpw0lCH6vVFjSq3nWDl8QomPk4Mks/hwFbZaUeHBfl70i4XOk7A3eSnVljXVk9SbrCAX0sFaHLE3SRUMqyX+unCxKJwdPjkl13teds3rGExKzNBfG7W7DF4Ob22yd4m358N12whxFQs5GynX5iEvABUIcxjfmf84Ru44CIdix+RBO2oksa4+g9pRteCHjhN8LYTk0f3jLKi1ZsHamzvIvJ7ou4/tTRh19Vc/Jg+lQ52qmjlOGEnYlpLDojy67CKFJ1sgcQvd47MKoopdvEJ8+Mhq0AhpMlYc+NGsjnjwOf0DXhneYXJiTCLPxZ7QhWCFzm9YpJ4j/UPP3GnKle/sTRVt3pH95baMXb9Zwk/AddiIeNQp8TB7oRf5LzOS93l+JXGPOTdyRIV+FCXHURbVMmLqvBoDrnj6uQkdkGy+F75FSvzq/qEUkYeXp+FsxBUxe4UGBDXqRMBdJUeiyIBzSbViXSuVZHIz3uhRw9g9xpCXJ3SIw7BsanJCKMiEHgMNuxVRgvRSNxyv2DeJwVkZ3zCe9bYMKekfWGJlg+wgzJoeOYGhr00KlCnnsWA/Jw3AB5ZHc08K795hBqHGjBFW9IrrgGVy749LCcFFW/IEMiQRx9vs6UWxwHWFSZYoYXCV6T4Wmi4a5pMjFpggAwaQInHY25Dk9Lg56PpB5IXbCgcjKK8XN/bKk+MUolRc54BeBsnoqTupWPVLfDO5o1VW2eF5du2Qd3n0L2pfT7a0FjflQQVxwMxNs9fNxVc4vW9FQt3dJQzqSOWMCkHrFManjLDIe6rhyb66Z0RbpYpIQZsMz7/gKker2CwvoNULwSJ+C1dALAeaEXkJfy58K7UoWNQEq6H4VgLwoLIGVtHhRWsDcD+LXIYv7EAnKvw6pQTQi/JZ7LCgIqaoR5bef8xJnyEhyFy5gX+Bwr3HOic+G9Qh9HZ+rEOgwCcf15hOckOsf+96Vk0AWNaCLvixby33P+T0kWqicnuVBQxugzlN7rLUfWqT2xxwexcSNYgVauF3Rvt7y9+NokZA/CFtivAuwdiX4SMXNlWLUAIhRAzMyINc9nd+Rn0yuqJMv5f2T0zieNP6BV+/3fBkfeEIbVYy/+F69F/BSVn644/+H648PHTx/OL+8ebq4e7m4+zx7ubx6m17P7u+vzT/eXjS3mJVdNhQaV9jCsMur1eHBF1/Nt2k8saNo729QJkN2QNAGrK6EA9aMUvBKHOWnpMNGgMtrEMDCfk1AqDGKAoHWLkx9bKawtwkgUbCR7+YCwkIVU5E88U+L/L2UlYSbylEsqDN/JTrOdYH+lGjp9nqMqBzud8XrWZ9GWj8WtYsqykZFtipIoTNkIL7iDGj3mHRfTI+7NkWN0LEyzTqWnwyd0nlWhsO026B5NUp/XFYP1OsW/S+vTOfEvqTgZkom6lyUqVp3lreZck8uUfrxWWL18pqTwo1O47bEJ+34VbtC346q4rcKLHYfjAm218WHpYorjL6QLMEDQNzecelA3PDWg2y81wFFTA8KW1BhEBHE
9UEEPtFFt/ruQQKDBw7MgSGUaWCR/V5XUs5DdLSKCxzoTJX+zpaksfjw6nwWAPTQxnX6IqRXNXMX8WjrzqhVNiHoi88vm2czQ3iTGniRGuGfmC4lRQeil+qKh5uil/+4VRvgW+/6JTcjc3mBGYb8ufFQLr/9GmzD+lU0AjluNFQ5iI+oRSJ4A9WAi0F60pRupy5ngkkHZKM2iMgGvmCZrHJkAm1Lhdqw9O5PqkeFK7snAvxNdoqsSVLP+HoOAP6mbfK9hn5/XpN+DzvT7nhbJX0d4iNqqhggN6QqQKRtmG2EzBkj7rjRxySuRW9VZheVthH7OiKgoFPrhlUIE6xRx2u6FRV3teLJNkehvpkjbaAkNmkIh03/pRa6MJGschwuSstNvqSRKwZbHUIoQdYLQ+TcePEhXJBe/eDNhop9HbLyVvBueQtCzaxRytaBXXmMCfVLIsYxh+Zsp1FZstwdV25Gptv8tFHJrBPLGJoGcBgLZXYVpdk/K3+vI2O22krxtD0kg29yf3lHGfsPhrrZCvCXuBm2e3S9oa7H1hbk6dIxkHVbb6C5Zd3qThs42m9LqzdHsy38dfr+12XL2CI0lvWliVQWnEXrh1v7uUaofv/y6zoqU9o59uJlemr6/R4EGwermrPGkYcWvaX+l3Vnu+SbPGO68jd/fI9j25PfN95Sndze3vOT+7Pw9h7h1fcX/u/xyPbufDR7sulYV9J7lak9Whr3b9JJZV+9iOj0vUB33DhT9RsMf+42eYD827TuNBTbzVDB/wzLATAj6bLfJVgLEpt3Sq/m101m26CcEl7LPLBxUWWdx7cBsctz6oo3nOg18gk1uBHWVPjpvEqYhq7RxI8PyydwFMhODolfDWILjFPtqR/5x0aCuoIztlgoK6CqSct8oYHiKNi5lUAlSd7NEgfcUB7kPUItWRwf+ugLvjcHQ8Idv8D8gqps1IDSphtITt7ZfY89b8QfTgUyPUV4X/rEl8imvp4NTBOn3BvVIgYZN71ri6IcgPW96P26CuG1Xed1BV3nH/Vi1QQb/4NbJralbwLL6NU9uH3vKKpYu4sGCXFds3u1ypNvEjPAh9yO/Ch86e0nUfdMhDaPXxjoOqkO65gJ+9i25NGdEJYAuvcFd3pp5HN+LM0jBwwOvSVjpN6w2Na87wraJ3Amx9X2SpottFL3wdSY+S1FpwTRwiBfYEuIJ/U5KZzw4RzyQOVCIBqvjDYCZxAC9I7mf0Tbj2/uVIH2R/In08JIbgkRw/cknm0weOeDoLzyf+H7T6M89x3asw4y++frZZNwEd6A3upUnwEFdxVv9iCg9m/Ox1TwVB84GPa/XaEt/PLMsx0uj//os0RjWP3zT8BXEBkMEfz8c4ofFh6mzqSg+740u/w8=</diagram></mxfile>

0 commit comments

Comments
 (0)