Skip to content

Commit 60c3377

Browse files
committed
Add tests to VARCHAR lengths for Redshift and Aurora tables
1 parent a4e00dc commit 60c3377

File tree

3 files changed: +113 −1 lines changed

awswrangler/redshift.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,12 +409,13 @@ def _get_redshift_schema(dataframe,
             redshift_type = data_types.pyarrow2redshift(dtype=dtype, varchar_length=varchar_len)
             schema_built.append((name, redshift_type))
     elif dataframe_type.lower() == "spark":
+        logger.debug(f"cast_columns.keys: {cast_columns.keys()}")
         for name, dtype in dataframe.dtypes:
             varchar_len = varchar_lengths.get(name, varchar_default_length)
             if name in cast_columns.keys():
                 redshift_type = data_types.athena2redshift(dtype=cast_columns[name], varchar_length=varchar_len)
             else:
-                redshift_type = data_types.spark2redshift(dtype=cast_columns[name], varchar_length=varchar_len)
+                redshift_type = data_types.spark2redshift(dtype=dtype, varchar_length=varchar_len)
             schema_built.append((name, redshift_type))
     else:
         raise InvalidDataframeType(

testing/test_awswrangler/test_pandas.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2365,3 +2365,68 @@ def test_s3_overall_nan(bucket, database):
                            ctas_approach=True)
     wr.s3.delete_objects(path=path)
     assert df.equals(df2)
+
+
+def test_aurora_postgres_load_varchar(bucket, postgres_parameters):
+    df = pd.DataFrame({"id": [1, 2, 3], "varchar3": ["foo", "boo", "bar"], "varchar1": ["a", "b", "c"]})
+    path = f"s3://{bucket}/test_aurora_postgres_load_varchar"
+    wr.pandas.to_aurora(dataframe=df,
+                        connection="aws-data-wrangler-postgres",
+                        schema="public",
+                        table="test_aurora_postgres_load_varchar",
+                        mode="overwrite",
+                        temp_s3_path=path,
+                        engine="postgres",
+                        preserve_index=False,
+                        varchar_default_length=3,
+                        varchar_lengths={"varchar1": 1})
+    conn = Aurora.generate_connection(database="postgres",
+                                      host=postgres_parameters["PostgresAddress"],
+                                      port=3306,
+                                      user="test",
+                                      password=postgres_parameters["DatabasesPassword"],
+                                      engine="postgres")
+    with conn.cursor() as cursor:
+        cursor.execute("SELECT * FROM public.test_aurora_postgres_load_varchar")
+        rows = cursor.fetchall()
+        assert len(rows) == len(df.index)
+        assert rows[0][0] == 1
+        assert rows[1][0] == 2
+        assert rows[2][0] == 3
+        assert rows[0][1] == "foo"
+        assert rows[1][1] == "boo"
+        assert rows[2][1] == "bar"
+        assert rows[0][2] == "a"
+        assert rows[1][2] == "b"
+        assert rows[2][2] == "c"
+    conn.close()
+
+
+def test_aurora_mysql_load_varchar(bucket):
+    df = pd.DataFrame({"id": [1, 2, 3], "varchar3": ["foo", "boo", "bar"], "varchar1": ["a", "b", "c"]})
+    path = f"s3://{bucket}/test_aurora_mysql_load_varchar"
+    wr.pandas.to_aurora(dataframe=df,
+                        connection="aws-data-wrangler-mysql",
+                        schema="test",
+                        table="test_aurora_mysql_load_varchar",
+                        mode="overwrite",
+                        temp_s3_path=path,
+                        engine="mysql",
+                        preserve_index=False,
+                        varchar_default_length=3,
+                        varchar_lengths={"varchar1": 1})
+    conn = wr.glue.get_connection("aws-data-wrangler-mysql")
+    with conn.cursor() as cursor:
+        cursor.execute("SELECT * FROM test.test_aurora_mysql_load_varchar")
+        rows = cursor.fetchall()
+        assert len(rows) == len(df.index)
+        assert rows[0][0] == 1
+        assert rows[1][0] == 2
+        assert rows[2][0] == 3
+        assert rows[0][1] == "foo"
+        assert rows[1][1] == "boo"
+        assert rows[2][1] == "bar"
+        assert rows[0][2] == "a"
+        assert rows[1][2] == "b"
+        assert rows[2][2] == "c"
+    conn.close()

testing/test_awswrangler/test_redshift.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,3 +866,49 @@ def test_spectrum_csv(bucket, glue_database, external_schema):
     assert len(rows) == len(df.index)
     for row in rows:
         assert len(row) == len(df.columns)
+
+
+def test_to_redshift_pandas_varchar(bucket, redshift_parameters):
+    df = pd.DataFrame({"id": [1, 2, 3], "varchar3": ["foo", "boo", "bar"], "varchar1": ["a", "b", "c"]})
+    path = f"s3://{bucket}/test_to_redshift_pandas_varchar"
+    wr.pandas.to_redshift(dataframe=df,
+                          path=path,
+                          schema="public",
+                          table="test_to_redshift_pandas_varchar",
+                          connection="aws-data-wrangler-redshift",
+                          iam_role=redshift_parameters.get("RedshiftRole"),
+                          mode="overwrite",
+                          preserve_index=False,
+                          varchar_default_length=3,
+                          varchar_lengths={"varchar1": 1})
+    conn = wr.glue.get_connection("aws-data-wrangler-redshift")
+    with conn.cursor() as cursor:
+        cursor.execute("SELECT * FROM public.test_to_redshift_pandas_varchar")
+        rows = cursor.fetchall()
+        assert len(rows) == len(df.index)
+        for row in rows:
+            assert len(row) == len(df.columns)
+    conn.close()
+
+
+def test_to_redshift_spark_varchar(session, bucket, redshift_parameters):
+    pdf = pd.DataFrame({"id": [1, 2, 3], "varchar3": ["foo", "boo", "bar"], "varchar1": ["a", "b", "c"]})
+    df = session.spark_session.createDataFrame(pdf)
+    path = f"s3://{bucket}/test_to_redshift_spark_varchar"
+    session.spark.to_redshift(dataframe=df,
+                              path=path,
+                              schema="public",
+                              table="test_to_redshift_spark_varchar",
+                              connection="aws-data-wrangler-redshift",
+                              iam_role=redshift_parameters.get("RedshiftRole"),
+                              mode="overwrite",
+                              varchar_default_length=3,
+                              varchar_lengths={"varchar1": 1})
+    conn = wr.glue.get_connection("aws-data-wrangler-redshift")
+    with conn.cursor() as cursor:
+        cursor.execute("SELECT * FROM public.test_to_redshift_spark_varchar")
+        rows = cursor.fetchall()
+        assert len(rows) == len(pdf.index)
+        for row in rows:
+            assert len(row) == len(pdf.columns)
+    conn.close()

Comments (0)