Skip to content

Commit 282dac7

Browse files
committed
Fix index sanitize. #343
1 parent 5b05099 commit 282dac7

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

awswrangler/catalog/_utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,7 @@ def sanitize_dataframe_columns_names(df: pd.DataFrame) -> pd.DataFrame:
129129
130130
"""
131131
df.columns = [sanitize_column_name(x) for x in df.columns]
132+
df.index.names = [None if x is None else sanitize_column_name(x) for x in df.index.names]
132133
return df
133134

134135

tests/test_athena_parquet.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -439,3 +439,16 @@ def test_glue_number_of_versions_created(path, glue_table, glue_database):
439439
df, path, dataset=True, table=glue_table, database=glue_database,
440440
)
441441
assert wr.catalog.get_table_number_of_versions(table=glue_table, database=glue_database) == 1
442+
443+
444+
def test_sanitize_index(path, glue_table, glue_database):
    """Check that an upper-case index name comes back lower-cased from Athena.

    The same two-row frame, indexed by a column named ``"DATE"``, is written
    to the Glue table twice: first with ``mode="overwrite"``, then with
    ``mode="append"``. The table is then read back through Athena and must
    hold four rows, with ``id`` values summing to 6 and columns named
    exactly ``["id", "date"]``.
    """
    # Overwrite first, then append: the order of the two writes matters.
    for write_mode in ("overwrite", "append"):
        frame = pd.DataFrame(
            {
                "id": [1, 2],
                "DATE": [datetime.date(2020, 1, 1), datetime.date(2020, 1, 2)],
            }
        )
        frame.set_index("DATE", inplace=True, verify_integrity=True)
        wr.s3.to_parquet(
            frame,
            path,
            dataset=True,
            index=True,
            database=glue_database,
            table=glue_table,
            mode=write_mode,
        )

    result = wr.athena.read_sql_table(database=glue_database, table=glue_table)

    assert result.shape == (4, 2)
    assert result.id.sum() == 6
    assert list(result.columns) == ["id", "date"]

0 commit comments

Comments
 (0)