Skip to content

Commit ce49b17

Browse files
committed
Sanitize documentation fix
1 parent 91eece9 commit ce49b17

File tree

2 files changed

+6
-11
lines changed

2 files changed

+6
-11
lines changed

awswrangler/_utils.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -313,9 +313,7 @@ def check_duplicated_columns(df: pd.DataFrame) -> Any:
313313
raise exceptions.InvalidDataFrame(
314314
f"There are duplicated column names in your DataFrame: {duplicated}. "
315315
f"Note that your columns may have been sanitized and it can be the cause of "
316-
f"the duplicity. Wrangler sanitization removes all special characters and "
317-
f"also converts CamelCase to snake_case. So you must avoid columns like "
318-
f"['MyCol', 'my_col'] in your DataFrame."
316+
f"the duplicity."
319317
)
320318

321319

awswrangler/catalog/_utils.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -96,14 +96,13 @@ def does_table_exist(
9696

9797

9898
def sanitize_column_name(column: str) -> str:
99-
"""Convert the column name to be compatible with Amazon Athena.
99+
"""Convert the column name to be compatible with Amazon Athena and the AWS Glue Catalog.
100100
101101
https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html
102102
103103
Possible transformations:
104104
- Strip accents
105105
- Remove non alphanumeric characters
106-
- Convert CamelCase to snake_case
107106
108107
Parameters
109108
----------
@@ -119,21 +118,20 @@ def sanitize_column_name(column: str) -> str:
119118
--------
120119
>>> import awswrangler as wr
121120
>>> wr.catalog.sanitize_column_name('MyNewColumn')
122-
'my_new_column'
121+
'mynewcolumn'
123122
124123
"""
125124
return _sanitize_name(name=column)
126125

127126

128127
def sanitize_dataframe_columns_names(df: pd.DataFrame) -> pd.DataFrame:
129-
"""Normalize all columns names to be compatible with Amazon Athena.
128+
"""Normalize all columns names to be compatible with Amazon Athena and the AWS Glue Catalog.
130129
131130
https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html
132131
133132
Possible transformations:
134133
- Strip accents
135134
- Remove non alphanumeric characters
136-
- Convert CamelCase to snake_case
137135
138136
Note
139137
----
@@ -162,14 +160,13 @@ def sanitize_dataframe_columns_names(df: pd.DataFrame) -> pd.DataFrame:
162160

163161

164162
def sanitize_table_name(table: str) -> str:
165-
"""Convert the table name to be compatible with Amazon Athena.
163+
"""Convert the table name to be compatible with Amazon Athena and the AWS Glue Catalog.
166164
167165
https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html
168166
169167
Possible transformations:
170168
- Strip accents
171169
- Remove non alphanumeric characters
172-
- Convert CamelCase to snake_case
173170
174171
Parameters
175172
----------
@@ -185,7 +182,7 @@ def sanitize_table_name(table: str) -> str:
185182
--------
186183
>>> import awswrangler as wr
187184
>>> wr.catalog.sanitize_table_name('MyNewTable')
188-
'my_new_table'
185+
'mynewtable'
189186
190187
"""
191188
return _sanitize_name(name=table)

0 commit comments

Comments
 (0)