
Commit e3a522a

Allowing table names with double underscore
1 parent 96d15bf commit e3a522a

File tree

6 files changed: 27 additions & 27 deletions

awswrangler/athena.py
awswrangler/catalog.py
awswrangler/db.py
requirements.txt
testing/test_awswrangler/test_data_lake.py
testing/test_awswrangler/test_db.py

awswrangler/athena.py

Lines changed: 3 additions & 3 deletions
@@ -235,7 +235,7 @@ def repair_table(
     >>> query_final_state = wr.athena.repair_table(table='...', database='...')

     """
-    query = f"MSCK REPAIR TABLE {table};"
+    query = f"MSCK REPAIR TABLE `{table}`;"
     session: boto3.Session = _utils.ensure_session(session=boto3_session)
     query_id = start_query_execution(
         sql=query,
@@ -456,7 +456,7 @@ def read_sql_query(  # pylint: disable=too-many-branches,too-many-locals
             else:
                 dfs = _utils.empty_generator()
         else:
-            s3.wait_objects_exist(paths=paths, use_threads=use_threads, boto3_session=session)
+            s3.wait_objects_exist(paths=paths, use_threads=False, boto3_session=session)
             dfs = s3.read_parquet(path=paths, use_threads=use_threads, boto3_session=session, chunked=chunked)
         return dfs
     dtype, parse_timestamps, parse_dates, converters, binaries = _get_query_metadata(
@@ -633,7 +633,7 @@ def read_sql_table(

     """
     return read_sql_query(
-        sql=f"SELECT * FROM {table}",
+        sql=f'SELECT * FROM "{table}"',
         database=database,
         ctas_approach=ctas_approach,
         chunksize=chunksize,
awswrangler/catalog.py

Lines changed: 1 addition & 5 deletions
@@ -742,11 +742,7 @@ def _sanitize_name(name: str) -> str:
     name = name.replace(".", "_")
     name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
     name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name)
-    name = name.lower()
-    name = re.sub(r"(_)\1+", "\\1", name)  # remove repeated underscores
-    name = name[1:] if name.startswith("_") else name  # remove leading underscores
-    name = name[:-1] if name.endswith("_") else name  # remove trailing underscores
-    return name
+    return name.lower()


 def sanitize_column_name(column: str) -> str:
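With the trimming steps deleted, sanitization now stops after snake-casing and lowercasing, so leading, trailing, and repeated underscores all survive. A standalone sketch of the post-commit behavior, mirroring the lines kept above:

    import re

    def _sanitize_name(name: str) -> str:
        # Snake-case and lowercase, but keep every underscore verbatim.
        name = name.replace(".", "_")
        name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
        name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name)
        return name.lower()

    print(_sanitize_name("__test_athena"))  # "__test_athena"; the old code returned "test_athena"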

awswrangler/db.py

Lines changed: 3 additions & 2 deletions
@@ -522,7 +522,7 @@ def copy_to_redshift(  # pylint: disable=too-many-arguments
         boto3_session=session,
         s3_additional_kwargs=s3_additional_kwargs,
     )["paths"]
-    s3.wait_objects_exist(paths=paths, use_threads=use_threads, boto3_session=session)
+    s3.wait_objects_exist(paths=paths, use_threads=False, boto3_session=session)
     copy_files_to_redshift(
         path=paths,
         manifest_directory=_utils.get_directory(path=path),
@@ -642,6 +642,7 @@ def copy_files_to_redshift(  # pylint: disable=too-many-locals,too-many-argument
     write_redshift_copy_manifest(
         manifest_path=manifest_path, paths=paths, use_threads=use_threads, boto3_session=session
     )
+    s3.wait_objects_exist(paths=paths + [manifest_path], use_threads=False, boto3_session=session)
     athena_types, _ = s3.read_parquet_metadata(
         path=paths, dataset=False, use_threads=use_threads, boto3_session=session
     )
@@ -953,7 +954,7 @@ def unload_redshift(
     paths: List[str] = unload_redshift_to_files(
         sql=sql, path=path, con=con, iam_role=iam_role, use_threads=use_threads, boto3_session=session
     )
-    s3.wait_objects_exist(paths=paths, use_threads=use_threads, boto3_session=session)
+    s3.wait_objects_exist(paths=paths, use_threads=False, boto3_session=session)
     if chunked is False:
         if not paths:  # pragma: no cover
             return pd.DataFrame()
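All three call sites now hardcode use_threads=False for the waiter only; the surrounding reads, writes, and metadata calls still honor the caller's use_threads. A minimal sketch of the same write-wait-read pattern against the awswrangler 1.x API (the bucket name is a placeholder):

    import pandas as pd
    import awswrangler as wr

    df = pd.DataFrame({"id": [1, 2, 3]})

    # "my-bucket" is hypothetical; any writable bucket works.
    paths = wr.s3.to_parquet(df=df, path="s3://my-bucket/staging/", dataset=True)["paths"]

    # Wait for the objects serially, as this commit does, before reading them back.
    wr.s3.wait_objects_exist(paths=paths, use_threads=False)

    df2 = wr.s3.read_parquet(path=paths)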

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -5,5 +5,5 @@ boto3>=1.10.34
 s3fs~=0.4.2
 psycopg2-binary~=2.8.5
 pymysql~=0.9.3
-SQLAlchemy~=1.3.15
+SQLAlchemy==1.3.15
 sqlalchemy-redshift~=0.7.7
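The switch from ~= to == tightens the constraint: a compatible-release specifier accepts any later 1.3.x patch release, while the exact pin locks 1.3.15 alone. The PEP 440 semantics can be checked with the packaging library:

    from packaging.specifiers import SpecifierSet

    assert "1.3.20" in SpecifierSet("~=1.3.15")      # compatible release: any 1.3.x >= 1.3.15
    assert "1.3.20" not in SpecifierSet("==1.3.15")  # exact pin: 1.3.15 only
    assert "1.3.15" in SpecifierSet("==1.3.15")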

testing/test_awswrangler/test_data_lake.py

Lines changed: 14 additions & 11 deletions
@@ -130,12 +130,12 @@ def test_athena(bucket, database, kms_key, workgroup_secondary):
         dataset=True,
         mode="overwrite",
         database=database,
-        table="test_athena",
+        table="__test_athena",
         partition_cols=["par0", "par1"],
     )["paths"]
     wr.s3.wait_objects_exist(paths=paths, use_threads=False)
     dfs = wr.athena.read_sql_query(
-        sql="SELECT * FROM test_athena",
+        sql="SELECT * FROM __test_athena",
         database=database,
         ctas_approach=False,
         chunksize=1,
@@ -147,12 +147,12 @@ def test_athena(bucket, database, kms_key, workgroup_secondary):
     print(df2)
     ensure_data_types(df=df2)
     df = wr.athena.read_sql_query(
-        sql="SELECT * FROM test_athena", database=database, ctas_approach=False, workgroup=workgroup_secondary
+        sql="SELECT * FROM __test_athena", database=database, ctas_approach=False, workgroup=workgroup_secondary
     )
     assert len(df.index) == 3
     ensure_data_types(df=df)
-    wr.athena.repair_table(table="test_athena", database=database)
-    wr.catalog.delete_table_if_exists(database=database, table="test_athena")
+    wr.athena.repair_table(table="__test_athena", database=database)
+    wr.catalog.delete_table_if_exists(database=database, table="__test_athena")
     wr.s3.delete_objects(path=paths)
     wr.s3.wait_objects_not_exist(paths=paths)
     wr.s3.delete_objects(path=f"s3://{bucket}/athena_workgroup_secondary/")
@@ -361,7 +361,7 @@ def test_parquet_catalog_casting(bucket, database):
         dataset=True,
         mode="overwrite",
         database=database,
-        table="test_parquet_catalog_casting",
+        table="__test_parquet_catalog_casting",
         dtype={
             "iint8": "tinyint",
             "iint16": "smallint",
@@ -385,16 +385,16 @@ def test_parquet_catalog_casting(bucket, database):
     assert len(df.index) == 3
     assert len(df.columns) == 15
     ensure_data_types(df=df, has_list=False)
-    df = wr.athena.read_sql_table(table="test_parquet_catalog_casting", database=database, ctas_approach=True)
+    df = wr.athena.read_sql_table(table="__test_parquet_catalog_casting", database=database, ctas_approach=True)
     assert len(df.index) == 3
     assert len(df.columns) == 15
     ensure_data_types(df=df, has_list=False)
-    df = wr.athena.read_sql_table(table="test_parquet_catalog_casting", database=database, ctas_approach=False)
+    df = wr.athena.read_sql_table(table="__test_parquet_catalog_casting", database=database, ctas_approach=False)
     assert len(df.index) == 3
     assert len(df.columns) == 15
     ensure_data_types(df=df, has_list=False)
     wr.s3.delete_objects(path=path)
-    assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog_casting") is True
+    assert wr.catalog.delete_table_if_exists(database=database, table="__test_parquet_catalog_casting") is True


 def test_catalog(bucket, database):
@@ -552,8 +552,11 @@ def test_athena_read_list(database):


 def test_normalize_column_name():
-    assert wr.catalog.sanitize_column_name("foo()__Boo))))____BAR") == "foo_boo_bar"
-    assert wr.catalog.sanitize_column_name("foo()__Boo))))_{}{}{{}{}{}{___BAR[][][][]") == "foo_boo_bar"
+    assert wr.catalog.sanitize_column_name("foo()__Boo))))____BAR") == "foo_____boo________bar"
+    assert (
+        wr.catalog.sanitize_column_name("foo()__Boo))))_{}{}{{}{}{}{___BAR[][][][]")
+        == "foo_____boo____________________bar________"
+    )


 def test_athena_ctas_empty(database):
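The rewritten expectations spell out the new contract: each special character still maps to one underscore, but runs are no longer collapsed and edge underscores are no longer trimmed. In the first assertion, for example, the ( and ) plus the two literal underscores yield four underscores, and snake-casing Boo inserts a fifth, giving foo_____boo; the four closing parentheses plus the four literal underscores produce the eight underscores before bar.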

testing/test_awswrangler/test_db.py

Lines changed: 5 additions & 5 deletions
@@ -150,12 +150,12 @@ def test_redshift_copy_unload(bucket, parameters):
         path=path,
         con=engine,
         schema="public",
-        table="test_redshift_copy",
+        table="__test_redshift_copy",
         mode="overwrite",
         iam_role=parameters["redshift"]["role"],
     )
     df2 = wr.db.unload_redshift(
-        sql="SELECT * FROM public.test_redshift_copy",
+        sql="SELECT * FROM public.__test_redshift_copy",
         con=engine,
         iam_role=parameters["redshift"]["role"],
         path=path,
@@ -168,12 +168,12 @@ def test_redshift_copy_unload(bucket, parameters):
         path=path,
         con=engine,
         schema="public",
-        table="test_redshift_copy",
+        table="__test_redshift_copy",
         mode="append",
         iam_role=parameters["redshift"]["role"],
     )
     df2 = wr.db.unload_redshift(
-        sql="SELECT * FROM public.test_redshift_copy",
+        sql="SELECT * FROM public.__test_redshift_copy",
         con=engine,
         iam_role=parameters["redshift"]["role"],
         path=path,
@@ -182,7 +182,7 @@ def test_redshift_copy_unload(bucket, parameters):
     assert len(df2.index) == 6
     ensure_data_types(df=df2, has_list=False)
     dfs = wr.db.unload_redshift(
-        sql="SELECT * FROM public.test_redshift_copy",
+        sql="SELECT * FROM public.__test_redshift_copy",
         con=engine,
         iam_role=parameters["redshift"]["role"],
         path=path,
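No quoting change was needed on the Redshift side: as in PostgreSQL, unquoted Redshift identifiers may begin with an underscore, so public.__test_redshift_copy parses as-is.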
