Skip to content

Commit a6ba86c

Browse files
committed
Add replace_filenames argument to wr.s3.copy_objects() (#215)
1 parent fe6f50b commit a6ba86c

File tree

2 files changed

Lines changed: 29 additions & 0 deletions

awswrangler/s3.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2284,6 +2284,7 @@ def copy_objects(
22842284
paths: List[str],
22852285
source_path: str,
22862286
target_path: str,
2287+
replace_filenames: Optional[Dict[str, str]] = None,
22872288
use_threads: bool = True,
22882289
boto3_session: Optional[boto3.Session] = None,
22892290
) -> List[str]:
@@ -2334,6 +2335,15 @@ def copy_objects(
23342335
for path in paths:
23352336
path_wo_prefix: str = path.replace(f"{source_path}/", "")
23362337
path_final: str = f"{target_path}/{path_wo_prefix}"
2338+
if replace_filenames is not None:
2339+
parts: List[str] = path_final.rsplit(sep="/", maxsplit=1)
2340+
if len(parts) == 2:
2341+
path_wo_filename: str = parts[0]
2342+
filename: str = parts[1]
2343+
if filename in replace_filenames:
2344+
new_filename: str = replace_filenames[filename]
2345+
_logger.debug("Replacing filename: %s -> %s", filename, new_filename)
2346+
path_final = f"{path_wo_filename}/{new_filename}"
23372347
new_objects.append(path_final)
23382348
batch.append((path, path_final))
23392349
_logger.debug("len(new_objects): %s", len(new_objects))

testing/test_awswrangler/test_data_lake.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,3 +1348,22 @@ def test_catalog_versioning(bucket, database):
13481348
# Cleaning Up
13491349
wr.catalog.delete_table_if_exists(database=database, table=table)
13501350
wr.s3.delete_objects(path=path)
1351+
1352+
1353+
def test_copy_replacing_filename(bucket):
1354+
path = f"s3://{bucket}/test_copy_replacing_filename/"
1355+
wr.s3.delete_objects(path=path)
1356+
df = pd.DataFrame({"c0": [1, 2]})
1357+
file_path = f"{path}myfile.parquet"
1358+
wr.s3.to_parquet(df=df, path=file_path)
1359+
wr.s3.wait_objects_exist(paths=[file_path], use_threads=False)
1360+
path2 = f"s3://{bucket}/test_copy_replacing_filename2/"
1361+
wr.s3.copy_objects(
1362+
paths=[file_path], source_path=path, target_path=path2, replace_filenames={"myfile.parquet": "myfile2.parquet"}
1363+
)
1364+
expected_file = f"{path2}myfile2.parquet"
1365+
wr.s3.wait_objects_exist(paths=[expected_file], use_threads=False)
1366+
objs = wr.s3.list_objects(path=path2)
1367+
assert objs[0] == expected_file
1368+
wr.s3.delete_objects(path=path)
1369+
wr.s3.delete_objects(path=path2)

0 commit comments

Comments (0)