Skip to content

Commit ab29b24

Browse files
authored
Merge pull request #9609 from rakshil14-2/no_overwrite_for_sync_operation
Implementing no-overwrite for sync and download
2 parents 2493f8b + eb56a58 commit ab29b24

File tree

10 files changed

+430
-35
lines changed

10 files changed

+430
-35
lines changed

awscli/customizations/s3/s3handler.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,36 @@ def _get_fileout(self, fileinfo):
438438
return fileinfo.dest
439439

440440
def _get_warning_handlers(self):
441-
return [self._warn_glacier, self._warn_parent_reference]
441+
return [
442+
self._warn_glacier,
443+
self._warn_parent_reference,
444+
self._warn_if_file_exists_with_no_overwrite,
445+
]
446+
447+
def _warn_if_file_exists_with_no_overwrite(self, fileinfo):
448+
"""
449+
Warning handler to skip downloads when no-overwrite is set and local file exists.
450+
451+
This method prevents overwriting existing local files during S3 download operations
452+
when the --no-overwrite flag is specified. It checks if the destination file already
453+
exists on the local filesystem and skips the download if found.
454+
455+
:type fileinfo: FileInfo
456+
:param fileinfo: The FileInfo object containing transfer details
457+
458+
:rtype: bool
459+
:returns: True if the file should be skipped (exists and no-overwrite is set),
460+
False if the download should proceed
461+
"""
462+
if not self._cli_params.get('no_overwrite'):
463+
return False
464+
fileout = self._get_fileout(fileinfo)
465+
if os.path.exists(fileout):
466+
LOGGER.debug(
467+
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
468+
)
469+
return True
470+
return False
442471

443472
def _format_src_dest(self, fileinfo):
444473
src = self._format_s3_path(fileinfo.src)
@@ -512,9 +541,7 @@ def _warn_if_zero_byte_file_exists_with_no_overwrite(self, fileinfo):
512541
try:
513542
client.head_object(Bucket=bucket, Key=key)
514543
LOGGER.debug(
515-
"Warning: Skipping file %s as it already exists on %s",
516-
fileinfo.src,
517-
fileinfo.dest,
544+
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
518545
)
519546
return True
520547
except ClientError as e:

awscli/customizations/s3/subcommands.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,7 +1143,7 @@ class SyncCommand(S3TransferCommand):
11431143
}
11441144
]
11451145
+ TRANSFER_ARGS
1146-
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE]
1146+
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, NO_OVERWRITE]
11471147
)
11481148

11491149

@@ -1314,7 +1314,6 @@ def choose_sync_strategies(self):
13141314
sync_type = override_sync_strategy.sync_type
13151315
sync_type += '_sync_strategy'
13161316
sync_strategies[sync_type] = override_sync_strategy
1317-
13181317
return sync_strategies
13191318

13201319
def run(self):
@@ -1401,7 +1400,8 @@ def run(self):
14011400
self._client, self._source_client, self.parameters
14021401
)
14031402

1404-
s3_transfer_handler = S3TransferHandlerFactory(self.parameters)(
1403+
params = self._get_s3_handler_params()
1404+
s3_transfer_handler = S3TransferHandlerFactory(params)(
14051405
self._transfer_manager, result_queue
14061406
)
14071407

@@ -1510,6 +1510,16 @@ def _map_sse_c_params(self, request_parameters, paths_type):
15101510
},
15111511
)
15121512

1513+
def _get_s3_handler_params(self):
1514+
"""
1515+
Removing no-overwrite params from sync since file to
1516+
be synced are already separated out using sync strategy
1517+
"""
1518+
params = self.parameters.copy()
1519+
if self.cmd == 'sync':
1520+
params.pop('no_overwrite', None)
1521+
return params
1522+
15131523

15141524
# TODO: This class is fairly quirky in the sense that it is both a builder
15151525
# and a data object. In the future we should make the following refactorings
@@ -1573,6 +1583,7 @@ def add_paths(self, paths):
15731583
elif len(paths) == 1:
15741584
self.parameters['dest'] = paths[0]
15751585
self._validate_streaming_paths()
1586+
self._validate_no_overwrite_for_download_streaming()
15761587
self._validate_path_args()
15771588
self._validate_sse_c_args()
15781589
self._validate_not_s3_express_bucket_for_sync()
@@ -1824,3 +1835,20 @@ def _validate_sse_c_copy_source_for_paths(self):
18241835
'--sse-c-copy-source is only supported for '
18251836
'copy operations.'
18261837
)
1838+
1839+
def _validate_no_overwrite_for_download_streaming(self):
1840+
"""
1841+
Validates that no-overwrite parameter is not used with streaming downloads.
1842+
1843+
Raises:
1844+
ParamValidationError: If no-overwrite is specified with a streaming download.
1845+
"""
1846+
if (
1847+
self.parameters['is_stream']
1848+
and self.parameters.get('no_overwrite')
1849+
and self.parameters['dest'] == '-'
1850+
):
1851+
raise ParamValidationError(
1852+
"--no-overwrite parameter is not supported for "
1853+
"streaming downloads"
1854+
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
import logging
14+
15+
from awscli.customizations.s3.subcommands import NO_OVERWRITE
16+
from awscli.customizations.s3.syncstrategy.base import BaseSync
17+
18+
LOG = logging.getLogger(__name__)
19+
20+
21+
class NoOverwriteSync(BaseSync):
22+
"""Sync strategy that prevents overwriting of existing files at the destination.
23+
This strategy is used only for files that exist at both source and destination
24+
(file_at_src_and_dest_sync_strategy). It always returns False to prevent any
25+
overwriting of existing files, regardless of size or modification time differences.
26+
"""
27+
28+
ARGUMENT = NO_OVERWRITE
29+
30+
def determine_should_sync(self, src_file, dest_file):
31+
LOG.debug(
32+
f"warning: skipping {src_file.src} -> {src_file.dest}, file exists at destination"
33+
)
34+
return False

awscli/customizations/s3/syncstrategy/register.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from awscli.customizations.s3.syncstrategy.exacttimestamps import (
1515
ExactTimestampsSync,
1616
)
17+
from awscli.customizations.s3.syncstrategy.nooverwrite import NoOverwriteSync
1718
from awscli.customizations.s3.syncstrategy.sizeonly import SizeOnlySync
1819

1920

@@ -48,4 +49,7 @@ def register_sync_strategies(command_table, session, **kwargs):
4849
# Register the delete sync strategy.
4950
register_sync_strategy(session, DeleteSync, 'file_not_at_src')
5051

52+
# Register the noOverwrite sync strategy
53+
register_sync_strategy(session, NoOverwriteSync, 'file_at_src_and_dest')
54+
5155
# Register additional sync strategies here...

tests/functional/s3/test_cp_command.py

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -315,13 +315,7 @@ def test_no_overwrite_flag_when_object_exists_on_target(self):
315315
# Set up the response to simulate a PreconditionFailed error
316316
self.http_response.status_code = 412
317317
self.parsed_responses = [
318-
{
319-
'Error': {
320-
'Code': 'PreconditionFailed',
321-
'Message': 'At least one of the pre-conditions you specified did not hold',
322-
'Condition': 'If-None-Match',
323-
}
324-
}
318+
self.precondition_failed_error_response(),
325319
]
326320
self.run_cmd(cmdline, expected_rc=0)
327321
# Verify PutObject was attempted with IfNoneMatch
@@ -367,13 +361,7 @@ def test_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
367361
{'UploadId': 'foo'}, # CreateMultipartUpload response
368362
{'ETag': '"foo-1"'}, # UploadPart response
369363
{'ETag': '"foo-2"'}, # UploadPart response
370-
{
371-
'Error': {
372-
'Code': 'PreconditionFailed',
373-
'Message': 'At least one of the pre-conditions you specified did not hold',
374-
'Condition': 'If-None-Match',
375-
}
376-
}, # PreconditionFailed error for CompleteMultipart Upload
364+
self.precondition_failed_error_response(), # PreconditionFailed error for CompleteMultipart Upload
377365
{}, # AbortMultipartUpload response
378366
]
379367
# Checking for success as file is skipped
@@ -550,6 +538,40 @@ def test_no_overwrite_flag_on_copy_when_large_object_does_not_exist_on_target(
550538
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
551539
self.assertEqual(self.operations_called[5][1]['IfNoneMatch'], '*')
552540

541+
def test_no_overwrite_flag_on_download_when_single_object_already_exists_at_target(
542+
self,
543+
):
544+
full_path = self.files.create_file('foo.txt', 'existing content')
545+
cmdline = (
546+
f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite'
547+
)
548+
self.parsed_responses = [
549+
self.head_object_response(),
550+
]
551+
self.run_cmd(cmdline, expected_rc=0)
552+
self.assertEqual(len(self.operations_called), 1)
553+
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
554+
with open(full_path) as f:
555+
self.assertEqual(f.read(), 'existing content')
556+
557+
def test_no_overwrite_flag_on_download_when_single_object_does_not_exist_at_target(
558+
self,
559+
):
560+
full_path = self.files.full_path('foo.txt')
561+
cmdline = (
562+
f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite'
563+
)
564+
self.parsed_responses = [
565+
self.head_object_response(),
566+
self.get_object_response(),
567+
]
568+
self.run_cmd(cmdline, expected_rc=0)
569+
self.assertEqual(len(self.operations_called), 2)
570+
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
571+
self.assertEqual(self.operations_called[1][0].name, 'GetObject')
572+
with open(full_path) as f:
573+
self.assertEqual(f.read(), 'foo')
574+
553575
def test_dryrun_download(self):
554576
self.parsed_responses = [self.head_object_response()]
555577
target = self.files.full_path('file.txt')
@@ -1428,6 +1450,15 @@ def test_streaming_download_error(self):
14281450
)
14291451
self.assertIn(error_message, stderr)
14301452

1453+
def test_no_overwrite_cannot_be_used_with_streaming_download(self):
1454+
command = "s3 cp s3://bucket/streaming.txt - --no-overwrite"
1455+
_, stderr, _ = self.run_cmd(command, expected_rc=252)
1456+
error_message = (
1457+
"--no-overwrite parameter is not supported for "
1458+
"streaming downloads"
1459+
)
1460+
self.assertIn(error_message, stderr)
1461+
14311462

14321463
class TestCpCommandWithRequesterPayer(BaseCPCommandTest):
14331464
def setUp(self):

tests/functional/s3/test_mv_command.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -336,13 +336,7 @@ def test_mv_no_overwrite_flag_when_object_exists_on_target(self):
336336
# Set up the response to simulate a PreconditionFailed error
337337
self.http_response.status_code = 412
338338
self.parsed_responses = [
339-
{
340-
'Error': {
341-
'Code': 'PreconditionFailed',
342-
'Message': 'At least one of the pre-conditions you specified did not hold',
343-
'Condition': 'If-None-Match',
344-
}
345-
}
339+
self.precondition_failed_error_response(),
346340
]
347341
self.run_cmd(cmdline, expected_rc=0)
348342
# Verify PutObject was attempted with IfNoneMatch
@@ -392,13 +386,7 @@ def test_mv_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
392386
{'UploadId': 'foo'}, # CreateMultipartUpload response
393387
{'ETag': '"foo-1"'}, # UploadPart response
394388
{'ETag': '"foo-2"'}, # UploadPart response
395-
{
396-
'Error': {
397-
'Code': 'PreconditionFailed',
398-
'Message': 'At least one of the pre-conditions you specified did not hold',
399-
'Condition': 'If-None-Match',
400-
}
401-
}, # CompleteMultipartUpload response
389+
self.precondition_failed_error_response(), # CompleteMultipartUpload response
402390
{}, # Abort Multipart
403391
]
404392
self.run_cmd(cmdline, expected_rc=0)
@@ -575,6 +563,42 @@ def test_mv_no_overwrite_flag_when_large_object_does_not_exist_on_target(
575563
)
576564
self.assertEqual(self.operations_called[6][0].name, 'DeleteObject')
577565

566+
def test_no_overwrite_flag_on_mv_download_when_single_object_exists_at_target(
567+
self,
568+
):
569+
full_path = self.files.create_file('foo.txt', 'existing content')
570+
cmdline = (
571+
f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite'
572+
)
573+
self.parsed_responses = [
574+
self.head_object_response(),
575+
]
576+
self.run_cmd(cmdline, expected_rc=0)
577+
self.assertEqual(len(self.operations_called), 1)
578+
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
579+
with open(full_path) as f:
580+
self.assertEqual(f.read(), 'existing content')
581+
582+
def test_no_overwrite_flag_on_mv_download_when_single_object_does_not_exist_at_target(
583+
self,
584+
):
585+
full_path = self.files.full_path('foo.txt')
586+
cmdline = (
587+
f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite'
588+
)
589+
self.parsed_responses = [
590+
self.head_object_response(),
591+
self.get_object_response(),
592+
self.delete_object_response(),
593+
]
594+
self.run_cmd(cmdline, expected_rc=0)
595+
self.assertEqual(len(self.operations_called), 3)
596+
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
597+
self.assertEqual(self.operations_called[1][0].name, 'GetObject')
598+
self.assertEqual(self.operations_called[2][0].name, 'DeleteObject')
599+
with open(full_path) as f:
600+
self.assertEqual(f.read(), 'foo')
601+
578602

579603
class TestMvWithCRTClient(BaseCRTTransferClientTest):
580604
def test_upload_move_using_crt_client(self):

0 commit comments

Comments
 (0)