Skip to content

Commit 39e8a61

Browse files
authored
[v2] add parameter to prevent overwriting existing objects in S3 high level commands (#9750)
Add the `--no-overwrite` parameter to the high-level AWS S3 operations `cp`, `mv`, and `sync`, which prevents overwriting objects that already exist at the destination. This change makes use of the S3 feature `IfNoneMatch`, which enforces conditional write operations on S3 buckets (see https://docs.aws.amazon.com/AmazonS3/latest/userguide/conditional-writes-enforce.html). Updates have been applied to the vended `s3transfer` module as well as the AWS CLI to support the new parameter.
1 parent bf581f5 commit 39e8a61

File tree

21 files changed

+1207
-23
lines changed

21 files changed

+1207
-23
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "feature",
3+
"category": "``s3``",
4+
"description": "Add the ``--no-overwrite`` parameter to high-level AWS S3 operations ``cp``, ``mv``, and ``sync``, which prevents overwriting objects that already exist at the destination. Implements `#2874 <https://github.com/aws/aws-cli/issues/2874>`__."
5+
}

awscli/customizations/s3/results.py

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121

2222
from awscli.compat import ensure_text_type, queue
2323
from awscli.customizations.s3.subscribers import OnDoneFilteredSubscriber
24-
from awscli.customizations.s3.utils import WarningResult, human_readable_size
24+
from awscli.customizations.s3.utils import (
25+
WarningResult,
26+
human_readable_size,
27+
)
2528
from awscli.customizations.utils import uni_print
2629

2730
LOGGER = logging.getLogger(__name__)
@@ -54,6 +57,7 @@ def _create_new_result_cls(name, extra_fields=None, base_cls=BaseResult):
5457
FailureResult = _create_new_result_cls('FailureResult', ['exception'])
5558

5659
DryRunResult = _create_new_result_cls('DryRunResult')
60+
SkipFileResult = _create_new_result_cls('SkipFileResult')
5761

5862
ErrorResult = namedtuple('ErrorResult', ['exception'])
5963

@@ -123,6 +127,17 @@ def _on_failure(self, future, e):
123127
if isinstance(e, FatalError):
124128
error_result_cls = ErrorResult
125129
self._result_queue.put(error_result_cls(exception=e))
130+
elif self._is_precondition_failed(e):
131+
LOGGER.debug(
132+
f"warning: Skipping file {self._src} as it already exists on {self._dest}"
133+
)
134+
self._result_queue.put(
135+
SkipFileResult(
136+
transfer_type=self._transfer_type,
137+
src=self._src,
138+
dest=self._dest,
139+
)
140+
)
126141
else:
127142
self._result_queue.put(
128143
FailureResult(
@@ -133,6 +148,13 @@ def _on_failure(self, future, e):
133148
)
134149
)
135150

151+
def _is_precondition_failed(self, exception):
152+
return (
153+
hasattr(exception, 'response')
154+
and exception.response.get('Error', {}).get('Code')
155+
== 'PreconditionFailed'
156+
)
157+
136158

137159
class BaseResultHandler:
138160
"""Base handler class to be called in the ResultProcessor"""
@@ -150,6 +172,7 @@ def __init__(self):
150172
self.files_transferred = 0
151173
self.files_failed = 0
152174
self.files_warned = 0
175+
self.files_skipped = 0
153176
self.errors = 0
154177
self.expected_bytes_transferred = 0
155178
self.expected_files_transferred = 0
@@ -167,6 +190,7 @@ def __init__(self):
167190
SuccessResult: self._record_success_result,
168191
FailureResult: self._record_failure_result,
169192
WarningResult: self._record_warning_result,
193+
SkipFileResult: self._record_skipped_file_result,
170194
ErrorResult: self._record_error_result,
171195
CtrlCResult: self._record_error_result,
172196
FinalTotalSubmissionsResult: self._record_final_expected_files,
@@ -282,6 +306,9 @@ def _record_failure_result(self, result, **kwargs):
282306
self.files_failed += 1
283307
self.files_transferred += 1
284308

309+
def _record_skipped_file_result(self, result, **kwargs):
310+
self.files_skipped += 1
311+
285312
def _record_warning_result(self, **kwargs):
286313
self.files_warned += 1
287314

@@ -362,6 +389,7 @@ def __init__(
362389
SuccessResult: self._print_success,
363390
FailureResult: self._print_failure,
364391
WarningResult: self._print_warning,
392+
SkipFileResult: self._print_skip,
365393
ErrorResult: self._print_error,
366394
CtrlCResult: self._print_ctrl_c,
367395
DryRunResult: self._print_dry_run,
@@ -380,6 +408,10 @@ def _print_noop(self, **kwargs):
380408
# If the result does not have a handler, then do nothing with it.
381409
pass
382410

411+
def _print_skip(self, **kwargs):
412+
# Don't reset progress length since this result printer doesn't print a newline
413+
self._redisplay_progress(reset_progress_length=False)
414+
383415
def _print_dry_run(self, result, **kwargs):
384416
statement = self.DRY_RUN_FORMAT.format(
385417
transfer_type=result.transfer_type,
@@ -432,16 +464,19 @@ def _get_transfer_location(self, result):
432464
src=result.src, dest=result.dest
433465
)
434466

435-
def _redisplay_progress(self):
467+
def _redisplay_progress(self, reset_progress_length=True):
436468
# Reset to zero because done statements are printed with new lines
437469
# meaning there are no carriage returns to take into account when
438470
# printing the next line.
439-
self._progress_length = 0
471+
if reset_progress_length:
472+
self._progress_length = 0
440473
self._add_progress_if_needed()
441474

442475
def _add_progress_if_needed(self):
443476
if self._has_remaining_progress():
444477
self._print_progress()
478+
else:
479+
self._clear_progress_if_no_more_expected_transfers(ending_char='\r')
445480

446481
def _should_print_progress_now(self):
447482
"""Check to see if should print progres based on frequency.
@@ -467,7 +502,7 @@ def _print_progress(self, **kwargs):
467502
remaining_files = self._get_expected_total(
468503
str(
469504
self._result_recorder.expected_files_transferred
470-
- self._result_recorder.files_transferred
505+
- (self._result_recorder.files_transferred + self._result_recorder.files_skipped)
471506
)
472507
)
473508

@@ -535,7 +570,7 @@ def _adjust_statement_padding(self, print_statement, ending_char='\n'):
535570
def _has_remaining_progress(self):
536571
if not self._result_recorder.expected_totals_are_final():
537572
return True
538-
actual = self._result_recorder.files_transferred
573+
actual = self._result_recorder.files_transferred + self._result_recorder.files_skipped
539574
expected = self._result_recorder.expected_files_transferred
540575
return actual != expected
541576

@@ -545,9 +580,9 @@ def _print_to_out_file(self, statement):
545580
def _print_to_error_file(self, statement):
546581
uni_print(statement, self._error_file)
547582

548-
def _clear_progress_if_no_more_expected_transfers(self, **kwargs):
583+
def _clear_progress_if_no_more_expected_transfers(self, ending_char='\n', **kwargs):
549584
if self._progress_length and not self._has_remaining_progress():
550-
uni_print(self._adjust_statement_padding(''), self._out_file)
585+
uni_print(self._adjust_statement_padding('', ending_char=ending_char), self._out_file)
551586

552587

553588
class NoProgressResultPrinter(ResultPrinter):

awscli/customizations/s3/s3handler.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,36 @@ def _get_fileout(self, fileinfo):
449449
return fileinfo.dest
450450

451451
def _get_warning_handlers(self):
452-
return [self._warn_glacier, self._warn_parent_reference]
452+
return [
453+
self._warn_glacier,
454+
self._warn_parent_reference,
455+
self._warn_if_file_exists_with_no_overwrite,
456+
]
457+
458+
def _warn_if_file_exists_with_no_overwrite(self, fileinfo):
459+
"""
460+
Warning handler to skip downloads when no-overwrite is set and local file exists.
461+
462+
This method prevents overwriting existing local files during S3 download operations
463+
when the --no-overwrite flag is specified. It checks if the destination file already
464+
exists on the local filesystem and skips the download if found.
465+
466+
:type fileinfo: FileInfo
467+
:param fileinfo: The FileInfo object containing transfer details
468+
469+
:rtype: bool
470+
:returns: True if the file should be skipped (exists and no-overwrite is set),
471+
False if the download should proceed
472+
"""
473+
if not self._cli_params.get('no_overwrite'):
474+
return False
475+
fileout = self._get_fileout(fileinfo)
476+
if os.path.exists(fileout):
477+
LOGGER.debug(
478+
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
479+
)
480+
return True
481+
return False
453482

454483
def _format_src_dest(self, fileinfo):
455484
src = self._format_s3_path(fileinfo.src)
@@ -493,7 +522,9 @@ def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
493522
)
494523

495524
def _get_warning_handlers(self):
496-
return [self._warn_glacier]
525+
return [
526+
self._warn_glacier,
527+
]
497528

498529
def _format_src_dest(self, fileinfo):
499530
src = self._format_s3_path(fileinfo.src)

awscli/customizations/s3/subcommands.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,15 @@
664664
),
665665
}
666666

667+
NO_OVERWRITE = {
668+
'name': 'no-overwrite',
669+
'action': 'store_true',
670+
'help_text': (
671+
"This flag prevents overwriting of files at the destination. With this flag, "
672+
"only files not present at the destination will be transferred."
673+
),
674+
}
675+
667676

668677
CASE_CONFLICT = {
669678
'name': 'case-conflict',
@@ -1117,6 +1126,7 @@ class CpCommand(S3TransferCommand):
11171126
EXPECTED_SIZE,
11181127
RECURSIVE,
11191128
CASE_CONFLICT,
1129+
NO_OVERWRITE,
11201130
]
11211131
)
11221132

@@ -1142,6 +1152,7 @@ class MvCommand(S3TransferCommand):
11421152
RECURSIVE,
11431153
VALIDATE_SAME_S3_PATHS,
11441154
CASE_CONFLICT,
1155+
NO_OVERWRITE,
11451156
]
11461157
)
11471158

@@ -1187,7 +1198,7 @@ class SyncCommand(S3TransferCommand):
11871198
}
11881199
]
11891200
+ TRANSFER_ARGS
1190-
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, CASE_CONFLICT]
1201+
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, CASE_CONFLICT, NO_OVERWRITE]
11911202
)
11921203

11931204

@@ -1367,7 +1378,6 @@ def choose_sync_strategies(self):
13671378
sync_type = override_sync_strategy.sync_type
13681379
sync_type += '_sync_strategy'
13691380
sync_strategies[sync_type] = override_sync_strategy
1370-
13711381
return sync_strategies
13721382

13731383
def run(self):
@@ -1454,7 +1464,8 @@ def run(self):
14541464
self._client, self._source_client, self.parameters
14551465
)
14561466

1457-
s3_transfer_handler = S3TransferHandlerFactory(self.parameters)(
1467+
params = self._get_s3_handler_params()
1468+
s3_transfer_handler = S3TransferHandlerFactory(params)(
14581469
self._transfer_manager, result_queue
14591470
)
14601471

@@ -1575,6 +1586,14 @@ def _map_sse_c_params(self, request_parameters, paths_type):
15751586
},
15761587
)
15771588

1589+
def _get_s3_handler_params(self):
1590+
params = self.parameters.copy()
1591+
1592+
# Removing no-overwrite params from sync since file to be synced are
1593+
# already separated out using sync strategy
1594+
if self.cmd == 'sync':
1595+
params.pop('no_overwrite', None)
1596+
return params
15781597
def _should_handle_case_conflicts(self):
15791598
return (
15801599
self.cmd in {'sync', 'cp', 'mv'}
@@ -1706,6 +1725,7 @@ def add_paths(self, paths):
17061725
elif len(paths) == 1:
17071726
self.parameters['dest'] = paths[0]
17081727
self._validate_streaming_paths()
1728+
self._validate_no_overwrite_for_download_streaming()
17091729
self._validate_path_args()
17101730
self._validate_sse_c_args()
17111731
self._validate_not_s3_express_bucket_for_sync()
@@ -1957,3 +1977,14 @@ def _validate_sse_c_copy_source_for_paths(self):
19571977
'--sse-c-copy-source is only supported for '
19581978
'copy operations.'
19591979
)
1980+
1981+
def _validate_no_overwrite_for_download_streaming(self):
1982+
if (
1983+
self.parameters['is_stream']
1984+
and self.parameters.get('no_overwrite')
1985+
and self.parameters['dest'] == '-'
1986+
):
1987+
raise ParamValidationError(
1988+
"--no-overwrite parameter is not supported for "
1989+
"streaming downloads"
1990+
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
import logging
14+
15+
from awscli.customizations.s3.subcommands import NO_OVERWRITE
16+
from awscli.customizations.s3.syncstrategy.base import BaseSync
17+
18+
LOG = logging.getLogger(__name__)
19+
20+
21+
class NoOverwriteSync(BaseSync):
    """Sync strategy that prevents overwriting of existing files at the destination.

    This strategy is used only for files that exist at both source and
    destination (``file_at_src_and_dest_sync_strategy``). It always returns
    False to prevent any overwriting of existing files, regardless of size
    or modification time differences.
    """

    ARGUMENT = NO_OVERWRITE

    def determine_should_sync(self, src_file, dest_file):
        """Never sync: the file is already present at the destination.

        :param src_file: Source-side file description; its ``src`` and
            ``dest`` attributes are used for the debug message.
        :param dest_file: Destination-side file description (unused).
        :rtype: bool
        :returns: Always False.
        """
        # Lazy %-style arguments: this runs once per compared file pair,
        # so the message is only built when debug logging is enabled.
        LOG.debug(
            "warning: skipping %s -> %s, file exists at destination",
            src_file.src,
            src_file.dest,
        )
        return False

awscli/customizations/s3/syncstrategy/register.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from awscli.customizations.s3.syncstrategy.exacttimestamps import (
1515
ExactTimestampsSync,
1616
)
17+
from awscli.customizations.s3.syncstrategy.nooverwrite import NoOverwriteSync
1718
from awscli.customizations.s3.syncstrategy.sizeonly import SizeOnlySync
1819

1920

@@ -48,4 +49,7 @@ def register_sync_strategies(command_table, session, **kwargs):
4849
# Register the delete sync strategy.
4950
register_sync_strategy(session, DeleteSync, 'file_not_at_src')
5051

52+
# Register the nooverwrite sync strategy
53+
register_sync_strategy(session, NoOverwriteSync, 'file_at_src_and_dest')
54+
5155
# Register additional sync strategies here...

awscli/customizations/s3/utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ def map_put_object_params(cls, request_params, cli_params):
489489
cls._set_sse_c_request_params(request_params, cli_params)
490490
cls._set_request_payer_param(request_params, cli_params)
491491
cls._set_checksum_algorithm_param(request_params, cli_params)
492+
cls._set_no_overwrite_param(request_params, cli_params)
492493

493494
@classmethod
494495
def map_get_object_params(cls, request_params, cli_params):
@@ -520,6 +521,7 @@ def map_copy_object_params(cls, request_params, cli_params):
520521
)
521522
cls._set_request_payer_param(request_params, cli_params)
522523
cls._set_checksum_algorithm_param(request_params, cli_params)
524+
cls._set_no_overwrite_param(request_params, cli_params)
523525

524526
@classmethod
525527
def map_head_object_params(cls, request_params, cli_params):
@@ -558,6 +560,11 @@ def map_delete_object_params(cls, request_params, cli_params):
558560
def map_list_objects_v2_params(cls, request_params, cli_params):
559561
cls._set_request_payer_param(request_params, cli_params)
560562

563+
@classmethod
564+
def _set_no_overwrite_param(cls, request_params, cli_params):
565+
if cli_params.get('no_overwrite'):
566+
request_params['IfNoneMatch'] = "*"
567+
561568
@classmethod
562569
def _set_request_payer_param(cls, request_params, cli_params):
563570
if cli_params.get('request_payer'):

0 commit comments

Comments
 (0)