Skip to content
54 changes: 46 additions & 8 deletions awscli/customizations/s3/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@

from awscli.compat import ensure_text_type, queue
from awscli.customizations.s3.subscribers import OnDoneFilteredSubscriber
from awscli.customizations.s3.utils import WarningResult, human_readable_size
from awscli.customizations.s3.utils import (
WarningResult,
human_readable_size,
)
from awscli.customizations.utils import uni_print

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -54,6 +57,7 @@ def _create_new_result_cls(name, extra_fields=None, base_cls=BaseResult):
FailureResult = _create_new_result_cls('FailureResult', ['exception'])

DryRunResult = _create_new_result_cls('DryRunResult')
SkipFileResult = _create_new_result_cls('SkipFileResult')

ErrorResult = namedtuple('ErrorResult', ['exception'])

Expand Down Expand Up @@ -123,6 +127,19 @@ def _on_failure(self, future, e):
if isinstance(e, FatalError):
error_result_cls = ErrorResult
self._result_queue.put(error_result_cls(exception=e))
elif self._is_precondition_failed(e):
LOGGER.debug(
"Warning: Skipping file %s as it already exists on %s",
self._src,
self._dest,
)
self._result_queue.put(
SkipFileResult(
transfer_type=self._transfer_type,
src=self._src,
dest=self._dest,
)
)
else:
self._result_queue.put(
FailureResult(
Expand All @@ -133,6 +150,14 @@ def _on_failure(self, future, e):
)
)

def _is_precondition_failed(self, exception):
"""Check if this is a PreconditionFailed error"""
return (
hasattr(exception, 'response')
and exception.response.get('Error', {}).get('Code')
== 'PreconditionFailed'
)


class BaseResultHandler:
"""Base handler class to be called in the ResultProcessor"""
Expand All @@ -150,6 +175,7 @@ def __init__(self):
self.files_transferred = 0
self.files_failed = 0
self.files_warned = 0
self.files_skipped = 0
self.errors = 0
self.expected_bytes_transferred = 0
self.expected_files_transferred = 0
Expand All @@ -167,6 +193,7 @@ def __init__(self):
SuccessResult: self._record_success_result,
FailureResult: self._record_failure_result,
WarningResult: self._record_warning_result,
SkipFileResult: self._record_skipped_file_result,
ErrorResult: self._record_error_result,
CtrlCResult: self._record_error_result,
FinalTotalSubmissionsResult: self._record_final_expected_files,
Expand Down Expand Up @@ -282,6 +309,9 @@ def _record_failure_result(self, result, **kwargs):
self.files_failed += 1
self.files_transferred += 1

def _record_skipped_file_result(self, result, **kwargs):
self.files_skipped += 1

def _record_warning_result(self, **kwargs):
self.files_warned += 1

Expand Down Expand Up @@ -342,6 +372,7 @@ def __init__(self, result_recorder, out_file=None, error_file=None):
SuccessResult: self._print_success,
FailureResult: self._print_failure,
WarningResult: self._print_warning,
SkipFileResult: self._print_skip,
ErrorResult: self._print_error,
CtrlCResult: self._print_ctrl_c,
DryRunResult: self._print_dry_run,
Expand All @@ -358,6 +389,10 @@ def _print_noop(self, **kwargs):
# If the result does not have a handler, then do nothing with it.
pass

def _print_skip(self, **kwargs):
# Don't reset progress length since this result printer doesn't print a newline
self._redisplay_progress(reset_progress_length=False)

def _print_dry_run(self, result, **kwargs):
statement = self.DRY_RUN_FORMAT.format(
transfer_type=result.transfer_type,
Expand Down Expand Up @@ -410,23 +445,26 @@ def _get_transfer_location(self, result):
src=result.src, dest=result.dest
)

def _redisplay_progress(self):
def _redisplay_progress(self, reset_progress_length=True):
# Reset to zero because done statements are printed with new lines
# meaning there are no carriage returns to take into account when
# printing the next line.
self._progress_length = 0
if reset_progress_length:
self._progress_length = 0
self._add_progress_if_needed()

def _add_progress_if_needed(self):
if self._has_remaining_progress():
self._print_progress()
else:
self._clear_progress_if_no_more_expected_transfers(ending_char='\r')

def _print_progress(self, **kwargs):
# Get all of the statistics in the correct form.
# Get all the statistics in the correct form.
remaining_files = self._get_expected_total(
str(
self._result_recorder.expected_files_transferred
- self._result_recorder.files_transferred
- (self._result_recorder.files_transferred + self._result_recorder.files_skipped)
)
)

Expand Down Expand Up @@ -489,7 +527,7 @@ def _adjust_statement_padding(self, print_statement, ending_char='\n'):
def _has_remaining_progress(self):
if not self._result_recorder.expected_totals_are_final():
return True
actual = self._result_recorder.files_transferred
actual = self._result_recorder.files_transferred + self._result_recorder.files_skipped
expected = self._result_recorder.expected_files_transferred
return actual != expected

Expand All @@ -499,9 +537,9 @@ def _print_to_out_file(self, statement):
def _print_to_error_file(self, statement):
uni_print(statement, self._error_file)

def _clear_progress_if_no_more_expected_transfers(self, **kwargs):
def _clear_progress_if_no_more_expected_transfers(self, ending_char='\n', **kwargs):
if self._progress_length and not self._has_remaining_progress():
uni_print(self._adjust_statement_padding(''), self._out_file)
uni_print(self._adjust_statement_padding('', ending_char=ending_char), self._out_file)


class NoProgressResultPrinter(ResultPrinter):
Expand Down
71 changes: 69 additions & 2 deletions awscli/customizations/s3/s3handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import logging
import os

from botocore.exceptions import ClientError
from s3transfer.manager import TransferManager

from awscli.compat import get_binary_stdin
Expand Down Expand Up @@ -437,7 +438,36 @@ def _get_fileout(self, fileinfo):
return fileinfo.dest

def _get_warning_handlers(self):
return [self._warn_glacier, self._warn_parent_reference]
return [
self._warn_glacier,
self._warn_parent_reference,
self._warn_if_file_exists_with_no_overwrite,
]

def _warn_if_file_exists_with_no_overwrite(self, fileinfo):
"""
Warning handler to skip downloads when no-overwrite is set and local file exists.

This method prevents overwriting existing local files during S3 download operations
when the --no-overwrite flag is specified. It checks if the destination file already
exists on the local filesystem and skips the download if found.

:type fileinfo: FileInfo
:param fileinfo: The FileInfo object containing transfer details

:rtype: bool
:returns: True if the file should be skipped (exists and no-overwrite is set),
False if the download should proceed
"""
if not self._cli_params.get('no_overwrite'):
return False
fileout = self._get_fileout(fileinfo)
if os.path.exists(fileout):
LOGGER.debug(
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
)
return True
return False

def _format_src_dest(self, fileinfo):
src = self._format_s3_path(fileinfo.src)
Expand Down Expand Up @@ -481,7 +511,44 @@ def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
)

def _get_warning_handlers(self):
return [self._warn_glacier]
return [
self._warn_glacier,
self._warn_if_zero_byte_file_exists_with_no_overwrite,
]

def _warn_if_zero_byte_file_exists_with_no_overwrite(self, fileinfo):
"""
Warning handler to skip zero-byte files when no_overwrite is set and file exists.

This method handles the transfer of zero-byte objects when the no-overwrite parameter is specified.
To prevent overwrite, it uses head_object to verify if the object exists at the destination:
If the object is present at destination: skip the file (return True)
If the object is not present at destination: allow transfer (return False)

:type fileinfo: FileInfo
:param fileinfo: The FileInfo object containing transfer details

:rtype: bool
:return: True if file should be skipped, False if transfer should proceed
"""
if not self._cli_params.get('no_overwrite') or (
getattr(fileinfo, 'size') and fileinfo.size > 0
):
return False

bucket, key = find_bucket_key(fileinfo.dest)
client = fileinfo.source_client
try:
client.head_object(Bucket=bucket, Key=key)
LOGGER.debug(
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
)
return True
except ClientError as e:
if e.response['Error']['Code'] == '404':
return False
else:
raise

def _format_src_dest(self, fileinfo):
src = self._format_s3_path(fileinfo.src)
Expand Down
53 changes: 49 additions & 4 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,15 @@
),
}

NO_OVERWRITE = {
'name': 'no-overwrite',
'action': 'store_true',
'help_text': (
"This flag prevents overwriting of files at the destination. With this flag, "
"only files not present at the destination will be transferred."
),
}

TRANSFER_ARGS = [
DRYRUN,
QUIET,
Expand Down Expand Up @@ -1057,7 +1066,14 @@ class CpCommand(S3TransferCommand):
}
]
+ TRANSFER_ARGS
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE]
+ [
METADATA,
COPY_PROPS,
METADATA_DIRECTIVE,
EXPECTED_SIZE,
RECURSIVE,
NO_OVERWRITE,
]
)


Expand All @@ -1081,6 +1097,7 @@ class MvCommand(S3TransferCommand):
METADATA_DIRECTIVE,
RECURSIVE,
VALIDATE_SAME_S3_PATHS,
NO_OVERWRITE,
]
)

Expand Down Expand Up @@ -1126,7 +1143,7 @@ class SyncCommand(S3TransferCommand):
}
]
+ TRANSFER_ARGS
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE]
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, NO_OVERWRITE]
)


Expand Down Expand Up @@ -1297,7 +1314,6 @@ def choose_sync_strategies(self):
sync_type = override_sync_strategy.sync_type
sync_type += '_sync_strategy'
sync_strategies[sync_type] = override_sync_strategy

return sync_strategies

def run(self):
Expand Down Expand Up @@ -1384,7 +1400,8 @@ def run(self):
self._client, self._source_client, self.parameters
)

s3_transfer_handler = S3TransferHandlerFactory(self.parameters)(
params = self._get_s3_handler_params()
s3_transfer_handler = S3TransferHandlerFactory(params)(
self._transfer_manager, result_queue
)

Expand Down Expand Up @@ -1493,6 +1510,16 @@ def _map_sse_c_params(self, request_parameters, paths_type):
},
)

def _get_s3_handler_params(self):
"""
Removing no-overwrite params from sync since file to
be synced are already separated out using sync strategy
"""
params = self.parameters.copy()
if self.cmd == 'sync':
params.pop('no_overwrite', None)
return params


# TODO: This class is fairly quirky in the sense that it is both a builder
# and a data object. In the future we should make the following refactorings
Expand Down Expand Up @@ -1556,6 +1583,7 @@ def add_paths(self, paths):
elif len(paths) == 1:
self.parameters['dest'] = paths[0]
self._validate_streaming_paths()
self._validate_no_overwrite_for_download_streaming()
self._validate_path_args()
self._validate_sse_c_args()
self._validate_not_s3_express_bucket_for_sync()
Expand Down Expand Up @@ -1807,3 +1835,20 @@ def _validate_sse_c_copy_source_for_paths(self):
'--sse-c-copy-source is only supported for '
'copy operations.'
)

def _validate_no_overwrite_for_download_streaming(self):
"""
Validates that no-overwrite parameter is not used with streaming downloads.

Raises:
ParamValidationError: If no-overwrite is specified with a streaming download.
"""
if (
self.parameters['is_stream']
and self.parameters.get('no_overwrite')
and self.parameters['dest'] == '-'
):
raise ParamValidationError(
"--no-overwrite parameter is not supported for "
"streaming downloads"
)
34 changes: 34 additions & 0 deletions awscli/customizations/s3/syncstrategy/nooverwrite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import logging

from awscli.customizations.s3.subcommands import NO_OVERWRITE
from awscli.customizations.s3.syncstrategy.base import BaseSync

LOG = logging.getLogger(__name__)


class NoOverwriteSync(BaseSync):
"""Sync strategy that prevents overwriting of existing files at the destination.
This strategy is used only for files that exist at both source and destination
(file_at_src_and_dest_sync_strategy). It always returns False to prevent any
overwriting of existing files, regardless of size or modification time differences.
"""

ARGUMENT = NO_OVERWRITE

def determine_should_sync(self, src_file, dest_file):
LOG.debug(
f"warning: skipping {src_file.src} -> {src_file.dest}, file exists at destination"
)
return False
Loading