From 751cab82cb8c6c5df89b1ea2c0c76fc9514b337e Mon Sep 17 00:00:00 2001
From: Rakshil Modi
Date: Fri, 11 Jul 2025 13:54:50 -0700
Subject: [PATCH 1/8] Update s3Transfer Manager

Updated s3Transfer manager generating original version
Updated args list
Updated transfer manager for upload
---
 awscli/s3transfer/manager.py               |   1 +
 awscli/s3transfer/upload.py                |   6 +-
 tests/functional/s3transfer/test_upload.py | 152 +++++++++++++++++++++
 3 files changed, 158 insertions(+), 1 deletion(-)

diff --git a/awscli/s3transfer/manager.py b/awscli/s3transfer/manager.py
index baaafe1d37cb..f70e69acbf9f 100644
--- a/awscli/s3transfer/manager.py
+++ b/awscli/s3transfer/manager.py
@@ -195,6 +195,7 @@ class TransferManager:
         + [
             'ChecksumType',
             'MpuObjectSize',
+            'IfNoneMatch',
         ]
         + FULL_OBJECT_CHECKSUM_ARGS
     )
diff --git a/awscli/s3transfer/upload.py b/awscli/s3transfer/upload.py
index a3db8f899e2b..5eb0c6f2af14 100644
--- a/awscli/s3transfer/upload.py
+++ b/awscli/s3transfer/upload.py
@@ -515,7 +515,10 @@ class UploadSubmissionTask(SubmissionTask):
 
     PUT_OBJECT_BLOCKLIST = ["ChecksumType", "MpuObjectSize"]
 
-    CREATE_MULTIPART_BLOCKLIST = FULL_OBJECT_CHECKSUM_ARGS + ["MpuObjectSize"]
+    CREATE_MULTIPART_BLOCKLIST = FULL_OBJECT_CHECKSUM_ARGS + [
+        "MpuObjectSize",
+        "IfNoneMatch",
+    ]
 
     UPLOAD_PART_ARGS = [
         'ChecksumAlgorithm',
@@ -534,6 +537,7 @@ class UploadSubmissionTask(SubmissionTask):
         'ExpectedBucketOwner',
         'ChecksumType',
         'MpuObjectSize',
+        "IfNoneMatch",
     ] + FULL_OBJECT_CHECKSUM_ARGS
 
     def _get_upload_input_manager_cls(self, transfer_future):
diff --git a/tests/functional/s3transfer/test_upload.py b/tests/functional/s3transfer/test_upload.py
index 222c22efd3c9..f614d3bb1c0f 100644
--- a/tests/functional/s3transfer/test_upload.py
+++ b/tests/functional/s3transfer/test_upload.py
@@ -367,6 +367,51 @@ def test_raise_exception_on_s3_object_lambda_resource(self):
         with self.assertRaisesRegex(ValueError, 'methods do not support'):
             self.manager.upload(self.filename, s3_object_lambda_arn, self.key)
 
+    def test_upload_with_no_overwrite_flag_when_object_exists(self):
+        self.extra_args['IfNoneMatch'] = '*'
+        # Mock a PreconditionFailed error
+        self.stubber.add_client_error(
+            'put_object',
+            http_status_code=412,
+            service_error_code='PreconditionFailed',
+            expected_params={
+                'Body': ANY,
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'IfNoneMatch': '*',
+                'ChecksumAlgorithm': DEFAULT_CHECKSUM_ALGORITHM,
+            },
+        )
+        with self.assertRaises(ClientError) as context:
+            future = self.manager.upload(
+                self.filename, self.bucket, self.key, self.extra_args
+            )
+            future.result()
+        # Verify the error is a PreconditionFailed error
+        self.assertEqual(
+            context.exception.response['Error']['Code'], 'PreconditionFailed'
+        )
+        self.stubber.assert_no_pending_responses()
+
+    def test_upload_with_no_overwrite_flag_when_object_does_not_exists(self):
+        self.extra_args['IfNoneMatch'] = '*'
+        self.stubber.add_response(
+            'put_object',
+            service_response={},
+            expected_params={
+                'Body': ANY,
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'IfNoneMatch': '*',
+                'ChecksumAlgorithm': DEFAULT_CHECKSUM_ALGORITHM,
+            },
+        )
+        future = self.manager.upload(
+            self.filename, self.bucket, self.key, self.extra_args
+        )
+        future.result()
+        self.stubber.assert_no_pending_responses()
+
 
 class TestMultipartUpload(BaseUploadTest):
     __test__ = True
@@ -848,3 +893,110 @@ def test_multipart_upload_with_default_checksum_when_required(self):
         )
         future.result()
         self.assert_expected_client_calls_were_correct()
+
+    def test_multipart_upload_with_no_overwrite_flag_when_object_exists(self):
+        self.extra_args['IfNoneMatch'] = '*'
+        # Add response for create_multipart_upload (no IfNoneMatch here)
+        self.add_create_multipart_response_with_default_expected_params()
+        # Add responses for upload_part
+        self.add_upload_part_responses_with_default_expected_params()
+        # Mock a PreconditionFailed error response when trying to complete the multipart upload
+        self.stubber.add_client_error(
+            method='complete_multipart_upload',
+            service_error_code='PreconditionFailed',
+            service_message='The condition specified in the conditional header(s) was not met',
+            http_status_code=412,
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+                'MultipartUpload': {
+                    'Parts': [
+                        {
+                            'ETag': 'etag-1',
+                            'PartNumber': 1,
+                            'ChecksumCRC64NVME': 'sum1==',
+                        },
+                        {
+                            'ETag': 'etag-2',
+                            'PartNumber': 2,
+                            'ChecksumCRC64NVME': 'sum2==',
+                        },
+                        {
+                            'ETag': 'etag-3',
+                            'PartNumber': 3,
+                            'ChecksumCRC64NVME': 'sum3==',
+                        },
+                    ]
+                },
+                'IfNoneMatch': '*',
+            },
+        )
+        # Add response for abort_multipart_upload that should be called after the error
+        self.stubber.add_response(
+            method='abort_multipart_upload',
+            service_response={},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+            },
+        )
+        # Execute the upload and verify it raises the expected exception
+        with self.assertRaises(ClientError) as context:
+            future = self.manager.upload(
+                self.filename, self.bucket, self.key, self.extra_args
+            )
+            future.result()
+        # Verify the error is a PreconditionFailed error
+        self.assertEqual(
+            context.exception.response['Error']['Code'], 'PreconditionFailed'
+        )
+        self.stubber.assert_no_pending_responses()
+
+    def test_multipart_upload_with_no_overwrite_flag_when_object_does_not_exist(
+        self,
+    ):
+        self.extra_args['IfNoneMatch'] = '*'
+        # Add response for create_multipart_upload (no IfNoneMatch here)
+        self.add_create_multipart_response_with_default_expected_params()
+        # Add responses for upload_part
+        self.add_upload_part_responses_with_default_expected_params()
+        # Add response for complete_multipart_upload with IfNoneMatch in expected_params
+        self.stubber.add_response(
+            'complete_multipart_upload',
+            service_response={},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+                'MultipartUpload': {
+                    'Parts': [
+                        {
+                            'ETag': 'etag-1',
+                            'PartNumber': 1,
+                            'ChecksumCRC64NVME': 'sum1==',
+                        },
+                        {
+                            'ETag': 'etag-2',
+                            'PartNumber': 2,
+                            'ChecksumCRC64NVME': 'sum2==',
+                        },
+                        {
+                            'ETag': 'etag-3',
+                            'PartNumber': 3,
+                            'ChecksumCRC64NVME': 'sum3==',
+                        },
+                    ]
+                },
+                'IfNoneMatch': '*',
+            },
+        )
+        # Execute the upload
+        future = self.manager.upload(
+            self.filename, self.bucket, self.key, self.extra_args
+        )
+        future.result()
+        # Verify the upload was successful
+        self.stubber.assert_no_pending_responses()
+        self.assert_upload_part_bodies_were_correct()

From 6345224aef3043c5e31aca7ff09f13ce9f7b21eb Mon Sep 17 00:00:00 2001
From: Rakshil Modi
Date: Fri, 11 Jul 2025 13:53:11 -0700
Subject: [PATCH 2/8] Adding no-overwrite flag for S3 upload

Removed unwanted comments for test
---
 awscli/customizations/s3/results.py        |  20 +++-
 awscli/customizations/s3/subcommands.py    |  19 +++-
 awscli/customizations/s3/utils.py          |   7 ++
 tests/functional/s3/test_cp_command.py     | 101 ++++++++++++++++++++
 tests/functional/s3/test_mv_command.py     | 106 +++++++++++++++++++++
 tests/unit/customizations/s3/test_utils.py |  14 +++
 6 files changed, 265 insertions(+), 2 deletions(-)

diff --git a/awscli/customizations/s3/results.py b/awscli/customizations/s3/results.py
index 558f2f956e9b..05bcd44e2fa0 100644
--- a/awscli/customizations/s3/results.py
+++ b/awscli/customizations/s3/results.py
@@ -21,7 +21,11 @@
 from awscli.compat import ensure_text_type, queue
 from awscli.customizations.s3.subscribers import OnDoneFilteredSubscriber
-from awscli.customizations.s3.utils import WarningResult, human_readable_size
+from awscli.customizations.s3.utils import (
+    WarningResult,
+    create_warning,
+    human_readable_size,
+)
 from awscli.customizations.utils import uni_print
 
 LOGGER = logging.getLogger(__name__)
@@ -123,6 +127,12 @@ def _on_failure(self, future, e):
         if isinstance(e, FatalError):
             error_result_cls = ErrorResult
             self._result_queue.put(error_result_cls(exception=e))
+        elif self._is_precondition_failed(e):
+            LOGGER.debug(
+                "Warning: Skipping file %s as it already exists on %s",
+                self._src,
+                self._dest,
+            )
         else:
             self._result_queue.put(
                 FailureResult(
@@ -133,6 +143,14 @@ def _on_failure(self, future, e):
                 )
             )
 
+    def _is_precondition_failed(self, exception):
+        """Check if this is a PreconditionFailed error"""
+        return (
+            hasattr(exception, 'response')
+            and exception.response.get('Error', {}).get('Code')
+            == 'PreconditionFailed'
+        )
+
 
 class BaseResultHandler:
     """Base handler class to be called in the ResultProcessor"""
diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py
index 8dc8a61fa895..e4123856708c 100644
--- a/awscli/customizations/s3/subcommands.py
+++ b/awscli/customizations/s3/subcommands.py
@@ -642,6 +642,15 @@
     ),
 }
 
+NO_OVERWRITE = {
+    'name': 'no-overwrite',
+    'action': 'store_true',
+    'help_text': (
+        "This flag prevents overwriting of files at the destination. With this flag, "
+        "only files not present at the destination will be transferred."
+    ),
+}
+
 TRANSFER_ARGS = [
     DRYRUN,
     QUIET,
@@ -1057,7 +1066,14 @@ class CpCommand(S3TransferCommand):
         }
     ]
     + TRANSFER_ARGS
-    + [METADATA, COPY_PROPS, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE]
+    + [
+        METADATA,
+        COPY_PROPS,
+        METADATA_DIRECTIVE,
+        EXPECTED_SIZE,
+        RECURSIVE,
+        NO_OVERWRITE,
+    ]
 )
 
@@ -1081,6 +1097,7 @@ class MvCommand(S3TransferCommand):
         METADATA_DIRECTIVE,
         RECURSIVE,
         VALIDATE_SAME_S3_PATHS,
+        NO_OVERWRITE,
     ]
 )
diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py
index f6a26803b082..d4d8dfd2583c 100644
--- a/awscli/customizations/s3/utils.py
+++ b/awscli/customizations/s3/utils.py
@@ -489,6 +489,7 @@ def map_put_object_params(cls, request_params, cli_params):
         cls._set_sse_c_request_params(request_params, cli_params)
         cls._set_request_payer_param(request_params, cli_params)
         cls._set_checksum_algorithm_param(request_params, cli_params)
+        cls._set_no_overwrite_param(request_params, cli_params)
 
     @classmethod
     def map_get_object_params(cls, request_params, cli_params):
@@ -558,6 +559,12 @@ def map_delete_object_params(cls, request_params, cli_params):
     def map_list_objects_v2_params(cls, request_params, cli_params):
         cls._set_request_payer_param(request_params, cli_params)
 
+    @classmethod
+    def _set_no_overwrite_param(cls, request_params, cli_params):
+        """Map the no-overwrite flag to the IfNoneMatch request header"""
+        if cli_params.get('no_overwrite'):
+            request_params['IfNoneMatch'] = "*"
+
     @classmethod
     def _set_request_payer_param(cls, request_params, cli_params):
         if cli_params.get('request_payer'):
diff --git a/tests/functional/s3/test_cp_command.py b/tests/functional/s3/test_cp_command.py
index 5494d0a8a7d9..31ab26f18900 100644
--- a/tests/functional/s3/test_cp_command.py
+++ b/tests/functional/s3/test_cp_command.py
@@ -296,6 +296,107 @@ def test_operations_used_in_recursive_download(self):
         )
         self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
 
+    def test_no_overwrite_flag_when_object_not_exists_on_target(self):
+        full_path = self.files.create_file('foo.txt', 'mycontent')
+        cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
+        self.parsed_responses = [
+            {'ETag': '"c8afdb36c52cf4727836669019e69222"'}
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify PutObject was called
+        self.assertEqual(len(self.operations_called), 1)
+        self.assertEqual(self.operations_called[0][0].name, 'PutObject')
+        # Verify the IfNoneMatch condition was set in the request
+        self.assertEqual(self.operations_called[0][1]['IfNoneMatch'], '*')
+
+    def test_no_overwrite_flag_when_object_exists_on_target(self):
+        full_path = self.files.create_file('foo.txt', 'mycontent')
+        cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
+        # Set up the response to simulate a PreconditionFailed error
+        self.http_response.status_code = 412
+        self.parsed_responses = [
+            {
+                'Error': {
+                    'Code': 'PreconditionFailed',
+                    'Message': 'At least one of the pre-conditions you specified did not hold',
+                    'Condition': 'If-None-Match',
+                }
+            }
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify PutObject was attempted with IfNoneMatch
+        self.assertEqual(len(self.operations_called), 1)
+        self.assertEqual(self.operations_called[0][0].name, 'PutObject')
+        self.assertEqual(self.operations_called[0][1]['IfNoneMatch'], '*')
+
+    def test_no_overwrite_flag_multipart_upload_when_object_not_exists_on_target(
+        self,
+    ):
+        # Create a large file that will trigger multipart upload
+        full_path = self.files.create_file('foo.txt', 'a' * 10 * (1024**2))
+        cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
+
+        # Set up responses for multipart upload
+        self.parsed_responses = [
+            {'UploadId': 'foo'},  # CreateMultipartUpload response
+            {'ETag': '"foo-1"'},  # UploadPart response
+            {'ETag': '"foo-2"'},  # UploadPart response
+            {},  # CompleteMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 4)
+        self.assertEqual(
+            self.operations_called[0][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[1][0].name, 'UploadPart')
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPart')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+
+    def test_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
+        self,
+    ):
+        # Create a large file that will trigger multipart upload
+        full_path = self.files.create_file('foo.txt', 'a' * 10 * (1024**2))
+        cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
+        # Set up responses for multipart upload
+        self.parsed_responses = [
+            {'UploadId': 'foo'},  # CreateMultipartUpload response
+            {'ETag': '"foo-1"'},  # UploadPart response
+            {'ETag': '"foo-2"'},  # UploadPart response
+            {
+                'Error': {
+                    'Code': 'PreconditionFailed',
+                    'Message': 'At least one of the pre-conditions you specified did not hold',
+                    'Condition': 'If-None-Match',
+                }
+            },  # PreconditionFailed error for CompleteMultipartUpload
+            {},  # AbortMultipartUpload response
+        ]
+        # Checking for success as file is skipped
+        self.run_cmd(cmdline, expected_rc=0)
+        # Set up the response to simulate a PreconditionFailed error
+        self.http_response.status_code = 412
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 5)
+        self.assertEqual(
+            self.operations_called[0][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[1][0].name, 'UploadPart')
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPart')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(
+            self.operations_called[4][0].name, 'AbortMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+
     def test_dryrun_download(self):
         self.parsed_responses = [self.head_object_response()]
         target = self.files.full_path('file.txt')
diff --git a/tests/functional/s3/test_mv_command.py b/tests/functional/s3/test_mv_command.py
index 0ef997b50c13..b076a1f9276e 100644
--- a/tests/functional/s3/test_mv_command.py
+++ b/tests/functional/s3/test_mv_command.py
@@ -316,6 +316,112 @@ def test_download_with_checksum_mode_crc32(self):
             self.operations_called[1][1]['ChecksumMode'], 'ENABLED'
         )
 
+    def test_mv_no_overwrite_flag_when_object_not_exists_on_target(self):
+        full_path = self.files.create_file('foo.txt', 'contents')
+        cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify PutObject was called
+        self.assertEqual(len(self.operations_called), 1)
+        self.assertEqual(self.operations_called[0][0].name, 'PutObject')
+        # Verify the IfNoneMatch condition was set in the request
+        self.assertEqual(self.operations_called[0][1]['IfNoneMatch'], '*')
+        # Verify source file was deleted (move operation)
+        self.assertFalse(os.path.exists(full_path))
+
+    def test_mv_no_overwrite_flag_when_object_exists_on_target(self):
+        full_path = self.files.create_file('foo.txt', 'mycontent')
+        cmdline = (
+            f'{self.prefix} {full_path} s3://bucket/foo.txt --no-overwrite'
+        )
+        # Set up the response to simulate a PreconditionFailed error
+        self.http_response.status_code = 412
+        self.parsed_responses = [
+            {
+                'Error': {
+                    'Code': 'PreconditionFailed',
+                    'Message': 'At least one of the pre-conditions you specified did not hold',
+                    'Condition': 'If-None-Match',
+                }
+            }
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify PutObject was attempted with IfNoneMatch
+        self.assertEqual(len(self.operations_called), 1)
+        self.assertEqual(self.operations_called[0][0].name, 'PutObject')
+        self.assertEqual(self.operations_called[0][1]['IfNoneMatch'], '*')
+        # Verify source file was not deleted
+        self.assertTrue(os.path.exists(full_path))
+
+    def test_mv_no_overwrite_flag_multipart_upload_when_object_not_exists_on_target(
+        self,
+    ):
+        # Create a large file that will trigger multipart upload
+        full_path = self.files.create_file('foo.txt', 'a' * 10 * (1024**2))
+        cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
+        # Set up responses for multipart upload
+        self.parsed_responses = [
+            {'UploadId': 'foo'},  # CreateMultipartUpload response
+            {'ETag': '"foo-1"'},  # UploadPart response
+            {'ETag': '"foo-2"'},  # UploadPart response
+            {},  # CompleteMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 4)
+        self.assertEqual(
+            self.operations_called[0][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[1][0].name, 'UploadPart')
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPart')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+        # Verify source file was deleted (successful move operation)
+        self.assertFalse(os.path.exists(full_path))
+
+    def test_mv_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
+        self,
+    ):
+        # Create a large file that will trigger multipart upload
+        full_path = self.files.create_file('foo.txt', 'a' * 10 * (1024**2))
+        cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
+        # Set up responses for multipart upload
+        self.parsed_responses = [
+            {'UploadId': 'foo'},  # CreateMultipartUpload response
+            {'ETag': '"foo-1"'},  # UploadPart response
+            {'ETag': '"foo-2"'},  # UploadPart response
+            {
+                'Error': {
+                    'Code': 'PreconditionFailed',
+                    'Message': 'At least one of the pre-conditions you specified did not hold',
+                    'Condition': 'If-None-Match',
+                }
+            },  # PreconditionFailed error for CompleteMultipartUpload
+            {},  # AbortMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Set up the response to simulate a PreconditionFailed error
+        self.http_response.status_code = 412
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 5)
+        self.assertEqual(
+            self.operations_called[0][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[1][0].name, 'UploadPart')
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPart')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(
+            self.operations_called[4][0].name, 'AbortMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+        # Verify source file was not deleted (failed move operation due to PreconditionFailed)
+        self.assertTrue(os.path.exists(full_path))
+
 
 class TestMvWithCRTClient(BaseCRTTransferClientTest):
     def test_upload_move_using_crt_client(self):
diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py
index 47e006d73910..2ad66f548f44 100644
--- a/tests/unit/customizations/s3/test_utils.py
+++ b/tests/unit/customizations/s3/test_utils.py
@@ -859,6 +859,20 @@ def test_can_specify_amount_for_nonseekable_stream(self):
         self.assertEqual(nonseekable_fileobj.read(3), 'foo')
 
 
+class TestRequestParamsMapperNoOverwrite(unittest.TestCase):
+    def test_map_put_object_params_with_no_overwrite(self):
+        request_params = {}
+        cli_params = {'no_overwrite': True}
+        RequestParamsMapper.map_put_object_params(request_params, cli_params)
+        self.assertEqual(request_params['IfNoneMatch'], '*')
+
+    def test_map_put_object_params_without_no_overwrite(self):
+        request_params = {}
+        cli_params = {}
+        RequestParamsMapper.map_put_object_params(request_params, cli_params)
+        self.assertNotIn('IfNoneMatch', request_params)
+
+
 class TestS3PathResolver:
     _BASE_ACCESSPOINT_ARN = (
         "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/myaccesspoint"

From 2493f8b41e73c66c42794fb3c08115c2766c86b9 Mon Sep 17 00:00:00 2001
From: Rakshilkumar Modi <56144321+rakshil14-2@users.noreply.github.com>
Date: Fri, 25 Jul 2025 14:31:50 -0700
Subject: [PATCH 3/8] Adding No overwrite for copy operation (#9592)

---
 awscli/customizations/s3/results.py        |   1 -
 awscli/customizations/s3/s3handler.py      |  42 +++-
 awscli/customizations/s3/utils.py          |   1 +
 awscli/s3transfer/copies.py                |  53 +++--
 awscli/s3transfer/manager.py               |   2 +-
 tests/functional/s3/__init__.py            |  10 +
 tests/functional/s3/test_cp_command.py     | 155 ++++++++++++++-
 tests/functional/s3/test_mv_command.py     | 152 ++++++++++++++
 tests/functional/s3transfer/test_copy.py   | 221 ++++++++++++++++++++-
 tests/unit/customizations/s3/test_utils.py |  12 ++
 10 files changed, 631 insertions(+), 18 deletions(-)

diff --git a/awscli/customizations/s3/results.py b/awscli/customizations/s3/results.py
index 05bcd44e2fa0..024fa7b79f4c 100644
--- a/awscli/customizations/s3/results.py
+++ b/awscli/customizations/s3/results.py
@@ -23,7 +23,6 @@
 from awscli.customizations.s3.subscribers import OnDoneFilteredSubscriber
 from awscli.customizations.s3.utils import (
     WarningResult,
-    create_warning,
     human_readable_size,
 )
 from awscli.customizations.utils import uni_print
diff --git a/awscli/customizations/s3/s3handler.py b/awscli/customizations/s3/s3handler.py
index 23176f30f889..741e735dc6ff 100644
--- a/awscli/customizations/s3/s3handler.py
+++ b/awscli/customizations/s3/s3handler.py
@@ -13,6 +13,7 @@
 import logging
 import os
 
+from botocore.exceptions import ClientError
 from s3transfer.manager import TransferManager
 
 from awscli.compat import get_binary_stdin
@@ -481,7 +482,46 @@ def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
         )
 
     def _get_warning_handlers(self):
-        return [self._warn_glacier]
+        return [
+            self._warn_glacier,
+            self._warn_if_zero_byte_file_exists_with_no_overwrite,
+        ]
+
+    def _warn_if_zero_byte_file_exists_with_no_overwrite(self, fileinfo):
+        """
+        Warning handler to skip zero-byte files when no_overwrite is set and the file exists.
+
+        This method handles the transfer of zero-byte objects when the no-overwrite parameter is specified.
+        To prevent overwrite, it uses head_object to verify if the object exists at the destination:
+        If the object is present at destination: skip the file (return True)
+        If the object is not present at destination: allow transfer (return False)
+
+        :type fileinfo: FileInfo
+        :param fileinfo: The FileInfo object containing transfer details
+
+        :rtype: bool
+        :return: True if file should be skipped, False if transfer should proceed
+        """
+        if not self._cli_params.get('no_overwrite') or (
+            getattr(fileinfo, 'size') and fileinfo.size > 0
+        ):
+            return False
+
+        bucket, key = find_bucket_key(fileinfo.dest)
+        client = fileinfo.source_client
+        try:
+            client.head_object(Bucket=bucket, Key=key)
+            LOGGER.debug(
+                "Warning: Skipping file %s as it already exists on %s",
+                fileinfo.src,
+                fileinfo.dest,
+            )
+            return True
+        except ClientError as e:
+            if e.response['Error']['Code'] == '404':
+                return False
+            else:
+                raise
 
     def _format_src_dest(self, fileinfo):
         src = self._format_s3_path(fileinfo.src)
diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py
index d4d8dfd2583c..05624581ccd9 100644
--- a/awscli/customizations/s3/utils.py
+++ b/awscli/customizations/s3/utils.py
@@ -521,6 +521,7 @@ def map_copy_object_params(cls, request_params, cli_params):
         )
         cls._set_request_payer_param(request_params, cli_params)
         cls._set_checksum_algorithm_param(request_params, cli_params)
+        cls._set_no_overwrite_param(request_params, cli_params)
 
     @classmethod
     def map_head_object_params(cls, request_params, cli_params):
diff --git a/awscli/s3transfer/copies.py b/awscli/s3transfer/copies.py
index c2ae9ce0ca5a..5dd7df6f229a 100644
--- a/awscli/s3transfer/copies.py
+++ b/awscli/s3transfer/copies.py
@@ -13,6 +13,7 @@
 import copy
 import math
 
+from botocore.exceptions import ClientError
 from s3transfer.tasks import (
     CompleteMultipartUploadTask,
     CreateMultipartUploadTask,
@@ -67,6 +68,7 @@ class CopySubmissionTask(SubmissionTask):
         'CopySourceSSECustomerKeyMD5',
         'MetadataDirective',
         'TaggingDirective',
+        'IfNoneMatch',
     ]
 
     COMPLETE_MULTIPART_ARGS = [
@@ -75,6 +77,11 @@ class CopySubmissionTask(SubmissionTask):
         'SSECustomerKeyMD5',
         'RequestPayer',
         'ExpectedBucketOwner',
+        'IfNoneMatch',
+    ]
+
+    COPY_OBJECT_ARGS_BLOCKLIST = [
+        'IfNoneMatch',
     ]
 
     def _submit(
@@ -98,6 +105,7 @@ def _submit(
         :param transfer_future: The transfer future associated with the
             transfer request that tasks are being submitted for
         """
+        call_args = transfer_future.meta.call_args
         # Determine the size if it was not provided
         if transfer_future.meta.size is None:
             # If a size was not provided figure out the size for the
@@ -105,7 +113,6 @@ def _submit(
             # the TransferManager. If the object is outside of the region
             # of the client, they may have to provide the file size themselves
             # with a completely new client.
-            call_args = transfer_future.meta.call_args
             head_object_request = (
                 self._get_head_object_request_from_copy_source(
                     call_args.copy_source
@@ -127,10 +134,24 @@ def _submit(
             transfer_future.meta.provide_transfer_size(
                 response['ContentLength']
             )
-
-        # If it is greater than threshold do a multipart copy, otherwise
-        # do a regular copy object.
-        if transfer_future.meta.size < config.multipart_threshold:
+        # Check whether IfNoneMatch is enabled and the object has content.
+        # Special handling for 0-byte files: Since multipart copy works with object size
+        # and divides the object into smaller chunks, there's an edge case when the object
+        # size is zero. This would result in 0 parts being calculated, and the
+        # CompleteMultipartUpload operation throws a MalformedXML error when transferring
+        # 0 parts because the XML does not validate against the published schema.
+        # Therefore, 0-byte files are always handled via single copy request regardless
+        # of the multipart threshold setting.
+        should_overwrite = (
+            call_args.extra_args.get("IfNoneMatch")
+            and transfer_future.meta.size != 0
+        )
+        # If it is less than threshold and IfNoneMatch is not in the parameters,
+        # do a regular copy; else do a multipart copy.
+        if (
+            transfer_future.meta.size < config.multipart_threshold
+            and not should_overwrite
+        ):
             self._submit_copy_request(
                 client, config, osutil, request_executor, transfer_future
             )
@@ -147,19 +168,25 @@ def _submit_copy_request(
         # Get the needed progress callbacks for the task
         progress_callbacks = get_callbacks(transfer_future, 'progress')
 
-        # Submit the request of a single copy.
+        # Submit the request of a single copy and make sure it
+        # does not include any blocked arguments.
+        copy_object_extra_args = {
+            param: val
+            for param, val in call_args.extra_args.items()
+            if param not in self.COPY_OBJECT_ARGS_BLOCKLIST
+        }
         self._transfer_coordinator.submit(
             request_executor,
             CopyObjectTask(
                 transfer_coordinator=self._transfer_coordinator,
                 main_kwargs={
-                    'client': client,
-                    'copy_source': call_args.copy_source,
-                    'bucket': call_args.bucket,
-                    'key': call_args.key,
-                    'extra_args': call_args.extra_args,
-                    'callbacks': progress_callbacks,
-                    'size': transfer_future.meta.size,
+                    "client": client,
+                    "copy_source": call_args.copy_source,
+                    "bucket": call_args.bucket,
+                    "key": call_args.key,
+                    "extra_args": copy_object_extra_args,
+                    "callbacks": progress_callbacks,
+                    "size": transfer_future.meta.size,
                 },
                 is_final=True,
             ),
diff --git a/awscli/s3transfer/manager.py b/awscli/s3transfer/manager.py
index f70e69acbf9f..7eb9c540e007 100644
--- a/awscli/s3transfer/manager.py
+++ b/awscli/s3transfer/manager.py
@@ -188,6 +188,7 @@ class TransferManager:
         'SSEKMSEncryptionContext',
         'Tagging',
         'WebsiteRedirectLocation',
+        'IfNoneMatch',
     ]
 
     ALLOWED_UPLOAD_ARGS = (
@@ -195,7 +196,6 @@ class TransferManager:
         + [
             'ChecksumType',
             'MpuObjectSize',
-            'IfNoneMatch',
         ]
         + FULL_OBJECT_CHECKSUM_ARGS
     )
diff --git a/tests/functional/s3/__init__.py b/tests/functional/s3/__init__.py
index 7705071ea752..1a2c21d04b76 100644
--- a/tests/functional/s3/__init__.py
+++ b/tests/functional/s3/__init__.py
@@ -257,6 +257,16 @@ def mp_copy_responses(self):
             self.complete_mpu_response(),
         ]
 
+    def precondition_failed_error_response(self, condition='If-None-Match'):
+        return {
+            'Error': {
+                'Code': 'PreconditionFailed',
+                'Message': 'At least one of the pre-conditions you '
+                'specified did not hold',
+                'Condition': condition,
+            }
+        }
+
 
 class BaseS3CLIRunnerTest(unittest.TestCase):
     def setUp(self):
diff --git a/tests/functional/s3/test_cp_command.py b/tests/functional/s3/test_cp_command.py
index 31ab26f18900..e85d62995862 100644
--- a/tests/functional/s3/test_cp_command.py
+++ b/tests/functional/s3/test_cp_command.py
@@ -335,7 +335,6 @@ def test_no_overwrite_flag_multipart_upload_when_object_not_exists_on_target(
         # Create a large file that will trigger multipart upload
         full_path = self.files.create_file('foo.txt', 'a' * 10 * (1024**2))
         cmdline = f'{self.prefix} {full_path} s3://bucket --no-overwrite'
-
         # Set up responses for multipart upload
         self.parsed_responses = [
             {'UploadId': 'foo'},  # CreateMultipartUpload response
@@ -397,6 +396,160 @@ def test_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
         # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
         self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
 
+    def test_no_overwrite_flag_on_copy_when_small_object_does_not_exist_on_target(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key1.txt --no-overwrite'
+        # Set up responses for multipart copy (since no-overwrite always uses multipart)
+        self.parsed_responses = [
+            self.head_object_response(),  # HeadObject to get source metadata
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response
+            {},  # CompleteMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 4)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(
+            self.operations_called[1][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+
+    def test_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key.txt --no-overwrite'
+        # Set up responses for multipart copy (since no-overwrite always uses multipart)
+        self.parsed_responses = [
+            self.head_object_response(),  # HeadObject to get source metadata
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response
+            self.precondition_failed_error_response(),  # CompleteMultipartUpload
+            {},  # AbortMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 5)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(
+            self.operations_called[1][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(
+            self.operations_called[4][0].name, 'AbortMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+
+    def test_no_overwrite_flag_on_copy_when_zero_size_object_exists_at_destination(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file.txt --no-overwrite'
+        self.parsed_responses = [
+            self.head_object_response(
+                ContentLength=0
+            ),  # Source object (zero size)
+            self.head_object_response(),  # Checking the object at destination
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        self.assertEqual(len(self.operations_called), 2)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
+
+    def test_no_overwrite_flag_on_copy_when_zero_size_object_not_exists_at_destination(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file1.txt --no-overwrite'
+        self.parsed_responses = [
+            self.head_object_response(
+                ContentLength=0
+            ),  # Source object (zero size)
+            {
+                'Error': {'Code': '404', 'Message': 'Not Found'}
+            },  # Object does not exist at destination
+            self.copy_object_response(),  # CopyObject request when the object does not exist
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        self.assertEqual(len(self.operations_called), 3)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[2][0].name, 'CopyObject')
+
+    def test_no_overwrite_flag_on_copy_when_large_object_exists_on_target(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key.txt --no-overwrite'
+        # Set up responses for multipart copy with large object
+        self.parsed_responses = [
+            self.head_object_response(
+                ContentLength=10 * (1024**2)
+            ),  # HeadObject with large content
+            self.get_object_tagging_response({}),  # GetObjectTagging response
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response part 1
+            self.upload_part_copy_response(),  # UploadPartCopy response part 2
+            self.precondition_failed_error_response(),  # CompleteMultipartUpload fails with PreconditionFailed
+            {},  # AbortMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 7)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'GetObjectTagging')
+        self.assertEqual(
+            self.operations_called[2][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[3][0].name, 'UploadPartCopy')
+        self.assertEqual(self.operations_called[4][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[5][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(
+            self.operations_called[6][0].name, 'AbortMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[5][1]['IfNoneMatch'], '*')
+
+    def test_no_overwrite_flag_on_copy_when_large_object_does_not_exist_on_target(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key1.txt --no-overwrite'
+        # Set up responses for multipart copy with large object
+        self.parsed_responses = [
+            self.head_object_response(
+                ContentLength=10 * (1024**2)
+            ),  # HeadObject with large content
+            self.get_object_tagging_response({}),  # GetObjectTagging response
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response part 1
+            self.upload_part_copy_response(),  # UploadPartCopy response part 2
+            {},  # CompleteMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 6)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'GetObjectTagging')
+        self.assertEqual(
+            self.operations_called[2][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[3][0].name, 'UploadPartCopy')
+        self.assertEqual(self.operations_called[4][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[5][0].name, 'CompleteMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[5][1]['IfNoneMatch'], '*')
+
     def test_dryrun_download(self):
         self.parsed_responses = [self.head_object_response()]
         target = self.files.full_path('file.txt')
diff --git a/tests/functional/s3/test_mv_command.py b/tests/functional/s3/test_mv_command.py
index b076a1f9276e..1bff7459f92b 100644
--- a/tests/functional/s3/test_mv_command.py
+++ b/tests/functional/s3/test_mv_command.py
@@ -422,6 +422,158 @@ def test_mv_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
         # Verify source file was not deleted (failed move operation due to PreconditionFailed)
         self.assertTrue(os.path.exists(full_path))
 
+    def test_mv_no_overwrite_flag_on_copy_when_small_object_does_not_exist_on_target(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket2/key1.txt --no-overwrite'
+        # Set up responses for multipart copy (since no-overwrite always uses multipart)
+        self.parsed_responses = [
+            self.head_object_response(),  # HeadObject to get source metadata
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response
+            {},  # CompleteMultipartUpload response
+            self.delete_object_response(),  # DeleteObject (for move operation)
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart copy operations were called
+        self.assertEqual(len(self.operations_called), 5)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(
+            self.operations_called[1][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[4][0].name, 'DeleteObject')
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+
+    def test_mv_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket2/key.txt --no-overwrite'
+        # Set up responses for multipart copy (since no-overwrite always uses multipart)
+        self.parsed_responses = [
+            self.head_object_response(),  # HeadObject to get source metadata
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response
+            self.precondition_failed_error_response(),  # CompleteMultipartUpload PreconditionFailed error
+            {},  # AbortMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Set up the response to simulate a PreconditionFailed error
+        self.http_response.status_code = 412
+        # Verify all multipart copy operations were called
+        self.assertEqual(len(self.operations_called), 5)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(
+            self.operations_called[1][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[3][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(
+            self.operations_called[4][0].name, 'AbortMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
+
+    def test_no_overwrite_flag_on_copy_when_zero_size_object_exists_at_destination(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file.txt --no-overwrite'
+        self.parsed_responses = [
+            self.head_object_response(
+                ContentLength=0
+            ),  # Source object (zero size)
+            self.head_object_response(),  # Checking the object at destination
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        self.assertEqual(len(self.operations_called), 2)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
+
+    def test_no_overwrite_flag_on_copy_when_zero_size_object_not_exists_at_destination(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file1.txt --no-overwrite'
+        self.parsed_responses = [
+            self.head_object_response(
+                ContentLength=0
+            ),  # Source object (zero size)
+            {
+                'Error': {'Code': '404', 'Message': 'Not Found'}
+            },  # Object does not exist at destination
+            self.copy_object_response(),  # CopyObject request when the object does not exist
+            self.delete_object_response(),  # DeleteObject request for the move operation
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        self.assertEqual(len(self.operations_called), 4)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[2][0].name, 'CopyObject')
+        self.assertEqual(self.operations_called[3][0].name, 'DeleteObject')
+
+    def test_mv_no_overwrite_flag_when_large_object_exists_on_target(self):
+        cmdline = f'{self.prefix} s3://bucket1/key1.txt s3://bucket/key1.txt --no-overwrite'
+        self.parsed_responses = [
+            self.head_object_response(ContentLength=10 * (1024**2)),
+            self.get_object_tagging_response({}),  # GetObjectTagging response
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response part 1
+            self.upload_part_copy_response(),  # UploadPartCopy response part 2
+            self.precondition_failed_error_response(),  # CompleteMultipartUpload fails with PreconditionFailed
+            {},  # AbortMultipartUpload response
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 7)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'GetObjectTagging')
+        self.assertEqual(
+            self.operations_called[2][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[3][0].name, 'UploadPartCopy')
+        self.assertEqual(self.operations_called[4][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[5][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(
+            self.operations_called[6][0].name, 'AbortMultipartUpload'
+        )
+        # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
+        self.assertEqual(self.operations_called[5][1]['IfNoneMatch'], '*')
+
+    def test_mv_no_overwrite_flag_when_large_object_does_not_exist_on_target(
+        self,
+    ):
+        cmdline = f'{self.prefix} s3://bucket1/key1.txt s3://bucket/key.txt --no-overwrite'
+        self.parsed_responses = [
+            self.head_object_response(ContentLength=10 * (1024**2)),
+            self.get_object_tagging_response({}),  # GetObjectTagging response
+            self.create_mpu_response('foo'),  # CreateMultipartUpload response
+            self.upload_part_copy_response(),  # UploadPartCopy response part 1
+            self.upload_part_copy_response(),  # UploadPartCopy response part 2
+            {},  # CompleteMultipartUpload response
+            self.delete_object_response(),  # DeleteObject (for move operation)
+        ]
+        self.run_cmd(cmdline, expected_rc=0)
+        # Verify all multipart operations were called
+        self.assertEqual(len(self.operations_called), 7)
+        self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
+        self.assertEqual(self.operations_called[1][0].name, 'GetObjectTagging')
+        self.assertEqual(
+            self.operations_called[2][0].name, 'CreateMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[3][0].name, 'UploadPartCopy')
+        self.assertEqual(self.operations_called[4][0].name, 'UploadPartCopy')
+        self.assertEqual(
+            self.operations_called[5][0].name, 'CompleteMultipartUpload'
+        )
+        self.assertEqual(self.operations_called[6][0].name, 'DeleteObject')
+
 
 class TestMvWithCRTClient(BaseCRTTransferClientTest):
     def test_upload_move_using_crt_client(self):
diff --git a/tests/functional/s3transfer/test_copy.py b/tests/functional/s3transfer/test_copy.py
index 073e7eb45b20..6e3ab50673ff 100644
--- a/tests/functional/s3transfer/test_copy.py
+++ b/tests/functional/s3transfer/test_copy.py
@@ -12,6 +12,7 @@
 # language governing permissions and limitations under the License.
 from botocore.exceptions import ClientError
 from botocore.stub import Stubber
+from s3transfer.copies import CopySubmissionTask
 from s3transfer.manager import TransferConfig, TransferManager
 from s3transfer.utils import MIN_UPLOAD_CHUNKSIZE
 
@@ -275,7 +276,12 @@ def test_copy_maps_extra_args_to_head_object(self):
 
     def test_allowed_copy_params_are_valid(self):
         op_model = self.client.meta.service_model.operation_model('CopyObject')
-        for allowed_upload_arg in self._manager.ALLOWED_COPY_ARGS:
+        allowed_copy_arg = [
+            arg
+            for arg in self._manager.ALLOWED_COPY_ARGS
+            if arg not in CopySubmissionTask.COPY_OBJECT_ARGS_BLOCKLIST
+        ]
+        for allowed_upload_arg in allowed_copy_arg:
             self.assertIn(allowed_upload_arg, op_model.input_shape.members)
 
     def test_copy_with_tagging(self):
@@ -700,3 +706,216 @@ def test_mp_copy_with_tagging_directive(self):
         )
         future.result()
         self.stubber.assert_no_pending_responses()
+
+    def test_copy_with_no_overwrite_flag_when_small_object_exists_at_target(
+        self,
+    ):
+        # Set up IfNoneMatch in extra_args
+        self.extra_args['IfNoneMatch'] = '*'
+        # Setting up the size of object
+        small_content_size = 5
+        self.content = b'0' * small_content_size
+        # Add head object response with small content size
+        head_response = self.create_stubbed_responses()[0]
+        head_response['service_response'] = {
+            'ContentLength': small_content_size
+        }
+        self.stubber.add_response(**head_response)
+        # Should use multipart upload
+        # Add create_multipart_upload response
+        self.stubber.add_response(
+            'create_multipart_upload',
+            service_response={'UploadId': self.multipart_id},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+            },
+        )
+        # Add upload_part_copy response
+        self.stubber.add_response(
+            'upload_part_copy',
+            {'CopyPartResult': {'ETag': 'etag-1'}},
+            {
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'CopySource': self.copy_source,
+                'UploadId': self.multipart_id,
+                'PartNumber': 1,
+                'CopySourceRange': f'bytes=0-{small_content_size-1}',
+            },
+        )
+        # Mock a PreconditionFailed error for complete_multipart_upload
+        self.stubber.add_client_error(
+            method='complete_multipart_upload',
+            service_error_code='PreconditionFailed',
+            service_message='The condition specified in the conditional header(s) was not met',
+            http_status_code=412,
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+                'MultipartUpload': {
+                    'Parts': [{'ETag': 'etag-1', 'PartNumber': 1}]
+                },
+                'IfNoneMatch': '*',
+            },
+        )
+        # Add abort_multipart_upload response
+        self.stubber.add_response(
+            'abort_multipart_upload',
+            service_response={},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+            },
+        )
+        call_kwargs = self.create_call_kwargs()
+        call_kwargs['extra_args'] = self.extra_args
+        future = self.manager.copy(**call_kwargs)
+        with self.assertRaises(ClientError) as context:
+            future.result()
+        self.assertEqual(
+            context.exception.response['Error']['Code'], 'PreconditionFailed'
+        )
+        self.stubber.assert_no_pending_responses()
+
+    def test_copy_with_no_overwrite_flag_when_small_object_not_exists_at_target(
+        self,
+    ):
+        # Set up IfNoneMatch in extra_args
+        self.extra_args['IfNoneMatch'] = '*'
+        # Setting up the size of object
+        small_content_size = 5
+        self.content = b'0' * small_content_size
+        # Add head object response with small content size
+        head_response = self.create_stubbed_responses()[0]
+        head_response['service_response'] = {
+            'ContentLength': small_content_size
+        }
+        self.stubber.add_response(**head_response)
+        # Should use multipart copy
+        # Add create_multipart_upload response
+        self.stubber.add_response(
+            'create_multipart_upload',
+            service_response={'UploadId': self.multipart_id},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+            },
+        )
+        # Add upload_part_copy response
+        self.stubber.add_response(
+            'upload_part_copy',
+            {'CopyPartResult': {'ETag': 'etag-1'}},
+            {
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'CopySource': self.copy_source,
+                'UploadId': self.multipart_id,
+                'PartNumber': 1,
+                'CopySourceRange': f'bytes=0-{small_content_size-1}',
+            },
+        )
+        self.stubber.add_response(
+            'complete_multipart_upload',
+            service_response={},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+                'MultipartUpload': {
+                    'Parts': [{'ETag': 'etag-1', 'PartNumber': 1}]
+                },
+                'IfNoneMatch': '*',
+            },
+        )
+        call_kwargs = self.create_call_kwargs()
+        call_kwargs['extra_args'] = self.extra_args
+        future = self.manager.copy(**call_kwargs)
+        future.result()
+        self.stubber.assert_no_pending_responses()
+
+    def test_copy_with_no_overwrite_flag_when_large_object_exists_at_target(
+        self,
+    ):
+        # Set up IfNoneMatch in extra_args
+        self.extra_args['IfNoneMatch'] = '*'
+        # Add head object response
+        self.add_get_head_response_with_default_expected_params()
+        # Should use multipart upload
+        self.add_create_multipart_response_with_default_expected_params()
+        self.add_upload_part_copy_responses_with_default_expected_params()
+        # Mock a PreconditionFailed error for complete_multipart_upload
+        self.stubber.add_client_error(
+            method='complete_multipart_upload',
+            service_error_code='PreconditionFailed',
+            service_message='The condition specified in the conditional header(s) was not met',
+            http_status_code=412,
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+                'MultipartUpload': {
+                    'Parts': [
+                        {'ETag': 'etag-1', 'PartNumber': 1},
+                        {'ETag': 'etag-2', 'PartNumber': 2},
+                        {'ETag': 'etag-3', 'PartNumber': 3},
+                    ]
+                },
+                'IfNoneMatch': '*',
+            },
+        )
+        # Add abort_multipart_upload response
+        self.stubber.add_response(
+            'abort_multipart_upload',
+            service_response={},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+            },
+        )
+        call_kwargs = self.create_call_kwargs()
+        call_kwargs['extra_args'] = self.extra_args
+        future = self.manager.copy(**call_kwargs)
+        with self.assertRaises(ClientError) as context:
+            future.result()
+        self.assertEqual(
+            context.exception.response['Error']['Code'], 'PreconditionFailed'
+        )
+        self.stubber.assert_no_pending_responses()
+
+    def test_copy_with_no_overwrite_flag_when_large_object_not_exists_at_target(
+        self,
+    ):
+        # Set up IfNoneMatch in extra_args
+        self.extra_args['IfNoneMatch'] = '*'
+        # Add head object response
+        self.add_get_head_response_with_default_expected_params()
+        # Should use multipart upload
+        self.add_create_multipart_response_with_default_expected_params()
+        self.add_upload_part_copy_responses_with_default_expected_params()
+        # Add successful complete_multipart_upload response
+        self.stubber.add_response(
+            'complete_multipart_upload',
+            service_response={},
+            expected_params={
+                'Bucket': self.bucket,
+                'Key': self.key,
+                'UploadId': self.multipart_id,
+                'MultipartUpload': {
+                    'Parts': [
+                        {'ETag': 'etag-1', 'PartNumber': 1},
+                        {'ETag': 'etag-2', 'PartNumber': 2},
+                        {'ETag': 'etag-3', 'PartNumber': 3},
+                    ]
+                },
+                'IfNoneMatch': '*',
+            },
+        )
+        call_kwargs = self.create_call_kwargs()
+        call_kwargs['extra_args'] = self.extra_args
+        future = self.manager.copy(**call_kwargs)
+        future.result()
+        self.stubber.assert_no_pending_responses()
diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py
index 2ad66f548f44..0c79dc0ea913 100644
--- a/tests/unit/customizations/s3/test_utils.py
+++ b/tests/unit/customizations/s3/test_utils.py
@@ -872,6 +872,18 @@ def test_map_put_object_params_without_no_overwrite(self):
         RequestParamsMapper.map_put_object_params(request_params, cli_params)
         self.assertNotIn('IfNoneMatch', request_params)
 
+    def test_map_copy_object_params_with_no_overwrite(self):
+        request_params = {}
+        cli_params = {'no_overwrite': True}
+        RequestParamsMapper.map_copy_object_params(request_params, cli_params)
+        self.assertEqual(request_params['IfNoneMatch'], '*')
+
+    def test_map_copy_object_params_without_no_overwrite(self):
+        request_params = {}
+        cli_params = {}
+        RequestParamsMapper.map_copy_object_params(request_params, cli_params)
+        self.assertNotIn('IfNoneMatch', request_params)
+
 
 class TestS3PathResolver:
     _BASE_ACCESSPOINT_ARN = (

From 64fc09135c23a0d37c5e59ffdd9e3de4bf89d4cf Mon Sep 17 00:00:00 2001
From: Rakshil Modi
Date: Sat, 12 Jul 2025 14:22:58 -0700
Subject: [PATCH 4/8] Updated s3 handler

Adding Warning handler for no_overwrite
---
 awscli/customizations/s3/s3handler.py         | 33 ++++++++++++++-
 .../unit/customizations/s3/test_s3handler.py  | 33 +++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/awscli/customizations/s3/s3handler.py b/awscli/customizations/s3/s3handler.py
index 741e735dc6ff..5abe55519acf 100644
--- a/awscli/customizations/s3/s3handler.py
+++ b/awscli/customizations/s3/s3handler.py
@@ -438,7 +438,38 @@ def _get_fileout(self, fileinfo):
         return fileinfo.dest
 
     def _get_warning_handlers(self):
-        return [self._warn_glacier, self._warn_parent_reference]
+        return [
+            self._warn_glacier,
+            self._warn_parent_reference,
+            self._warn_if_file_exists_with_no_overwrite,
+        ]
+
+    def _warn_if_file_exists_with_no_overwrite(self, fileinfo):
+        """
+        Warning handler to skip downloads when no-overwrite is set and the local file exists.
+
+        This method prevents overwriting existing local files during S3 download operations
+        when the --no-overwrite flag is specified. It checks if the destination file already
+        exists on the local filesystem and skips the download if found.
+
+        :type fileinfo: awscli.customizations.s3.fileinfo.FileInfo
+        :param fileinfo: The FileInfo object containing source and destination details
+
+        :rtype: bool
+        :returns: True if the file should be skipped (exists and no-overwrite is set),
+            False if the download should proceed
+        """
+        if not self._cli_params.get('no_overwrite'):
+            return False
+        fileout = self._get_fileout(fileinfo)
+        if os.path.exists(fileout):
+            LOGGER.debug(
+                "Warning: Skipping file %s as it already exists on %s",
+                fileinfo.src,
+                fileinfo.dest,
+            )
+            return True
+        return False
 
     def _format_src_dest(self, fileinfo):
         src = self._format_s3_path(fileinfo.src)
diff --git a/tests/unit/customizations/s3/test_s3handler.py b/tests/unit/customizations/s3/test_s3handler.py
index 765f66b2c8e9..eee3b7747aa0 100644
--- a/tests/unit/customizations/s3/test_s3handler.py
+++ b/tests/unit/customizations/s3/test_s3handler.py
@@ -679,6 +679,39 @@ def test_submit_move_adds_delete_source_subscriber(self):
         for i, actual_subscriber in enumerate(actual_subscribers):
             self.assertIsInstance(actual_subscriber, ref_subscribers[i])
 
+    def test_submit_with_no_overwrite_flag_when_file_exists_at_destination(
+        self,
+    ):
+        # Setting up the CLI params with no_overwrite flag as True
+        self.cli_params['no_overwrite'] = True
+        fileinfo = self.create_file_info(self.key)
+        # Mocking os.path.exists to simulate that file already exists
+        with mock.patch('os.path.exists', return_value=True):
+            # Submitting download request
+            future = self.transfer_request_submitter.submit(fileinfo)
+        # Asserting that the future is None, as the file already exists
+        self.assertIsNone(future)
+        # Asserting that no download happened
+        self.assert_no_downloads_happened()
+
+    def test_submit_with_no_overwrite_flag_when_file_does_not_exist_at_destination(
+        self,
+    ):
+        # Setting up the CLI params with no_overwrite flag as True
+        self.cli_params['no_overwrite'] = True
+        fileinfo = self.create_file_info(self.key)
+        # Mocking os.path.exists to return False, to simulate that file does not exist
+        with mock.patch('os.path.exists', return_value=False):
+            # Submitting download request
+            future = self.transfer_request_submitter.submit(fileinfo)
+        # Asserting that the future is the same object returned by transfer_manager.download
+        # This confirms that download was actually initiated
+        self.assertIs(self.transfer_manager.download.return_value, future)
+        # Asserting that download happened
+        self.assertEqual(
+            len(self.transfer_manager.download.call_args_list), 1
+        )
+
 
 class TestCopyRequestSubmitter(BaseTransferRequestSubmitterTest):
     def setUp(self):

From b8d8987c92b7207c66ad6f83295ddd19e7dc5270 Mon Sep 17 00:00:00 2001
From: Rakshil Modi
Date: Sat, 12 Jul 2025 14:23:54 -0700
Subject: [PATCH 5/8] Added no overwrite for download operation

Handled error during download streaming
---
 awscli/customizations/s3/subcommands.py    | 20 ++++
 tests/functional/s3/test_cp_command.py     | 68 ++++++++++++++
 tests/functional/s3/test_mv_command.py     | 72 +++++++++++++++
 .../customizations/s3/test_subcommands.py  | 11 +++
 4 files changed, 171 insertions(+)

diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py
index e4123856708c..1ecfd0ac21b2 100644
--- a/awscli/customizations/s3/subcommands.py
+++ b/awscli/customizations/s3/subcommands.py
@@ -1603,6 +1603,7 @@ def _validate_streaming_paths(self):
         self.parameters['is_stream'] = True
self.parameters['dir_op'] = False self.parameters['only_show_errors'] = True + self._validate_streaming_no_overwrite_for_download_parameter() def _validate_path_args(self): # If we're using a mv command, you can't copy the object onto itself. @@ -1824,3 +1825,22 @@ def _validate_sse_c_copy_source_for_paths(self): '--sse-c-copy-source is only supported for ' 'copy operations.' ) + + def _validate_streaming_no_overwrite_for_download_parameter(self): + """ + Validates that no-overwrite parameter is not used with streaming downloads. + + When downloading from S3 to stdout (streaming download), the no-overwrite + parameter doesn't make sense since stdout is not a file that can be checked + for existence. This method checks for this invalid combination and raises + an appropriate error. + + Raises: + ParamValidationError: If no-overwrite is specified with a streaming download. + """ + params = self.parameters + if params['paths_type'] == 's3local' and params.get('no_overwrite'): + raise ParamValidationError( + "--no-overwrite parameter is not supported for " + "streaming downloads" + ) diff --git a/tests/functional/s3/test_cp_command.py b/tests/functional/s3/test_cp_command.py index e85d62995862..03268d5d909c 100644 --- a/tests/functional/s3/test_cp_command.py +++ b/tests/functional/s3/test_cp_command.py @@ -550,6 +550,74 @@ def test_no_overwrite_flag_on_copy_when_large_object_does_not_exist_on_target( # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request self.assertEqual(self.operations_called[5][1]['IfNoneMatch'], '*') + def test_no_overwrite_flag_on_download_when_small_object_already_exists_at_target( + self, + ): + full_path = self.files.create_file('foo.txt', 'existing content') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + self.parsed_responses = [ + self.head_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 1) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + with open(full_path) as f: + self.assertEqual(f.read(), 'existing content') + + def test_no_overwrite_flag_on_download_when_small_object_does_not_exist_at_target( + self, + ): + full_path = self.files.full_path('foo.txt') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + self.parsed_responses = [ + self.head_object_response(), + self.get_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 2) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + self.assertEqual(self.operations_called[1][0].name, 'GetObject') + with open(full_path) as f: + self.assertEqual(f.read(), 'foo') + + def test_no_overwrite_flag_on_download_when_large_object_exists_at_target( + self, + ): + full_path = self.files.create_file('foo.txt', 'existing content') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + self.parsed_responses = [ + self.head_object_response(ContentLength=10 * (1024**2)), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 1) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + with open(full_path) as f: + self.assertEqual(f.read(), 'existing content') + + def test_no_overwrite_flag_on_download_when_large_object_does_not_exist_at_target( + self, + ): + full_path = self.files.full_path('foo.txt') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + 
self.parsed_responses = [ + self.head_object_response(ContentLength=10 * (1024**2)), + self.get_object_response(), + self.get_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 3) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + self.assertEqual(self.operations_called[1][0].name, 'GetObject') + self.assertEqual(self.operations_called[2][0].name, 'GetObject') + def test_dryrun_download(self): self.parsed_responses = [self.head_object_response()] target = self.files.full_path('file.txt') diff --git a/tests/functional/s3/test_mv_command.py b/tests/functional/s3/test_mv_command.py index 1bff7459f92b..661690cc12b2 100644 --- a/tests/functional/s3/test_mv_command.py +++ b/tests/functional/s3/test_mv_command.py @@ -575,6 +575,78 @@ def test_mv_no_overwrite_flag_when_large_object_does_not_exist_on_target( ) self.assertEqual(self.operations_called[6][0].name, 'DeleteObject') + def test_no_overwrite_flag_on_mv_download_when_small_object_exists_at_target( + self, + ): + full_path = self.files.create_file('foo.txt', 'existing content') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + self.parsed_responses = [ + self.head_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 1) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + with open(full_path) as f: + self.assertEqual(f.read(), 'existing content') + + def test_no_overwrite_flag_on_mv_download_when_small_object_does_not_exist_at_target( + self, + ): + full_path = self.files.full_path('foo.txt') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + self.parsed_responses = [ + self.head_object_response(), + self.get_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 3) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + self.assertEqual(self.operations_called[1][0].name, 'GetObject') + self.assertEqual(self.operations_called[2][0].name, 'DeleteObject') + with open(full_path) as f: + self.assertEqual(f.read(), 'foo') + + def test_no_overwrite_flag_on_mv_download_when_large_object_exists_at_target( + self, + ): + full_path = self.files.create_file('foo.txt', 'existing content') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + self.parsed_responses = [ + self.head_object_response(ContentLength=10 * (1024**2)), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 1) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + with open(full_path) as f: + self.assertEqual(f.read(), 'existing content') + + def test_no_overwrite_flag_on_mv_download_when_large_object_does_not_exist_at_target( + self, + ): + full_path = self.files.full_path('foo.txt') + cmdline = ( + f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' + ) + self.parsed_responses = [ + self.head_object_response(ContentLength=10 * (1024**2)), + self.get_object_response(), + self.get_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 4) + self.assertEqual(self.operations_called[0][0].name, 'HeadObject') + self.assertEqual(self.operations_called[1][0].name, 'GetObject') + self.assertEqual(self.operations_called[2][0].name, 'GetObject') + 
self.assertEqual(self.operations_called[3][0].name, 'DeleteObject')
+
 
 class TestMvWithCRTClient(BaseCRTTransferClientTest):
     def test_upload_move_using_crt_client(self):
diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py
index 789e1affe79b..8e5c06e3d93c 100644
--- a/tests/unit/customizations/s3/test_subcommands.py
+++ b/tests/unit/customizations/s3/test_subcommands.py
@@ -524,10 +524,21 @@ def test_validate_streaming_paths_download(self):
         paths = ['s3://bucket/key', '-']
         cmd_params = CommandParameters('cp', {}, '')
         cmd_params.add_paths(paths)
+        print(" Paramertes %s", cmd_params.parameters)
         self.assertTrue(cmd_params.parameters['is_stream'])
         self.assertTrue(cmd_params.parameters['only_show_errors'])
         self.assertFalse(cmd_params.parameters['dir_op'])
 
+    def test_validate_streaming_paths_with_no_overwrite(self):
+        paths = ['s3://bucket/key', '-']
+        cmd_params = CommandParameters('cp', {'no_overwrite': True}, '')
+        with self.assertRaises(ParamValidationError) as cm:
+            cmd_params.add_paths(paths)
+        self.assertIn(
+            '--no-overwrite parameter is not supported for streaming downloads',
+            str(cm.exception),
+        )
+
     def test_validate_no_streaming_paths(self):
         paths = [self.file_creator.rootdir, 's3://bucket']
         cmd_params = CommandParameters('cp', {}, '')

From 17098c98023e67d884cc3d6df64ebc40e8075709 Mon Sep 17 00:00:00 2001
From: Rakshil Modi
Date: Sun, 13 Jul 2025 11:20:58 -0700
Subject: [PATCH 6/8] Add new no-overwrite sync strategy

Updated test cases and doc comments.

Overrode the sync strategy for files present at both source and
destination.
---
 awscli/customizations/s3/subcommands.py       |  6 ---
 .../s3/syncstrategy/nooverwrite.py            | 34 +++++++++++++
 .../s3/syncstrategy/register.py               |  4 ++
 .../s3/syncstrategy/test_nooverwrite.py       | 50 +++++++++++++++++++
 4 files changed, 88 insertions(+), 6 deletions(-)
 create mode 100644 awscli/customizations/s3/syncstrategy/nooverwrite.py
 create mode 100644 tests/unit/customizations/s3/syncstrategy/test_nooverwrite.py

diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py
index 1ecfd0ac21b2..39c3a350e5b6 100644
--- a/awscli/customizations/s3/subcommands.py
+++ b/awscli/customizations/s3/subcommands.py
@@ -1314,7 +1314,6 @@ def choose_sync_strategies(self):
             sync_type = override_sync_strategy.sync_type
             sync_type += '_sync_strategy'
             sync_strategies[sync_type] = override_sync_strategy
-
         return sync_strategies
 
     def run(self):
@@ -1830,11 +1829,6 @@ def _validate_streaming_no_overwrite_for_download_parameter(self):
         """
         Validates that no-overwrite parameter is not used with streaming downloads.
 
-        When downloading from S3 to stdout (streaming download), the no-overwrite
-        parameter doesn't make sense since stdout is not a file that can be checked
-        for existence. This method checks for this invalid combination and raises
-        an appropriate error.
-
         Raises:
             ParamValidationError: If no-overwrite is specified with a streaming download.
         """
diff --git a/awscli/customizations/s3/syncstrategy/nooverwrite.py b/awscli/customizations/s3/syncstrategy/nooverwrite.py
new file mode 100644
index 000000000000..b46923021dfa
--- /dev/null
+++ b/awscli/customizations/s3/syncstrategy/nooverwrite.py
@@ -0,0 +1,34 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License.
A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import logging + +from awscli.customizations.s3.subcommands import NO_OVERWRITE +from awscli.customizations.s3.syncstrategy.base import BaseSync + +LOG = logging.getLogger(__name__) + + +class NoOverwriteSync(BaseSync): + """Sync strategy that prevents overwriting of existing files at the destination. + This strategy is used only for files that exist at both source and destination + (file_at_src_and_dest_sync_strategy). It always returns False to prevent any + overwriting of existing files, regardless of size or modification time differences. + """ + + ARGUMENT = NO_OVERWRITE + + def determine_should_sync(self, src_file, dest_file): + LOG.debug( + f"warning: skipping {src_file.src} -> {src_file.dest}, file exists at destination" + ) + return False diff --git a/awscli/customizations/s3/syncstrategy/register.py b/awscli/customizations/s3/syncstrategy/register.py index 13f2c35c0620..544393631fb2 100644 --- a/awscli/customizations/s3/syncstrategy/register.py +++ b/awscli/customizations/s3/syncstrategy/register.py @@ -14,6 +14,7 @@ from awscli.customizations.s3.syncstrategy.exacttimestamps import ( ExactTimestampsSync, ) +from awscli.customizations.s3.syncstrategy.nooverwrite import NoOverwriteSync from awscli.customizations.s3.syncstrategy.sizeonly import SizeOnlySync @@ -48,4 +49,7 @@ def register_sync_strategies(command_table, session, **kwargs): # Register the delete sync strategy. register_sync_strategy(session, DeleteSync, 'file_not_at_src') + # Register the noOverwrite sync strategy + register_sync_strategy(session, NoOverwriteSync, 'file_at_src_and_dest') + # Register additional sync strategies here... diff --git a/tests/unit/customizations/s3/syncstrategy/test_nooverwrite.py b/tests/unit/customizations/s3/syncstrategy/test_nooverwrite.py new file mode 100644 index 000000000000..674184311ff0 --- /dev/null +++ b/tests/unit/customizations/s3/syncstrategy/test_nooverwrite.py @@ -0,0 +1,50 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
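+# NoOverwriteSync is registered for the file_at_src_and_dest case only:
+# determine_should_sync always returns False for files present at both
+# source and destination, while files missing from the destination are
+# still synced by the default strategies.
+#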
+import datetime
+
+import pytest
+
+from awscli.customizations.s3.filegenerator import FileStat
+from awscli.customizations.s3.syncstrategy.nooverwrite import NoOverwriteSync
+
+
+@pytest.fixture
+def sync_strategy():
+    return NoOverwriteSync('file_at_src_and_dest')
+
+
+def test_file_exists_at_destination_with_same_key(sync_strategy):
+    time_src = datetime.datetime.now()
+
+    src_file = FileStat(
+        src='',
+        dest='',
+        compare_key='test.py',
+        size=10,
+        last_update=time_src,
+        src_type='local',
+        dest_type='s3',
+        operation_name='upload',
+    )
+    dest_file = FileStat(
+        src='',
+        dest='',
+        compare_key='test.py',
+        size=100,
+        last_update=time_src,
+        src_type='local',
+        dest_type='s3',
+        operation_name='',
+    )
+    should_sync = sync_strategy.determine_should_sync(src_file, dest_file)
+    assert not should_sync

From eb56a5820016ad74136ff59216be6139779355d0 Mon Sep 17 00:00:00 2001
From: Rakshil Modi
Date: Sun, 13 Jul 2025 11:22:39 -0700
Subject: [PATCH 7/8] Add no-overwrite support for the sync operation

Handled errors during streaming downloads.

Improved and added upload test cases.

Resolved requested review changes.
---
 awscli/customizations/s3/s3handler.py         |  12 +-
 awscli/customizations/s3/subcommands.py       |  26 +++-
 tests/functional/s3/test_cp_command.py        |  63 ++--------
 tests/functional/s3/test_mv_command.py        |  56 +--------
 tests/functional/s3/test_sync_command.py      |  85 +++++++++++++
 .../unit/customizations/s3/test_s3handler.py  | 115 ++++++++++++++----
 .../customizations/s3/test_subcommands.py     |   1 -
 7 files changed, 218 insertions(+), 140 deletions(-)

diff --git a/awscli/customizations/s3/s3handler.py b/awscli/customizations/s3/s3handler.py
index 5abe55519acf..2b882a3a4de1 100644
--- a/awscli/customizations/s3/s3handler.py
+++ b/awscli/customizations/s3/s3handler.py
@@ -452,8 +452,8 @@ def _warn_if_file_exists_with_no_overwrite(self, fileinfo):
         when the --no-overwrite flag is specified. It checks if the destination file already
         exists on the local filesystem and skips the download if found.
-        :type fileinfo: awscli.customizations.s3.fileinfo.FileInfo
-        :param fileinfo: The FileInfo object containing source and destination details
+        :type fileinfo: FileInfo
+        :param fileinfo: The FileInfo object containing transfer details
 
         :rtype: bool
         :returns: True if the file should be skipped (exists and no-overwrite is set),
@@ -464,9 +464,7 @@ def _warn_if_file_exists_with_no_overwrite(self, fileinfo):
         fileout = self._get_fileout(fileinfo)
         if os.path.exists(fileout):
             LOGGER.debug(
-                "Warning: Skipping file %s as it already exists on %s",
-                fileinfo.src,
-                fileinfo.dest,
+                f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
             )
             return True
         return False
@@ -543,9 +541,7 @@ def _warn_if_zero_byte_file_exists_with_no_overwrite(self, fileinfo):
         try:
             client.head_object(Bucket=bucket, Key=key)
             LOGGER.debug(
-                "Warning: Skipping file %s as it already exists on %s",
-                fileinfo.src,
-                fileinfo.dest,
+                f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
             )
             return True
         except ClientError as e:
diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py
index 39c3a350e5b6..0f69925c57fc 100644
--- a/awscli/customizations/s3/subcommands.py
+++ b/awscli/customizations/s3/subcommands.py
@@ -1143,7 +1143,7 @@ class SyncCommand(S3TransferCommand):
             }
         ]
         + TRANSFER_ARGS
-        + [METADATA, COPY_PROPS, METADATA_DIRECTIVE]
+        + [METADATA, COPY_PROPS, METADATA_DIRECTIVE, NO_OVERWRITE]
     )
 
 
@@ -1400,7 +1400,8 @@ def run(self):
             self._client, self._source_client, self.parameters
         )
 
-        s3_transfer_handler = S3TransferHandlerFactory(self.parameters)(
+        params = self._get_s3_handler_params()
+        s3_transfer_handler = S3TransferHandlerFactory(params)(
             self._transfer_manager, result_queue
         )
 
@@ -1509,6 +1510,16 @@ def _map_sse_c_params(self, request_parameters, paths_type):
             },
         )
 
+    def _get_s3_handler_params(self):
+        """
+        Remove the no-overwrite parameter for sync, since the files to
+        be synced have already been filtered out by the sync strategy.
+        """
+        params = self.parameters.copy()
+        if self.cmd == 'sync':
+            params.pop('no_overwrite', None)
+        return params
+
 
 # TODO: This class is fairly quirky in the sense that it is both a builder
 # and a data object. In the future we should make the following refactorings
@@ -1572,6 +1583,7 @@ def add_paths(self, paths):
         elif len(paths) == 1:
             self.parameters['dest'] = paths[0]
         self._validate_streaming_paths()
+        self._validate_no_overwrite_for_download_streaming()
         self._validate_path_args()
         self._validate_sse_c_args()
         self._validate_not_s3_express_bucket_for_sync()
@@ -1602,7 +1614,6 @@ def _validate_streaming_paths(self):
             self.parameters['is_stream'] = True
             self.parameters['dir_op'] = False
             self.parameters['only_show_errors'] = True
-            self._validate_streaming_no_overwrite_for_download_parameter()
 
     def _validate_path_args(self):
         # If we're using a mv command, you can't copy the object onto itself.
@@ -1825,15 +1836,18 @@ def _validate_sse_c_copy_source_for_paths(self):
                 'copy operations.'
             )
 
-    def _validate_streaming_no_overwrite_for_download_parameter(self):
+    def _validate_no_overwrite_for_download_streaming(self):
        """
         Validates that no-overwrite parameter is not used with streaming downloads.
 
         Raises:
             ParamValidationError: If no-overwrite is specified with a streaming download.
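 
         Example (illustrative): `aws s3 cp s3://bucket/key - --no-overwrite`
         is rejected, because stdout cannot be checked for an existing file.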
""" - params = self.parameters - if params['paths_type'] == 's3local' and params.get('no_overwrite'): + if ( + self.parameters['is_stream'] + and self.parameters.get('no_overwrite') + and self.parameters['dest'] == '-' + ): raise ParamValidationError( "--no-overwrite parameter is not supported for " "streaming downloads" diff --git a/tests/functional/s3/test_cp_command.py b/tests/functional/s3/test_cp_command.py index 03268d5d909c..48785cddeff1 100644 --- a/tests/functional/s3/test_cp_command.py +++ b/tests/functional/s3/test_cp_command.py @@ -315,13 +315,7 @@ def test_no_overwrite_flag_when_object_exists_on_target(self): # Set up the response to simulate a PreconditionFailed error self.http_response.status_code = 412 self.parsed_responses = [ - { - 'Error': { - 'Code': 'PreconditionFailed', - 'Message': 'At least one of the pre-conditions you specified did not hold', - 'Condition': 'If-None-Match', - } - } + self.precondition_failed_error_response(), ] self.run_cmd(cmdline, expected_rc=0) # Verify PutObject was attempted with IfNoneMatch @@ -367,13 +361,7 @@ def test_no_overwrite_flag_multipart_upload_when_object_exists_on_target( {'UploadId': 'foo'}, # CreateMultipartUpload response {'ETag': '"foo-1"'}, # UploadPart response {'ETag': '"foo-2"'}, # UploadPart response - { - 'Error': { - 'Code': 'PreconditionFailed', - 'Message': 'At least one of the pre-conditions you specified did not hold', - 'Condition': 'If-None-Match', - } - }, # PreconditionFailed error for CompleteMultipart Upload + self.precondition_failed_error_response(), # PreconditionFailed error for CompleteMultipart Upload {}, # AbortMultipartUpload response ] # Checking for success as file is skipped @@ -550,7 +538,7 @@ def test_no_overwrite_flag_on_copy_when_large_object_does_not_exist_on_target( # Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request self.assertEqual(self.operations_called[5][1]['IfNoneMatch'], '*') - def test_no_overwrite_flag_on_download_when_small_object_already_exists_at_target( + def test_no_overwrite_flag_on_download_when_single_object_already_exists_at_target( self, ): full_path = self.files.create_file('foo.txt', 'existing content') @@ -566,7 +554,7 @@ def test_no_overwrite_flag_on_download_when_small_object_already_exists_at_targe with open(full_path) as f: self.assertEqual(f.read(), 'existing content') - def test_no_overwrite_flag_on_download_when_small_object_does_not_exist_at_target( + def test_no_overwrite_flag_on_download_when_single_object_does_not_exist_at_target( self, ): full_path = self.files.full_path('foo.txt') @@ -584,40 +572,6 @@ def test_no_overwrite_flag_on_download_when_small_object_does_not_exist_at_targe with open(full_path) as f: self.assertEqual(f.read(), 'foo') - def test_no_overwrite_flag_on_download_when_large_object_exists_at_target( - self, - ): - full_path = self.files.create_file('foo.txt', 'existing content') - cmdline = ( - f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' - ) - self.parsed_responses = [ - self.head_object_response(ContentLength=10 * (1024**2)), - ] - self.run_cmd(cmdline, expected_rc=0) - self.assertEqual(len(self.operations_called), 1) - self.assertEqual(self.operations_called[0][0].name, 'HeadObject') - with open(full_path) as f: - self.assertEqual(f.read(), 'existing content') - - def test_no_overwrite_flag_on_download_when_large_object_does_not_exist_at_target( - self, - ): - full_path = self.files.full_path('foo.txt') - cmdline = ( - f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' - ) - 
self.parsed_responses = [ - self.head_object_response(ContentLength=10 * (1024**2)), - self.get_object_response(), - self.get_object_response(), - ] - self.run_cmd(cmdline, expected_rc=0) - self.assertEqual(len(self.operations_called), 3) - self.assertEqual(self.operations_called[0][0].name, 'HeadObject') - self.assertEqual(self.operations_called[1][0].name, 'GetObject') - self.assertEqual(self.operations_called[2][0].name, 'GetObject') - def test_dryrun_download(self): self.parsed_responses = [self.head_object_response()] target = self.files.full_path('file.txt') @@ -1496,6 +1450,15 @@ def test_streaming_download_error(self): ) self.assertIn(error_message, stderr) + def test_no_overwrite_cannot_be_used_with_streaming_download(self): + command = "s3 cp s3://bucket/streaming.txt - --no-overwrite" + _, stderr, _ = self.run_cmd(command, expected_rc=252) + error_message = ( + "--no-overwrite parameter is not supported for " + "streaming downloads" + ) + self.assertIn(error_message, stderr) + class TestCpCommandWithRequesterPayer(BaseCPCommandTest): def setUp(self): diff --git a/tests/functional/s3/test_mv_command.py b/tests/functional/s3/test_mv_command.py index 661690cc12b2..44d046250b50 100644 --- a/tests/functional/s3/test_mv_command.py +++ b/tests/functional/s3/test_mv_command.py @@ -336,13 +336,7 @@ def test_mv_no_overwrite_flag_when_object_exists_on_target(self): # Set up the response to simulate a PreconditionFailed error self.http_response.status_code = 412 self.parsed_responses = [ - { - 'Error': { - 'Code': 'PreconditionFailed', - 'Message': 'At least one of the pre-conditions you specified did not hold', - 'Condition': 'If-None-Match', - } - } + self.precondition_failed_error_response(), ] self.run_cmd(cmdline, expected_rc=0) # Verify PutObject was attempted with IfNoneMatch @@ -392,13 +386,7 @@ def test_mv_no_overwrite_flag_multipart_upload_when_object_exists_on_target( {'UploadId': 'foo'}, # CreateMultipartUpload response {'ETag': '"foo-1"'}, # UploadPart response {'ETag': '"foo-2"'}, # UploadPart response - { - 'Error': { - 'Code': 'PreconditionFailed', - 'Message': 'At least one of the pre-conditions you specified did not hold', - 'Condition': 'If-None-Match', - } - }, # CompleteMultipartUpload response + self.precondition_failed_error_response(), # CompleteMultipartUpload response {}, # Abort Multipart ] self.run_cmd(cmdline, expected_rc=0) @@ -575,7 +563,7 @@ def test_mv_no_overwrite_flag_when_large_object_does_not_exist_on_target( ) self.assertEqual(self.operations_called[6][0].name, 'DeleteObject') - def test_no_overwrite_flag_on_mv_download_when_small_object_exists_at_target( + def test_no_overwrite_flag_on_mv_download_when_single_object_exists_at_target( self, ): full_path = self.files.create_file('foo.txt', 'existing content') @@ -591,7 +579,7 @@ def test_no_overwrite_flag_on_mv_download_when_small_object_exists_at_target( with open(full_path) as f: self.assertEqual(f.read(), 'existing content') - def test_no_overwrite_flag_on_mv_download_when_small_object_does_not_exist_at_target( + def test_no_overwrite_flag_on_mv_download_when_single_object_does_not_exist_at_target( self, ): full_path = self.files.full_path('foo.txt') @@ -611,42 +599,6 @@ def test_no_overwrite_flag_on_mv_download_when_small_object_does_not_exist_at_ta with open(full_path) as f: self.assertEqual(f.read(), 'foo') - def test_no_overwrite_flag_on_mv_download_when_large_object_exists_at_target( - self, - ): - full_path = self.files.create_file('foo.txt', 'existing content') - cmdline = ( - f'{self.prefix} 
s3://bucket/foo.txt {full_path} --no-overwrite' - ) - self.parsed_responses = [ - self.head_object_response(ContentLength=10 * (1024**2)), - ] - self.run_cmd(cmdline, expected_rc=0) - self.assertEqual(len(self.operations_called), 1) - self.assertEqual(self.operations_called[0][0].name, 'HeadObject') - with open(full_path) as f: - self.assertEqual(f.read(), 'existing content') - - def test_no_overwrite_flag_on_mv_download_when_large_object_does_not_exist_at_target( - self, - ): - full_path = self.files.full_path('foo.txt') - cmdline = ( - f'{self.prefix} s3://bucket/foo.txt {full_path} --no-overwrite' - ) - self.parsed_responses = [ - self.head_object_response(ContentLength=10 * (1024**2)), - self.get_object_response(), - self.get_object_response(), - self.delete_object_response(), - ] - self.run_cmd(cmdline, expected_rc=0) - self.assertEqual(len(self.operations_called), 4) - self.assertEqual(self.operations_called[0][0].name, 'HeadObject') - self.assertEqual(self.operations_called[1][0].name, 'GetObject') - self.assertEqual(self.operations_called[2][0].name, 'GetObject') - self.assertEqual(self.operations_called[3][0].name, 'DeleteObject') - class TestMvWithCRTClient(BaseCRTTransferClientTest): def test_upload_move_using_crt_client(self): diff --git a/tests/functional/s3/test_sync_command.py b/tests/functional/s3/test_sync_command.py index a03d810647ce..bed61b96e494 100644 --- a/tests/functional/s3/test_sync_command.py +++ b/tests/functional/s3/test_sync_command.py @@ -540,6 +540,91 @@ def test_download_with_checksum_mode_crc64nvme(self): ('ChecksumMode', 'ENABLED'), self.operations_called[1][1].items() ) + def test_sync_upload_with_no_overwrite_when_file_does_not_exist_at_destination( + self, + ): + self.files.create_file("new_file.txt", "mycontent") + self.parsed_responses = [ + self.list_objects_response(['file.txt']), + {'ETag': '"c8afdb36c52cf4727836669019e69222"'}, + ] + cmdline = ( + f'{self.prefix} {self.files.rootdir} s3://bucket --no-overwrite' + ) + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 2) + self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2') + self.assertEqual(self.operations_called[1][0].name, 'PutObject') + self.assertEqual(self.operations_called[1][1]['Key'], 'new_file.txt') + + def test_sync_upload_with_no_overwrite_when_file_exists_at_destination( + self, + ): + self.files.create_file("new_file.txt", "mycontent") + self.parsed_responses = [ + self.list_objects_response(['new_file.txt']), + ] + cmdline = ( + f'{self.prefix} {self.files.rootdir} s3://bucket --no-overwrite' + ) + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 1) + self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2') + + def test_sync_download_with_no_overwrite_file_not_exists_at_destination( + self, + ): + self.parsed_responses = [ + self.list_objects_response(['new_file.txt']), + self.get_object_response(), + ] + cmdline = ( + f'{self.prefix} s3://bucket/ {self.files.rootdir} --no-overwrite' + ) + self.run_cmd(cmdline, expected_rc=0) + self.assertEqual(len(self.operations_called), 2) + self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2') + self.assertEqual(self.operations_called[1][0].name, 'GetObject') + local_file_path = os.path.join(self.files.rootdir, 'new_file.txt') + self.assertTrue(os.path.exists(local_file_path)) + + def test_sync_download_with_no_overwrite_file_exists_at_destination(self): + self.files.create_file('file.txt', 'My content') + self.parsed_responses = [ 
+            self.list_objects_response(['file.txt']),
+        ]
+        cmdline = (
+            f'{self.prefix} s3://bucket/ {self.files.rootdir} --no-overwrite'
+        )
+        self.run_cmd(cmdline, expected_rc=0)
+        self.assertEqual(len(self.operations_called), 1)
+        self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
+
+    def test_sync_copy_with_no_overwrite_file_not_exists_at_destination(self):
+        self.parsed_responses = [
+            self.list_objects_response(['new_file.txt']),
+            self.list_objects_response(['file1.txt']),
+            self.copy_object_response(),
+        ]
+        cmdline = f'{self.prefix} s3://bucket/ s3://bucket2/ --no-overwrite'
+        self.run_cmd(cmdline, expected_rc=0)
+        self.assertEqual(len(self.operations_called), 3)
+        self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
+        self.assertEqual(self.operations_called[1][0].name, 'ListObjectsV2')
+        self.assertEqual(self.operations_called[2][0].name, 'CopyObject')
+        self.assertEqual(self.operations_called[2][1]['Key'], 'new_file.txt')
+
+    def test_sync_copy_with_no_overwrite_file_exists_at_destination(self):
+        self.parsed_responses = [
+            self.list_objects_response(['new_file.txt']),
+            self.list_objects_response(['new_file.txt', 'file1.txt']),
+        ]
+        cmdline = f'{self.prefix} s3://bucket/ s3://bucket2/ --no-overwrite'
+        self.run_cmd(cmdline, expected_rc=0)
+        self.assertEqual(len(self.operations_called), 2)
+        self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
+        self.assertEqual(self.operations_called[1][0].name, 'ListObjectsV2')
+
 
 class TestSyncSourceRegion(BaseS3CLIRunnerTest):
     def test_respects_source_region(self):
diff --git a/tests/unit/customizations/s3/test_s3handler.py b/tests/unit/customizations/s3/test_s3handler.py
index eee3b7747aa0..6f08536d8845 100644
--- a/tests/unit/customizations/s3/test_s3handler.py
+++ b/tests/unit/customizations/s3/test_s3handler.py
@@ -12,6 +12,7 @@
 import os
 
+from botocore.exceptions import ClientError
 from s3transfer.manager import TransferManager
 
 from awscli.compat import queue
@@ -679,38 +680,37 @@ def test_submit_move_adds_delete_source_subscriber(self):
         for i, actual_subscriber in enumerate(actual_subscribers):
             self.assertIsInstance(actual_subscriber, ref_subscribers[i])
 
-    def test_submit_with_no_overwrite_flag_when_file_exists_at_destination(
-        self,
-    ):
-        # Setting up the CLI params with no_overwrite flag as True
+    def test_skip_download_when_no_overwrite_and_file_exists(self):
         self.cli_params['no_overwrite'] = True
         fileinfo = self.create_file_info(self.key)
-        # Mocking os.path.exists to simulate that file already exists
         with mock.patch('os.path.exists', return_value=True):
-            # Submitting download request
             future = self.transfer_request_submitter.submit(fileinfo)
-            # Asserting that the future is None, as the file already exists
-            self.assertIsNone(future)
-            # Asserting that no download happened
-            self.assert_no_downloads_happened()
-
-    def test_submit_with_no_overwrite_flag_when_file_does_not_exist_at_destination(
-        self,
-    ):
-        # Setting up the CLI params with no_overwrite flag as True
+        # Result queue should be empty: no-overwrite skips are not reported as warnings.
+        self.assertTrue(self.result_queue.empty())
+        # The transfer should be skipped, so future should be None
+        self.assertIsNone(future)
+        self.assert_no_downloads_happened()
+
+    def test_proceed_download_when_no_overwrite_and_file_not_exists(self):
         self.cli_params['no_overwrite'] = True
         fileinfo = self.create_file_info(self.key)
-        # Mocking os.path.exists to return False, to simulate that file does not exist
         with mock.patch('os.path.exists', return_value=False):
-            # Submitting download request
             future = self.transfer_request_submitter.submit(fileinfo)
-        # Asserting that the future is the same object returned by transfer_manager.download
-        # This confirms that download was actually initiated
-        self.assertIs(self.transfer_manager.download.return_value, future)
-        # Asserting that download happened
-        self.assertEqual(
-            len(self.transfer_manager.download.call_args_list), 1
-        )
+        # The transfer should proceed, so future should be the transfer manager's return value
+        self.assertIs(self.transfer_manager.download.return_value, future)
+        # And download should have happened
+        self.assertEqual(len(self.transfer_manager.download.call_args_list), 1)
+
+    def test_proceed_download_when_file_exists_without_no_overwrite(self):
+        self.cli_params['no_overwrite'] = False
+        fileinfo = self.create_file_info(self.key)
+        with mock.patch('os.path.exists', return_value=True):
+            future = self.transfer_request_submitter.submit(fileinfo)
+        # The transfer should proceed, so future should be the transfer manager's return value
+        self.assertIs(self.transfer_manager.download.return_value, future)
+        # And download should have happened
+        self.assertEqual(len(self.transfer_manager.download.call_args_list), 1)
 
 
 class TestCopyRequestSubmitter(BaseTransferRequestSubmitterTest):
@@ -957,6 +957,75 @@ def test_submit_move_adds_delete_source_subscriber(self):
         for i, actual_subscriber in enumerate(actual_subscribers):
             self.assertIsInstance(actual_subscriber, ref_subscribers[i])
 
+    def test_skip_copy_with_no_overwrite_and_zero_byte_file_exists(self):
+        self.cli_params['no_overwrite'] = True
+        fileinfo = FileInfo(
+            src=self.source_bucket + "/" + self.source_key,
+            dest=self.bucket + "/" + self.key,
+            operation_name='copy',
+            size=0,
+            source_client=mock.Mock(),
+        )
+        fileinfo.source_client.head_object.return_value = {}
+        future = self.transfer_request_submitter.submit(fileinfo)
+        # The transfer should be skipped, so future should be None
+        self.assertIsNone(future)
+        # Result queue should be empty: no-overwrite skips are not reported as warnings.
+        self.assertTrue(self.result_queue.empty())
+        self.assertEqual(len(self.transfer_manager.copy.call_args_list), 0)
+
+    def test_proceed_copy_with_no_overwrite_and_zero_byte_file_does_not_exist(
+        self,
+    ):
+        self.cli_params['no_overwrite'] = True
+        fileinfo = FileInfo(
+            src=self.source_bucket + "/" + self.source_key,
+            dest=self.bucket + "/" + self.key,
+            operation_name='copy',
+            size=0,
+            source_client=mock.Mock(),
+        )
+        fileinfo.source_client.head_object.side_effect = ClientError(
+            {'Error': {'Code': '404', 'Message': 'Not Found'}},
+            'HeadObject',
+        )
+        future = self.transfer_request_submitter.submit(fileinfo)
+        # The transfer should proceed, so future should be the transfer manager's return value
+        self.assertIs(self.transfer_manager.copy.return_value, future)
+        self.assertEqual(len(self.transfer_manager.copy.call_args_list), 1)
+
+    def test_proceed_copy_with_no_overwrite_for_non_zero_byte_file(self):
+        self.cli_params['no_overwrite'] = True
+        fileinfo = FileInfo(
+            src=self.source_bucket + "/" + self.source_key,
+            dest=self.bucket + "/" + self.key,
+            operation_name='copy',
+            size=100,
+            source_client=mock.Mock(),
+        )
+        future = self.transfer_request_submitter.submit(fileinfo)
+        # The transfer should proceed, so future should be the transfer manager's return value
+        self.assertIs(self.transfer_manager.copy.return_value, future)
+        self.assertEqual(len(self.transfer_manager.copy.call_args_list), 1)
+        # head_object should not be called for non-zero-byte copies
+        fileinfo.source_client.head_object.assert_not_called()
+
+    def test_file_exists_without_no_overwrite(self):
+        self.cli_params['no_overwrite'] = False
+        fileinfo = FileInfo(
+            src=self.source_bucket + "/" + self.source_key,
+            dest=self.bucket + "/" + self.key,
+            operation_name='copy',
+            size=100,
+            source_client=mock.Mock(),
+        )
+        future = self.transfer_request_submitter.submit(fileinfo)
+        # The transfer should proceed, so future should be the transfer manager's return value
+        self.assertIs(self.transfer_manager.copy.return_value, future)
+        self.assertEqual(len(self.transfer_manager.copy.call_args_list), 1)
+        # Head should not be called when no_overwrite is false
+        fileinfo.source_client.head_object.assert_not_called()
+
 
 class TestUploadStreamRequestSubmitter(BaseTransferRequestSubmitterTest):
     def setUp(self):
diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py
index 8e5c06e3d93c..0b99e6f1ff0d 100644
--- a/tests/unit/customizations/s3/test_subcommands.py
+++ b/tests/unit/customizations/s3/test_subcommands.py
@@ -524,7 +524,6 @@ def test_validate_streaming_paths_download(self):
         paths = ['s3://bucket/key', '-']
         cmd_params = CommandParameters('cp', {}, '')
         cmd_params.add_paths(paths)
-        print(" Paramertes %s", cmd_params.parameters)
         self.assertTrue(cmd_params.parameters['is_stream'])
         self.assertTrue(cmd_params.parameters['only_show_errors'])
         self.assertFalse(cmd_params.parameters['dir_op'])

From 59f4f72c9fb5ec56b54b53ad80bf6dd3e2d11a81 Mon Sep 17 00:00:00 2001
From: Ahmed Moustafa <35640105+aemous@users.noreply.github.com>
Date: Wed, 8 Oct 2025 16:58:57 -0400
Subject: [PATCH 8/8] Add support for results that do not print newlines via
 new SkipFileResult (fix printing bugs when using no-overwrite flag) (#9717)

Fixed bug when using `--no-overwrite` parameter where the last character
printed to the CLI would be a carriage return, which would cause
incompatibility with some terminal programs.

* Keep track of files skipped.
* Update files remaining progress computation to take into account files skipped. * Add support for results that do not print newlines via SkipFileResult and updates to ResultPrinter. --- awscli/customizations/s3/results.py | 35 +++++++++++++---- tests/unit/customizations/s3/test_results.py | 40 ++++++++++++++++++++ 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/awscli/customizations/s3/results.py b/awscli/customizations/s3/results.py index 024fa7b79f4c..e7387e3b16d4 100644 --- a/awscli/customizations/s3/results.py +++ b/awscli/customizations/s3/results.py @@ -57,6 +57,7 @@ def _create_new_result_cls(name, extra_fields=None, base_cls=BaseResult): FailureResult = _create_new_result_cls('FailureResult', ['exception']) DryRunResult = _create_new_result_cls('DryRunResult') +SkipFileResult = _create_new_result_cls('SkipFileResult') ErrorResult = namedtuple('ErrorResult', ['exception']) @@ -132,6 +133,13 @@ def _on_failure(self, future, e): self._src, self._dest, ) + self._result_queue.put( + SkipFileResult( + transfer_type=self._transfer_type, + src=self._src, + dest=self._dest, + ) + ) else: self._result_queue.put( FailureResult( @@ -167,6 +175,7 @@ def __init__(self): self.files_transferred = 0 self.files_failed = 0 self.files_warned = 0 + self.files_skipped = 0 self.errors = 0 self.expected_bytes_transferred = 0 self.expected_files_transferred = 0 @@ -184,6 +193,7 @@ def __init__(self): SuccessResult: self._record_success_result, FailureResult: self._record_failure_result, WarningResult: self._record_warning_result, + SkipFileResult: self._record_skipped_file_result, ErrorResult: self._record_error_result, CtrlCResult: self._record_error_result, FinalTotalSubmissionsResult: self._record_final_expected_files, @@ -299,6 +309,9 @@ def _record_failure_result(self, result, **kwargs): self.files_failed += 1 self.files_transferred += 1 + def _record_skipped_file_result(self, result, **kwargs): + self.files_skipped += 1 + def _record_warning_result(self, **kwargs): self.files_warned += 1 @@ -359,6 +372,7 @@ def __init__(self, result_recorder, out_file=None, error_file=None): SuccessResult: self._print_success, FailureResult: self._print_failure, WarningResult: self._print_warning, + SkipFileResult: self._print_skip, ErrorResult: self._print_error, CtrlCResult: self._print_ctrl_c, DryRunResult: self._print_dry_run, @@ -375,6 +389,10 @@ def _print_noop(self, **kwargs): # If the result does not have a handler, then do nothing with it. pass + def _print_skip(self, **kwargs): + # Don't reset progress length since this result printer doesn't print a newline + self._redisplay_progress(reset_progress_length=False) + def _print_dry_run(self, result, **kwargs): statement = self.DRY_RUN_FORMAT.format( transfer_type=result.transfer_type, @@ -427,23 +445,26 @@ def _get_transfer_location(self, result): src=result.src, dest=result.dest ) - def _redisplay_progress(self): + def _redisplay_progress(self, reset_progress_length=True): # Reset to zero because done statements are printed with new lines # meaning there are no carriage returns to take into account when # printing the next line. - self._progress_length = 0 + if reset_progress_length: + self._progress_length = 0 self._add_progress_if_needed() def _add_progress_if_needed(self): if self._has_remaining_progress(): self._print_progress() + else: + self._clear_progress_if_no_more_expected_transfers(ending_char='\r') def _print_progress(self, **kwargs): - # Get all of the statistics in the correct form. 
+ # Get all the statistics in the correct form. remaining_files = self._get_expected_total( str( self._result_recorder.expected_files_transferred - - self._result_recorder.files_transferred + - (self._result_recorder.files_transferred + self._result_recorder.files_skipped) ) ) @@ -506,7 +527,7 @@ def _adjust_statement_padding(self, print_statement, ending_char='\n'): def _has_remaining_progress(self): if not self._result_recorder.expected_totals_are_final(): return True - actual = self._result_recorder.files_transferred + actual = self._result_recorder.files_transferred + self._result_recorder.files_skipped expected = self._result_recorder.expected_files_transferred return actual != expected @@ -516,9 +537,9 @@ def _print_to_out_file(self, statement): def _print_to_error_file(self, statement): uni_print(statement, self._error_file) - def _clear_progress_if_no_more_expected_transfers(self, **kwargs): + def _clear_progress_if_no_more_expected_transfers(self, ending_char='\n', **kwargs): if self._progress_length and not self._has_remaining_progress(): - uni_print(self._adjust_statement_padding(''), self._out_file) + uni_print(self._adjust_statement_padding('', ending_char=ending_char), self._out_file) class NoProgressResultPrinter(ResultPrinter): diff --git a/tests/unit/customizations/s3/test_results.py b/tests/unit/customizations/s3/test_results.py index 4c98b08174a1..0e80d2ebbf19 100644 --- a/tests/unit/customizations/s3/test_results.py +++ b/tests/unit/customizations/s3/test_results.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +from botocore.exceptions import HTTPClientError from s3transfer.exceptions import CancelledError, FatalError from awscli.compat import StringIO, queue @@ -31,6 +32,7 @@ ResultProcessor, ResultRecorder, ShutdownThreadRequest, + SkipFileResult, SuccessResult, ) from awscli.customizations.s3.utils import WarningResult @@ -159,6 +161,27 @@ def test_on_done_failure(self): ), ) + def test_on_done_precondition_failed(self): + subscriber = self.get_result_subscriber(DoneResultSubscriber) + precondition_failed = HTTPClientError( + request=mock.Mock(), + response={ + 'Error': { + 'Code': 'PreconditionFailed' + } + }, + error='PreconditionFailed', + ) + precondition_failed_future = self.get_failed_transfer_future(precondition_failed) + subscriber.on_done(precondition_failed_future) + result = self.get_queued_result() + self.assert_result_queue_is_empty() + self.assertEqual( + result, + SkipFileResult(transfer_type=mock.ANY, src=mock.ANY, dest=mock.ANY) + ) + + def test_on_done_unexpected_cancelled(self): subscriber = self.get_result_subscriber(DoneResultSubscriber) cancelled_exception = FatalError('some error') @@ -1493,6 +1516,23 @@ def test_print_unicode_error(self): ref_error_statement = 'fatal error: unicode exists \u2713\n' self.assertEqual(self.error_file.getvalue(), ref_error_statement) + def test_does_not_print_skipped_file(self): + transfer_type = 'upload' + src = 'file' + dest = 's3://mybucket/mykey' + skip_file_result = SkipFileResult( + transfer_type=transfer_type, src=src, dest=dest + ) + + # Pretend that this is the final result in the result queue that + # is processed. 
+ self.result_recorder.final_expected_files_transferred = 1 + self.result_recorder.expected_files_transferred = 1 + self.result_recorder.files_skipped = 1 + + self.result_printer(skip_file_result) + self.assertEqual(self.out_file.getvalue(), '') + class TestNoProgressResultPrinter(BaseResultPrinterTest): def setUp(self):
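
---

Illustrative note (a sketch, not part of the patch series): on the wire,
`--no-overwrite` maps to an S3 conditional write via `IfNoneMatch: '*'`,
and a `PreconditionFailed` (HTTP 412) response is treated as a skip rather
than a failure. A minimal boto3 equivalent, assuming a botocore version
with conditional-write support and placeholder bucket/key names:

    import boto3
    from botocore.exceptions import ClientError

    s3 = boto3.client('s3')
    try:
        # Rough equivalent of `aws s3 cp file.txt s3://bucket/key --no-overwrite`:
        # If-None-Match: * tells S3 to reject the write when the key already exists.
        s3.put_object(Bucket='bucket', Key='key', Body=b'data', IfNoneMatch='*')
    except ClientError as e:
        if e.response['Error']['Code'] == 'PreconditionFailed':
            print('skipped: object already exists at destination')
        else:
            raise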