diff --git a/lambdas/handlers/bulk_upload_metadata_processor_handler.py b/lambdas/handlers/bulk_upload_metadata_processor_handler.py
index 64b7759ad..3404706ea 100644
--- a/lambdas/handlers/bulk_upload_metadata_processor_handler.py
+++ b/lambdas/handlers/bulk_upload_metadata_processor_handler.py
@@ -1,21 +1,13 @@
-import urllib.parse
-
 from enums.lloyd_george_pre_process_format import LloydGeorgePreProcessFormat
-from services.bulk_upload.metadata_general_preprocessor import (
-    MetadataGeneralPreprocessor,
-)
-from services.bulk_upload.metadata_usb_preprocessor import (
-    MetadataUsbPreprocessorService,
-)
 from services.bulk_upload_metadata_processor_service import (
     BulkUploadMetadataProcessorService,
+    get_formatter_service,
 )
 from utils.audit_logging_setup import LoggingService
 from utils.decorators.ensure_env_var import ensure_environment_variables
 from utils.decorators.handle_lambda_exceptions import handle_lambda_exceptions
 from utils.decorators.override_error_check import override_error_check
 from utils.decorators.set_audit_arg import set_request_context_for_logging
-from utils.exceptions import BulkUploadMetadataException

 logger = LoggingService(__name__)

@@ -27,64 +19,33 @@
 )
 @handle_lambda_exceptions
 def lambda_handler(event, _context):
-    if "source" in event and event.get("source") == "aws.s3":
-        logger.info("Handling EventBridge event from S3")
-        handle_expedite_event(event)
-        return
-
-    practice_directory = event.get("practiceDirectory", "")
     raw_pre_format_type = event.get(
         "preFormatType", LloydGeorgePreProcessFormat.GENERAL
     )
     formatter_service_class = get_formatter_service(raw_pre_format_type)

-    if not practice_directory:
-        logger.error(
-            "Failed to start metadata processing due to missing practice directory"
-        )
-        return
-
-    logger.info(
-        f"Starting metadata processing for practice directory: {practice_directory}"
-    )
+    practice_directory = event.get("practiceDirectory", "")
     remappings = event.get("metadataFieldRemappings", {})
-
     metadata_formatter_service = formatter_service_class(practice_directory)
     metadata_service = BulkUploadMetadataProcessorService(
         metadata_formatter_service=metadata_formatter_service,
         metadata_heading_remap=remappings,
     )
-    metadata_service.process_metadata()
+    if "source" in event and event.get("source") == "aws.s3":
+        logger.info("Handling EventBridge event from S3")

+        metadata_service.handle_expedite_event(event)
+        return

-def get_formatter_service(raw_pre_format_type):
-    try:
-        pre_format_type = LloydGeorgePreProcessFormat(raw_pre_format_type)
-        if pre_format_type == LloydGeorgePreProcessFormat.GENERAL:
-            logger.info("Using general preFormatType")
-            return MetadataGeneralPreprocessor
-        elif pre_format_type == LloydGeorgePreProcessFormat.USB:
-            logger.info("Using usb preFormatType")
-            return MetadataUsbPreprocessorService
-    except ValueError:
-        logger.warning(
-            f"Invalid preFormatType: '{raw_pre_format_type}', defaulting to {LloydGeorgePreProcessFormat.GENERAL}."
+    if not practice_directory:
+        logger.error(
+            "Failed to start metadata processing due to missing practice directory"
         )
-        return MetadataGeneralPreprocessor
+        return

+    logger.info(
+        f"Starting metadata processing for practice directory: {practice_directory}"
     )


-def handle_expedite_event(event):
-    try:
-        key_string = event["detail"]["object"]["key"]
-        key = urllib.parse.unquote_plus(key_string, encoding="utf-8")
-        if key.startswith("expedite/"):
-            logger.info("Processing file from expedite folder")
-            return  # To be added upon by ticket PRMP-540
-        else:
-            failure_msg = f"Unexpected directory or file location received from EventBridge: {key_string}"
-            logger.error(failure_msg)
-            raise BulkUploadMetadataException(failure_msg)
-    except KeyError as e:
-        failure_msg = f"Failed due to missing key: {str(e)}"
-        logger.error(failure_msg)
-        raise BulkUploadMetadataException(failure_msg)
+    metadata_service.process_metadata()
diff --git a/lambdas/repositories/bulk_upload/bulk_upload_s3_repository.py b/lambdas/repositories/bulk_upload/bulk_upload_s3_repository.py
index c0c341c8c..8c7411df8 100644
--- a/lambdas/repositories/bulk_upload/bulk_upload_s3_repository.py
+++ b/lambdas/repositories/bulk_upload/bulk_upload_s3_repository.py
@@ -14,7 +14,7 @@
     VirusScanNoResultException,
 )

-_logger = LoggingService(__name__)
+logger = LoggingService(__name__)


 class BulkUploadS3Repository:
@@ -54,17 +54,17 @@ def check_virus_result(
             )
         except ClientError as e:
             if "AccessDenied" in str(e) or "NoSuchKey" in str(e):
-                _logger.info(
+                logger.info(
                     f"Failed to check object tag for given file_path: {file_path}"
                 )
-                _logger.info(
+                logger.info(
                     "file_path may be incorrect or contain invalid character"
                 )
                 raise S3FileNotFoundException(f"Failed to access file {file_path}")
             else:
                 raise e

-        _logger.info(
+        logger.info(
             f"Verified that all documents for patient {staging_metadata.nhs_number} are clean."
         )

@@ -96,3 +96,33 @@ def rollback_transaction(self):

     def file_exists_on_staging_bucket(self, file_key: str) -> bool:
         return self.s3_repository.file_exist_on_s3(self.staging_bucket_name, file_key)
+
+    def check_file_tag_status_on_staging_bucket(self, file_key: str) -> str:
+        """
+        Retrieves the virus scan tag value for a single file.
+        Raises specific exceptions based on the tag's presence or S3 access.
+ """ + s3_service = self.s3_repository + + try: + # Call the underlying S3 method to get the tag value + raw_scan_result = s3_service.get_tag_value( + s3_bucket_name=self.staging_bucket_name, + file_key=file_key, + tag_key=SCAN_RESULT_TAG_KEY, + ) + return raw_scan_result + + except TagNotFoundException: + return "" + + except ClientError as e: + error_msg = str(e) + if "AccessDenied" in str(e) or "NoSuchKey" in error_msg: + logger.error( + f"Failed to check object tag for given file_path: {file_key}" + ) + logger.error("file_path may be incorrect or contain invalid character") + raise S3FileNotFoundException(f"Failed to access file {file_key}") + else: + raise e diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index f30fd8f48..b20b12d33 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -2,12 +2,16 @@ import os import shutil import tempfile +import urllib.parse from collections import defaultdict from datetime import datetime import pydantic from botocore.exceptions import ClientError + +from enums.lloyd_george_pre_process_format import LloydGeorgePreProcessFormat from enums.upload_status import UploadStatus +from enums.virus_scan_result import VirusScanResult from models.staging_metadata import ( METADATA_FILENAME, BulkUploadQueueMetadata, @@ -17,8 +21,15 @@ from repositories.bulk_upload.bulk_upload_dynamo_repository import ( BulkUploadDynamoRepository, ) +from repositories.bulk_upload.bulk_upload_s3_repository import BulkUploadS3Repository from services.base.s3_service import S3Service from services.base.sqs_service import SQSService +from services.bulk_upload.metadata_general_preprocessor import ( + MetadataGeneralPreprocessor, +) +from services.bulk_upload.metadata_usb_preprocessor import ( + MetadataUsbPreprocessorService, +) from services.bulk_upload_metadata_preprocessor_service import ( MetadataPreprocessorService, ) @@ -28,8 +39,10 @@ BulkUploadMetadataException, InvalidFileNameException, LGInvalidFilesException, + VirusScanFailedException, ) from utils.lloyd_george_validator import validate_file_name +from utils.utilities import get_virus_scan_service logger = LoggingService(__name__) UNSUCCESSFUL = "Unsuccessful bulk upload" @@ -47,6 +60,9 @@ def __init__( self.s3_service = S3Service() self.sqs_service = SQSService() self.dynamo_repository = BulkUploadDynamoRepository() + self.s3_repo = BulkUploadS3Repository() + self.virus_scan_service = get_virus_scan_service() + self.metadata_heading_remap = metadata_heading_remap self.temp_download_dir = tempfile.mkdtemp() @@ -245,3 +261,71 @@ def clear_temp_storage(self): shutil.rmtree(self.temp_download_dir) except FileNotFoundError: pass + + def check_file_status(self, file_key: str): + scan_result = self.s3_repo.check_file_tag_status_on_staging_bucket(file_key) + if scan_result != VirusScanResult.CLEAN: + logger.info(f"Found an issue with the file {file_key}.") + raise VirusScanFailedException( + f"Encountered an issue when scanning the file {file_key}, scan result was {scan_result}" + ) + + def enforce_virus_scanner(self, file_key: str): + logger.info( + f"Checking virus scan result for file: {file_key} in {self.staging_bucket_name}" + ) + + try: + result = self.s3_repo.check_file_tag_status_on_staging_bucket(file_key) + if(result != ""): + logger.info("The file has been scanned before") + return + logger.info(f"Virus scan tag missing for {file_key}.") + 
+            self.virus_scan_service.scan_file(file_ref=file_key)
+
+        except ClientError as e:
+            error_message = str(e)
+            if "NoSuchKey" in error_message or "AccessDenied" in error_message:
+                logger.error(f"S3 access error when checking tag for {file_key}.")
+                raise BulkUploadMetadataException(
+                    f"Failed to access S3 file {file_key} during tag check."
+                )
+            else:
+                raise
+
+    def handle_expedite_event(self, event):
+        try:
+            key_string = event["detail"]["object"]["key"]
+            key = urllib.parse.unquote_plus(key_string, encoding="utf-8")
+
+            if key.startswith("expedite/"):
+                logger.info("Processing file from expedite folder")
+
+                self.enforce_virus_scanner(key)
+                self.check_file_status(key)
+
+                return  # To be added upon by ticket PRMP-540
+            else:
+                failure_msg = f"Unexpected directory or file location received from EventBridge: {key_string}"
+                logger.error(failure_msg)
+                raise BulkUploadMetadataException(failure_msg)
+        except KeyError as e:
+            failure_msg = f"Failed due to missing key: {str(e)}"
+            logger.error(failure_msg)
+            raise BulkUploadMetadataException(failure_msg)
+
+
+def get_formatter_service(raw_pre_format_type):
+    try:
+        pre_format_type = LloydGeorgePreProcessFormat(raw_pre_format_type)
+        if pre_format_type == LloydGeorgePreProcessFormat.GENERAL:
+            logger.info("Using general preFormatType")
+            return MetadataGeneralPreprocessor
+        elif pre_format_type == LloydGeorgePreProcessFormat.USB:
+            logger.info("Using usb preFormatType")
+            return MetadataUsbPreprocessorService
+    except ValueError:
+        logger.warning(
+            f"Invalid preFormatType: '{raw_pre_format_type}', defaulting to {LloydGeorgePreProcessFormat.GENERAL}."
+        )
+        return MetadataGeneralPreprocessor
diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_metadata_processor_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_metadata_processor_handler.py
index d65717e5b..fe69f1633 100644
--- a/lambdas/tests/unit/handlers/test_bulk_upload_metadata_processor_handler.py
+++ b/lambdas/tests/unit/handlers/test_bulk_upload_metadata_processor_handler.py
@@ -18,10 +18,10 @@ def eventbridge_event_with_s3_key(key: str):
     return {
         "source": "aws.s3",
         "detail": {
-            "object":{
-                "key": key,
-            },
-        }
+            "object": {
+                "key": key,
+            },
+        },
     }


@@ -41,23 +41,41 @@ def test_metadata_processor_lambda_handler_empty_event(
     mock_metadata_service.process_metadata.assert_not_called()


+def test_metadata_processor_lambda_handler_s3_event_triggers_expedite(
+    set_env, context, mock_metadata_service
+):
+    event = {
+        "source": "aws.s3",
+        "detail": {
+            "object": {
+                "key": "expedite/folder/file.pdf",
+            }
+        },
+    }
+
+    lambda_handler(event, context)
+
+    mock_metadata_service.handle_expedite_event.assert_called_once_with(event)
+    mock_metadata_service.process_metadata.assert_not_called()
+
+
 def test_s3_event_with_expedite_key_processes(
     set_env, context, mock_metadata_service, caplog
 ):
     event = eventbridge_event_with_s3_key(
         "expedite%2F1of1_Lloyd_George_Record_[John Michael SMITH]_[1234567890]_[15-05-1990].pdf"
     )
-    lambda_handler(event, context)
+
+    with caplog.at_level("INFO"):
+        lambda_handler(event, context)

     assert any(
-        f"Handling EventBridge event from S3"
-        in r.message
-        for r in caplog.records
-    )
-    assert any(
-        "Processing file from expedite folder" in r.message for r in caplog.records
+        "Handling EventBridge event from S3" in r.message for r in caplog.records
     )

+    mock_metadata_service.handle_expedite_event.assert_called_once_with(event)
+    mock_metadata_service.process_metadata.assert_not_called()
+

 def test_s3_event_with_non_expedite_key_is_rejected(
     set_env,
     context, mock_metadata_service, caplog
@@ -65,11 +83,8 @@ def test_s3_event_with_non_expedite_key_is_rejected(
     key_string = "uploads/1of1_Lloyd_George_Record_[John Michael SMITH]_[1234567890]_[15-05-1990].pdf"
     event = eventbridge_event_with_s3_key(key_string)

-    lambda_handler(event, context)
+    with caplog.at_level("INFO"):
+        lambda_handler(event, context)

-    assert any(
-        f"Unexpected directory or file location received from EventBridge: {key_string}"
-        in r.message
-        for r in caplog.records
-    )
+    mock_metadata_service.handle_expedite_event.assert_called_once_with(event)
     mock_metadata_service.process_metadata.assert_not_called()
diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py
index 74c3ebe40..d9bf00824 100644
--- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py
+++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py
@@ -1,5 +1,7 @@
 import os
 import tempfile
+import urllib
+import urllib.parse
 from collections import defaultdict
 from unittest.mock import call

@@ -7,6 +9,8 @@
 from botocore.exceptions import ClientError
 from enums.upload_status import UploadStatus
 from freezegun import freeze_time
+
+from enums.virus_scan_result import VirusScanResult
 from models.staging_metadata import (
     METADATA_FILENAME,
     BulkUploadQueueMetadata,
@@ -31,6 +35,7 @@
     BulkUploadMetadataException,
     InvalidFileNameException,
     LGInvalidFilesException,
+    VirusScanNoResultException,
     VirusScanFailedException,
 )

 METADATA_FILE_DIR = "tests/unit/helpers/data/bulk_upload"
@@ -61,6 +66,12 @@ def test_service(mocker, set_env, mock_tempfile):
     mocker.patch(
         "services.bulk_upload_metadata_processor_service.BulkUploadDynamoRepository"
     )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.BulkUploadS3Repository"
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.get_virus_scan_service"
+    )

     service = BulkUploadMetadataProcessorService(
         metadata_formatter_service=MockMetadataPreprocessorService(
@@ -299,7 +310,19 @@ def validate_record_filename(self, original_filename: str, *args, **kwargs) -> s


 @pytest.fixture
-def bulk_upload_service():
+def bulk_upload_service(mocker, set_env, mock_tempfile):
+    mocker.patch("services.bulk_upload_metadata_processor_service.S3Service")
+    mocker.patch("services.bulk_upload_metadata_processor_service.SQSService")
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.BulkUploadDynamoRepository"
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.BulkUploadS3Repository"
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.get_virus_scan_service"
+    )
+
     return BulkUploadMetadataProcessorService(
         metadata_formatter_service=TestMetadataPreprocessorService(
             practice_directory="test_practice_directory"
@@ -693,14 +716,21 @@ def test_clear_temp_storage_handles_missing_directory(mocker, test_service):
     mock_rm.assert_called_once_with(test_service.temp_download_dir)


-@pytest.fixture(autouse=True)
+@pytest.fixture
 @freeze_time("2025-01-01T12:00:00")
-def mock_service_remapping_mandatory_fields(mocker):
+def mock_service_remapping_mandatory_fields(mocker, set_env, mock_tempfile):
+    # Patch out external dependencies so __init__ doesn't touch real AWS/services
     mocker.patch("services.bulk_upload_metadata_processor_service.S3Service")
     mocker.patch("services.bulk_upload_metadata_processor_service.SQSService")
     mocker.patch(
"services.bulk_upload_metadata_processor_service.BulkUploadDynamoRepository" ) + mocker.patch( + "services.bulk_upload_metadata_processor_service.BulkUploadS3Repository" + ) + mocker.patch( + "services.bulk_upload_metadata_processor_service.get_virus_scan_service" + ) service = BulkUploadMetadataProcessorService( metadata_formatter_service=MockMetadataPreprocessorService( @@ -726,8 +756,8 @@ def mock_service_remapping_mandatory_fields(mocker): "process_metadata_row", wraps=service.process_metadata_row, ) - mocker.patch.object(service, "s3_service") + return service @@ -765,14 +795,21 @@ def test_remapping_mandatory_fields( assert result == expected -@pytest.fixture(autouse=True) +@pytest.fixture @freeze_time("2025-01-01T12:00:00") -def mock_service_no_remapping(mocker): +def mock_service_no_remapping(mocker, set_env, mock_tempfile): + # Patch out external dependencies so __init__ doesn't touch real AWS/services mocker.patch("services.bulk_upload_metadata_processor_service.S3Service") mocker.patch("services.bulk_upload_metadata_processor_service.SQSService") mocker.patch( "services.bulk_upload_metadata_processor_service.BulkUploadDynamoRepository" ) + mocker.patch( + "services.bulk_upload_metadata_processor_service.BulkUploadS3Repository" + ) + mocker.patch( + "services.bulk_upload_metadata_processor_service.get_virus_scan_service" + ) service = BulkUploadMetadataProcessorService( metadata_formatter_service=MockMetadataPreprocessorService( @@ -791,7 +828,6 @@ def mock_service_no_remapping(mocker): "process_metadata_row", wraps=service.process_metadata_row, ) - mocker.patch.object(service, "s3_service") return service @@ -826,3 +862,185 @@ def test_no_remapping_logic( retries=0, ) ] + + +def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service): + encoded_key = urllib.parse.quote_plus("expedite/folder/some file.pdf") + event = {"detail": {"object": {"key": encoded_key}}} + + mocked_enforce = mocker.patch.object(test_service, "enforce_virus_scanner") + mocked_check_status = mocker.patch.object(test_service, "check_file_status") + + test_service.handle_expedite_event(event) + + decoded_key = "expedite/folder/some file.pdf" + mocked_enforce.assert_called_once_with(decoded_key) + mocked_check_status.assert_called_once_with(decoded_key) + + +def test_handle_expedite_event_raises_on_unexpected_directory(mocker, test_service): + mocked_enforce = mocker.patch.object(test_service, "enforce_virus_scanner") + event = {"detail": {"object": {"key": "uploads/something.pdf"}}} + + with pytest.raises(BulkUploadMetadataException) as excinfo: + test_service.handle_expedite_event(event) + + assert "Unexpected directory or file location received from EventBridge" in str( + excinfo.value + ) + + mocked_enforce.assert_not_called() + + +def test_handle_expedite_event_raises_on_missing_key(mocker, test_service): + mocked_enforce = mocker.patch.object(test_service, "enforce_virus_scanner") + event = {"detail": {"object": {}}} + + with pytest.raises(BulkUploadMetadataException) as excinfo: + test_service.handle_expedite_event(event) + + assert "Failed due to missing key" in str(excinfo.value) + + mocked_enforce.assert_not_called() + + +def test_get_formatter_service_returns_general_for_general_value(): + from enums.lloyd_george_pre_process_format import LloydGeorgePreProcessFormat + from services.bulk_upload.metadata_general_preprocessor import ( + MetadataGeneralPreprocessor, + ) + from services.bulk_upload_metadata_processor_service import get_formatter_service + + cls = 
+    assert cls is MetadataGeneralPreprocessor
+
+
+def test_get_formatter_service_returns_usb_for_usb_value():
+    from enums.lloyd_george_pre_process_format import LloydGeorgePreProcessFormat
+    from services.bulk_upload.metadata_usb_preprocessor import (
+        MetadataUsbPreprocessorService,
+    )
+    from services.bulk_upload_metadata_processor_service import get_formatter_service
+
+    cls = get_formatter_service(LloydGeorgePreProcessFormat.USB.value)
+    assert cls is MetadataUsbPreprocessorService
+
+
+def test_get_formatter_service_defaults_to_general_on_invalid_value():
+    from services.bulk_upload.metadata_general_preprocessor import (
+        MetadataGeneralPreprocessor,
+    )
+    from services.bulk_upload_metadata_processor_service import get_formatter_service
+
+    cls = get_formatter_service("this-is-not-valid")
+    assert cls is MetadataGeneralPreprocessor
+
+
+def test_enforce_virus_scanner_happy_path_does_not_trigger_scan(mocker, test_service):
+    file_key = "expedite/folder/file.pdf"
+
+    mock_check = mocker.patch.object(
+        test_service.s3_repo,
+        "check_file_tag_status_on_staging_bucket",
+        return_value=VirusScanResult.CLEAN,
+    )
+    mock_scan = mocker.patch.object(test_service.virus_scan_service, "scan_file")
+
+    test_service.enforce_virus_scanner(file_key)
+
+    mock_check.assert_called_once_with(file_key)
+    mock_scan.assert_not_called()
+
+
+def test_enforce_virus_scanner_triggers_scan_when_no_result(mocker, test_service):
+    file_key = "expedite/folder/file.pdf"
+
+    mocker.patch.object(
+        test_service.s3_repo,
+        "check_file_tag_status_on_staging_bucket",
+        return_value="",
+    )
+    mock_scan = mocker.patch.object(test_service.virus_scan_service, "scan_file")
+
+    test_service.enforce_virus_scanner(file_key)
+
+    mock_scan.assert_called_once_with(file_ref=file_key)
+
+
+def test_enforce_virus_scanner_raises_bulk_exception_on_s3_access_error(
+    mocker, test_service
+):
+    file_key = "expedite/folder/file.pdf"
+    client_error = ClientError(
+        {"Error": {"Code": "403", "Message": "NoSuchKey: object not found"}},
+        "GetObject",
+    )
+
+    mocker.patch.object(
+        test_service.s3_repo,
+        "check_file_tag_status_on_staging_bucket",
+        side_effect=client_error,
+    )
+    mock_scan = mocker.patch.object(test_service.virus_scan_service, "scan_file")
+
+    with pytest.raises(BulkUploadMetadataException) as excinfo:
+        test_service.enforce_virus_scanner(file_key)
+
+    assert f"Failed to access S3 file {file_key} during tag check." in str(
+        excinfo.value
+    )
+    mock_scan.assert_not_called()
+
+
+def test_enforce_virus_scanner_re_raises_unexpected_client_error(mocker, test_service):
+    file_key = "expedite/folder/file.pdf"
+    client_error = ClientError(
+        {"Error": {"Code": "500", "Message": "InternalError"}},
+        "GetObject",
+    )
+
+    mocker.patch.object(
+        test_service.s3_repo,
+        "check_file_tag_status_on_staging_bucket",
+        side_effect=client_error,
+    )
+    mock_scan = mocker.patch.object(test_service.virus_scan_service, "scan_file")
+
+    with pytest.raises(ClientError):
+        test_service.enforce_virus_scanner(file_key)
+
+    mock_scan.assert_not_called()
+
+
+def test_check_file_status_clean_does_nothing(mocker, test_service, caplog):
+    file_key = "expedite/folder/file.pdf"
+    mock_check = mocker.patch.object(
+        test_service.s3_repo,
+        "check_file_tag_status_on_staging_bucket",
+        return_value=VirusScanResult.CLEAN,
+    )
+
+    with caplog.at_level("INFO"):
+        test_service.check_file_status(file_key)
+
+    mock_check.assert_called_once_with(file_key)
+    assert not any(
+        "Found an issue with the file" in record.msg for record in caplog.records
+    )
+
+
+def test_check_file_status_logs_issue_when_not_clean(mocker, test_service, caplog):
+    file_key = "expedite/folder/file.pdf"
+    mocker.patch.object(
+        test_service.s3_repo,
+        "check_file_tag_status_on_staging_bucket",
+        return_value=VirusScanResult.INFECTED,
+    )
+
+    with caplog.at_level("INFO"):
+        with pytest.raises(VirusScanFailedException):
+            test_service.check_file_status(file_key)
+
+    assert any(
+        f"Found an issue with the file {file_key}." in record.msg
+        for record in caplog.records
+    )
\ No newline at end of file
diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py
index 6f8ca4593..2b336e903 100644
--- a/lambdas/tests/unit/services/test_bulk_upload_service.py
+++ b/lambdas/tests/unit/services/test_bulk_upload_service.py
@@ -50,6 +50,7 @@
     PatientRecordAlreadyExistException,
     PdsTooManyRequestsException,
     S3FileNotFoundException,
+    TagNotFoundException,
     VirusScanNoResultException,
 )
 from utils.lloyd_george_validator import LGInvalidFilesException
@@ -1097,3 +1098,62 @@ def test_patient_not_found_is_caught_and_written_to_dynamo(
     assert call_status == UploadStatus.FAILED
     assert call_reason == expected_error_message
     assert call_metadata == TEST_STAGING_METADATA
+
+
+@pytest.fixture
+def repo(mocker):
+    r = BulkUploadS3Repository.__new__(BulkUploadS3Repository)  # skip __init__
+    r.s3_repository = mocker.Mock()
+    r.staging_bucket_name = MOCK_STAGING_STORE_BUCKET
+    return r
+
+
+@pytest.fixture
+def file_key():
+    return "expedite/folder/file.pdf"
+
+
+def test_check_file_tag_status_returns_value_when_tag_exists(repo, file_key):
+    repo.s3_repository.get_tag_value.return_value = "CLEAN"
+
+    result = repo.check_file_tag_status_on_staging_bucket(file_key)
+
+    assert result == "CLEAN"
+    repo.s3_repository.get_tag_value.assert_called_once_with(
+        s3_bucket_name=MOCK_STAGING_STORE_BUCKET,
+        file_key=file_key,
+        tag_key=SCAN_RESULT_TAG_KEY,
+    )
+
+
+def test_check_file_tag_status_returns_empty_string_when_tag_missing(repo, file_key):
+    repo.s3_repository.get_tag_value.side_effect = TagNotFoundException("no tag")
+
+    result = repo.check_file_tag_status_on_staging_bucket(file_key)
+
+    assert result == ""
+    repo.s3_repository.get_tag_value.assert_called_once_with(
+        s3_bucket_name=MOCK_STAGING_STORE_BUCKET,
+        file_key=file_key,
+        tag_key=SCAN_RESULT_TAG_KEY,
+    )
+
+
+@pytest.mark.parametrize("fragment", ["AccessDenied", "NoSuchKey"])
+def test_wraps_access_errors_as_s3_not_found(repo, file_key, fragment):
+    repo.s3_repository.get_tag_value.side_effect = ClientError(
+        {"Error": {"Code": "S3Error", "Message": fragment}}, "GetObject"
+    )
+
+    with pytest.raises(S3FileNotFoundException):
+        repo.check_file_tag_status_on_staging_bucket(file_key)
+
+
+def test_reraises_other_client_errors(repo, file_key):
+    repo.s3_repository.get_tag_value.side_effect = ClientError(
+        {"Error": {"Code": "ThrottlingException", "Message": "Rate exceeded"}},
+        "GetObject",
+    )
+
+    with pytest.raises(ClientError):
+        repo.check_file_tag_status_on_staging_bucket(file_key)
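
For reference, a minimal sketch of the expedite flow this change introduces, runnable without AWS access. It is not part of the patch: it builds the service via __new__ (the same trick the repo fixture above uses to skip __init__) and swaps in mocks for s3_repo and virus_scan_service. The bucket name, object key, and the use of side_effect to stage two tag checks are made-up illustration; the scan_file(file_ref=...) signature and the event shape come from the diff.

from unittest.mock import MagicMock

from enums.virus_scan_result import VirusScanResult
from services.bulk_upload_metadata_processor_service import (
    BulkUploadMetadataProcessorService,
)

# Build the service without running __init__, so no real AWS clients are created.
service = BulkUploadMetadataProcessorService.__new__(BulkUploadMetadataProcessorService)
service.s3_repo = MagicMock()
service.virus_scan_service = MagicMock()
service.staging_bucket_name = "example-staging-bucket"  # hypothetical value

# First tag check returns "" (never scanned), so enforce_virus_scanner triggers a
# scan; the second check, made by check_file_status, then reports CLEAN.
service.s3_repo.check_file_tag_status_on_staging_bucket.side_effect = [
    "",
    VirusScanResult.CLEAN,
]

# EventBridge delivers S3 object keys URL-encoded; handle_expedite_event decodes
# them with urllib.parse.unquote_plus before testing the "expedite/" prefix.
event = {
    "source": "aws.s3",
    "detail": {"object": {"key": "expedite%2F1of1_example.pdf"}},
}

service.handle_expedite_event(event)

# The missing tag forced exactly one scan of the decoded key.
service.virus_scan_service.scan_file.assert_called_once_with(
    file_ref="expedite/1of1_example.pdf"
)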