Skip to content

Commit ce7a52c

Browse files
authored
[PRMP-895] Add document review processing to virus scan handler (#923)
1 parent a993ea8 commit ce7a52c

22 files changed

+1146
-86
lines changed

lambdas/handlers/document_reference_virus_scan_handler.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from enums.logging_app_interaction import LoggingAppInteraction
2+
from services.staged_document_review_processing_service import (
3+
StagedDocumentReviewProcessingService,
4+
)
25
from services.upload_document_reference_service import UploadDocumentReferenceService
36
from utils.audit_logging_setup import LoggingService
47
from utils.decorators.ensure_env_var import ensure_environment_variables
5-
from utils.decorators.handle_lambda_exceptions import handle_lambda_exceptions
68
from utils.decorators.set_audit_arg import set_request_context_for_logging
79
from utils.lambda_response import ApiGatewayResponse
810
from utils.request_context import request_context
@@ -19,19 +21,26 @@
1921
"LLOYD_GEORGE_BUCKET_NAME",
2022
"PDM_BUCKET_NAME",
2123
"VIRUS_SCAN_STUB",
24+
"DOCUMENT_REVIEW_DYNAMODB_NAME",
25+
"PENDING_REVIEW_BUCKET_NAME"
2226
]
2327
)
24-
@handle_lambda_exceptions
2528
def lambda_handler(event, context):
2629
request_context.app_interaction = LoggingAppInteraction.VIRUS_SCAN.value
2730
upload_document_reference_service = UploadDocumentReferenceService()
31+
review_upload_document_reference_service = StagedDocumentReviewProcessingService()
2832

2933
for s3_object in event.get("Records"):
3034
object_key = s3_object["s3"]["object"]["key"]
3135
object_size = s3_object["s3"]["object"]["size"]
32-
upload_document_reference_service.handle_upload_document_reference_request(
33-
object_key, object_size
34-
)
36+
if object_key.startswith("review/"):
37+
logger.info("Using review document service")
38+
service = review_upload_document_reference_service
39+
else:
40+
logger.info("Using upload document service")
41+
service = upload_document_reference_service
42+
43+
service.handle_upload_document_reference_request(object_key, object_size)
3544

3645
return ApiGatewayResponse(
3746
200, "Virus Scan was successful", "POST"

lambdas/requirements/layers/requirements_core_lambda_layer.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PyJWT==2.8.0
22
PyYAML==6.0.1
3-
boto3==1.40.71
4-
botocore==1.40.71
3+
boto3==1.42.1
4+
botocore==1.42.1
55
charset-normalizer==3.2.0
66
cryptography==44.0.1
77
idna==3.7

lambdas/requirements/requirements_test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
MarkupSafe==2.1.3
22
black==24.3.0
33
freezegun==1.5.4
4-
isort==5.13.0
4+
isort==7.0.0
55
pip-audit==2.6.1
66
pytest-cov==4.1.0
77
pytest-mock==3.11.1

lambdas/services/base/dynamo_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def update_item(
196196
table_name: str,
197197
key_pair: dict[str, str],
198198
updated_fields: dict,
199-
condition_expression: str | None = None,
199+
condition_expression: str | ConditionBase| None = None,
200200
expression_attribute_values: dict | None = None,
201201
):
202202
table = self.get_table(table_name)

lambdas/services/base/s3_service.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -116,22 +116,25 @@ def copy_across_bucket(
116116
source_file_key: str,
117117
dest_bucket: str,
118118
dest_file_key: str,
119-
if_none_match: str | None = None,
119+
if_none_match: bool = False,
120+
retry_on_conflict: bool = True,
120121
):
121-
if if_none_match is not None:
122-
return self.client.copy_object(
123-
Bucket=dest_bucket,
124-
Key=dest_file_key,
125-
CopySource={"Bucket": source_bucket, "Key": source_file_key},
126-
IfNoneMatch=if_none_match,
127-
StorageClass="INTELLIGENT_TIERING",
128-
)
129-
return self.client.copy_object(
130-
Bucket=dest_bucket,
131-
Key=dest_file_key,
132-
CopySource={"Bucket": source_bucket, "Key": source_file_key},
133-
StorageClass="INTELLIGENT_TIERING",
134-
)
122+
copy_source_params = {"Bucket": source_bucket, "Key": source_file_key}
123+
copy_object_params = {"Bucket": dest_bucket, "Key": dest_file_key, "CopySource": copy_source_params, "StorageClass": "INTELLIGENT_TIERING"}
124+
if if_none_match:
125+
copy_object_params["IfNoneMatch"] = '*'
126+
try:
127+
return self.client.copy_object(**copy_object_params)
128+
except ClientError as e:
129+
if e.response["ResponseMetadata"]["HTTPStatusCode"] == 409:
130+
logger.info(f"Copy failed due to conflict, retrying: {e}")
131+
if retry_on_conflict:
132+
return self.copy_across_bucket(source_bucket, source_file_key, dest_bucket, dest_file_key, if_none_match, False)
133+
else:
134+
raise e
135+
else:
136+
logger.error(f"Copy failed: {e}")
137+
raise e
135138

136139
def delete_object(
137140
self, s3_bucket_name: str, file_key: str, version_id: str | None = None

lambdas/services/document_service.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def get_item(
142142
return document
143143

144144
except ValidationError as e:
145+
logger.error(f"Validation error on document: {response.get('Item')}")
145146
logger.error(f"{e}")
146147
return None
147148

@@ -213,11 +214,11 @@ def delete_document_object(self, bucket: str, key: str):
213214
def update_document(
214215
self,
215216
table_name: str | None = None,
216-
update_key: dict[str, str] | None = None,
217217
document: BaseModel = None,
218218
update_fields_name: set[str] | None = None,
219219
condition_expression: str | Attr | ConditionBase = None,
220220
expression_attribute_values: dict = None,
221+
key_pair: dict | None = None
221222
):
222223
"""Update document in specified or configured table."""
223224
table_name = table_name or self.table_name
@@ -228,8 +229,8 @@ def update_document(
228229
exclude_none=True, by_alias=True, include=update_fields_name
229230
),
230231
}
231-
if update_key:
232-
update_kwargs["key_pair"] = update_key
232+
if key_pair:
233+
update_kwargs["key_pair"] = key_pair
233234
else:
234235
update_kwargs["key_pair"] = {
235236
DocumentReferenceMetadataFields.ID.value: document.id

lambdas/services/document_upload_review_service.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pydantic import ValidationError
1111
from services.document_service import DocumentService
1212
from utils.audit_logging_setup import LoggingService
13+
from utils.aws_transient_error_check import is_transient_error
1314
from utils.dynamo_query_filter_builder import DynamoQueryFilterBuilder
1415
from utils.dynamo_utils import build_transaction_item
1516
from utils.exceptions import DocumentReviewException
@@ -49,7 +50,7 @@ def query_docs_pending_review_by_custodian_with_limit(
4950
) -> tuple[list[DocumentUploadReviewReference], dict | None]:
5051
logger.info(f"Getting review document references for custodian: {ods_code}")
5152

52-
filter_expression = self.build_review_query_filter(
53+
filter_expression = self.build_review_dynamo_filter(
5354
nhs_number=nhs_number, uploader=uploader
5455
)
5556

@@ -72,7 +73,7 @@ def query_docs_pending_review_by_custodian_with_limit(
7273

7374
except ClientError as e:
7475
logger.error(e)
75-
raise DocumentReviewException("Failed to query document reviews")
76+
raise DocumentReviewException("Error querying document review references")
7677

7778
def _validate_review_references(
7879
self, items: list[dict]
@@ -85,9 +86,7 @@ def _validate_review_references(
8586
return review_references
8687
except ValidationError as e:
8788
logger.error(e)
88-
raise DocumentReviewException(
89-
"Failed to validate document review references"
90-
)
89+
raise DocumentReviewException("Error validating document review references")
9190

9291
def get_document(
9392
self, document_id: str, version: int | None
@@ -120,10 +119,29 @@ def update_document_review_custodian(
120119

121120
self.update_document(
122121
document=review,
123-
update_key={"ID": review.id, "Version": review.version},
122+
key_pair={"ID": review.id, "Version": review.version},
124123
update_fields_name=review_update_field,
125124
)
126125

126+
def update_document_review_status(
127+
self,
128+
review_document: DocumentUploadReviewReference,
129+
condition_expression: str | ConditionBase | None = None,
130+
):
131+
review_update_field = {"review_status", "files"}
132+
try:
133+
self.update_document(
134+
document=review_document,
135+
key_pair={"ID": review_document.id, "Version": review_document.version},
136+
update_fields_name=review_update_field,
137+
condition_expression=condition_expression,
138+
)
139+
except ClientError as e:
140+
logger.error(e)
141+
if is_transient_error(e):
142+
raise e
143+
raise DocumentReviewException("Error updating document review status")
144+
127145
def get_document_review_by_id(self, document_id: str, document_version: int):
128146
return self.get_item(document_id, {"Version": document_version})
129147

@@ -169,7 +187,7 @@ def update_document_review_for_patient(
169187
try:
170188
return self.update_document(
171189
document=review_update,
172-
update_key={"ID": review_update.id, "Version": review_update.version},
190+
key_pair={"ID": review_update.id, "Version": review_update.version},
173191
update_fields_name=field_names,
174192
condition_expression=condition_expression,
175193
)
@@ -273,13 +291,14 @@ def delete_document_review_files(
273291
logger.warning(f"Skipping file deletion for {file.file_name}")
274292
continue
275293

276-
def build_review_query_filter(
277-
self, nhs_number: str | None = None, uploader: str | None = None
294+
def build_review_dynamo_filter(
295+
self, nhs_number: str | None = None, uploader: str | None = None, status: DocumentReviewStatus | None = DocumentReviewStatus.PENDING_REVIEW
278296
) -> Attr | ConditionBase:
279297
filter_builder = DynamoQueryFilterBuilder()
280-
filter_builder.add_condition(
281-
"ReviewStatus", AttributeOperator.EQUAL, DocumentReviewStatus.PENDING_REVIEW
282-
)
298+
if status:
299+
filter_builder.add_condition(
300+
"ReviewStatus", AttributeOperator.EQUAL, status
301+
)
283302

284303
if nhs_number:
285304
filter_builder.add_condition(

lambdas/services/mock_virus_scan_service.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,16 @@
1919

2020

2121
class MockVirusScanService:
22+
_instance = None
23+
24+
def __new__(cls, *args, **kwargs):
25+
if cls._instance is None:
26+
cls._instance = super(MockVirusScanService, cls).__new__(cls)
27+
return cls._instance
28+
2229
def __init__(self):
30+
if hasattr(self, "_initialized") and self._initialized:
31+
return
2332
logger.info("Virus scan service is set to mock virus scan service")
2433

2534
infected_nhs_numbers_str = os.getenv("INFECTED_NHS_NUMBERS")
@@ -30,6 +39,7 @@ def __init__(self):
3039
)
3140
if self.infected_nhs_numbers:
3241
logger.info(f"Infected NHS numbers are set to: {self.infected_nhs_numbers}")
42+
self._initialized = True
3343

3444
def scan_file(self, file_ref: str, *args, **kwargs) -> VirusScanResult:
3545
nhs_number = kwargs.get("nhs_number")

0 commit comments

Comments
 (0)