Skip to content

Commit 41e3832

Browse files
authored
[PRM-52] Edge lambda S3 presign url changes (#607)
* [PRM-52] Changes to edge lambda to hide pre-sign url
* [PRM-52] Reformat edge service
* [PRM-52] Change expiry
1 parent c451231 commit 41e3832

File tree

11 files changed

+182
-279
lines changed

11 files changed

+182
-279
lines changed
Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,24 @@
1-
import logging
2-
31
from services.edge_presign_service import EdgePresignService
2+
from utils.audit_logging_setup import LoggingService
43
from utils.decorators.handle_edge_exceptions import handle_edge_exceptions
54
from utils.decorators.override_error_check import override_error_check
65
from utils.decorators.set_audit_arg import set_request_context_for_logging
7-
from utils.decorators.validate_s3_request import validate_s3_request
86

9-
logger = logging.getLogger()
10-
logger.setLevel(logging.INFO)
7+
logger = LoggingService(__name__)
118

129

1310
@set_request_context_for_logging
1411
@override_error_check
1512
@handle_edge_exceptions
16-
@validate_s3_request
1713
def lambda_handler(event, context):
1814
request: dict = event["Records"][0]["cf"]["request"]
1915
logger.info("Edge received S3 request")
16+
logger.info(f"Request: {request}")
2017

2118
edge_presign_service = EdgePresignService()
22-
request_values: dict = edge_presign_service.filter_request_values(request)
23-
edge_presign_service.use_presign(request_values)
19+
modified_request = edge_presign_service.use_presigned(request)
2420

25-
forwarded_request: dict = edge_presign_service.update_s3_headers(
26-
request, request_values
27-
)
21+
forwarded_request: dict = edge_presign_service.update_s3_headers(modified_request)
2822

2923
logger.info("Edge forwarding S3 request")
3024
return forwarded_request

lambdas/requirements/requirements_edge_lambda.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
asposestorage==1.0.2
21
boto3==1.35.3
32
botocore==1.35.3
43
pydantic==2.8.2

lambdas/services/base/dynamo_service.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,13 @@ def update_item(
128128
"UpdateExpression": update_expression,
129129
"ExpressionAttributeNames": expression_attribute_names,
130130
"ExpressionAttributeValues": generated_expression_attribute_values,
131+
"ReturnValues": "ALL_NEW",
131132
}
132133

133134
if condition_expression:
134135
update_item_args["ConditionExpression"] = condition_expression
135136

136-
table.update_item(**update_item_args)
137+
return table.update_item(**update_item_args)
137138

138139
def delete_item(self, table_name: str, key: dict):
139140
try:

lambdas/services/edge_presign_service.py

Lines changed: 53 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
import hashlib
21
import re
32

43
from botocore.exceptions import ClientError
54
from enums.lambda_error import LambdaError
65
from services.base.dynamo_service import DynamoDBService
7-
from services.base.s3_service import S3Service
86
from services.base.ssm_service import SSMService
97
from utils.audit_logging_setup import LoggingService
108
from utils.lambda_exceptions import CloudFrontEdgeException
@@ -15,83 +13,74 @@
1513
class EdgePresignService:
1614
def __init__(self):
1715
self.dynamo_service = DynamoDBService()
18-
self.s3_service = S3Service()
1916
self.ssm_service = SSMService()
2017
self.table_name_ssm_param = "EDGE_REFERENCE_TABLE"
2118

22-
def use_presign(self, request_values: dict):
23-
uri: str = request_values["uri"]
24-
querystring: str = request_values["querystring"]
25-
domain_name: str = request_values["domain_name"]
19+
def use_presigned(self, request_values: dict) -> dict:
20+
request_id = self._extract_request_id(request_values)
21+
domain_name = self._extract_domain_name(request_values)
2622

27-
presign_string: str = f"{uri}?{querystring}"
28-
encoded_presign_string = presign_string.encode("utf-8")
29-
presign_credentials_hash: str = hashlib.md5(encoded_presign_string).hexdigest()
23+
presigned_url = self._attempt_presigned_ingestion(request_id, domain_name)
24+
self._update_request_with_presigned_url(request_values, presigned_url)
3025

31-
self.attempt_presign_ingestion(
32-
uri_hash=presign_credentials_hash,
33-
domain_name=domain_name,
34-
)
26+
return request_values
3527

36-
def attempt_presign_ingestion(self, uri_hash: str, domain_name: str) -> None:
28+
def _attempt_presigned_ingestion(self, request_id: str, domain_name: str) -> str:
3729
try:
38-
environment = self.filter_domain_for_env(domain_name)
39-
logger.info(f"Environment found: {environment}")
40-
base_table_name: str = self.ssm_service.get_ssm_parameter(
41-
self.table_name_ssm_param
42-
)
43-
formatted_table_name: str = self.extend_table_name(
44-
base_table_name, environment
45-
)
46-
logger.info(f"Table: {formatted_table_name}")
47-
self.dynamo_service.update_item(
48-
table_name=formatted_table_name,
49-
key_pair={"ID": uri_hash},
50-
updated_fields={"IsRequested": True},
51-
condition_expression="attribute_not_exists(IsRequested) OR IsRequested = :false",
52-
expression_attribute_values={":false": False},
53-
)
30+
environment = self._filter_domain_for_env(domain_name)
31+
table_name = self._get_formatted_table_name(environment)
32+
updated_item = self._update_dynamo_item(table_name, request_id)
33+
return self._extract_presigned_url(updated_item)
5434
except ClientError as e:
5535
logger.error(f"{str(e)}", {"Result": LambdaError.EdgeNoClient.to_str()})
5636
raise CloudFrontEdgeException(400, LambdaError.EdgeNoClient)
5737

58-
@staticmethod
59-
def update_s3_headers(request: dict, request_values: dict):
60-
domain_name = request_values["domain_name"]
61-
if "authorization" in request["headers"]:
62-
del request["headers"]["authorization"]
38+
def update_s3_headers(self, request: dict) -> dict:
39+
domain_name = self._extract_domain_name(request)
40+
request["headers"].pop("authorization", None)
6341
request["headers"]["host"] = [{"key": "Host", "value": domain_name}]
64-
6542
return request
6643

67-
@staticmethod
68-
def filter_request_values(request: dict) -> dict:
69-
try:
70-
uri: str = request["uri"]
71-
querystring: str = request["querystring"]
72-
headers: dict = request["headers"]
73-
origin: dict = request.get("origin", {})
74-
domain_name: str = origin["s3"]["domainName"]
75-
except KeyError as e:
76-
logger.error(f"Missing request component: {str(e)}")
77-
raise CloudFrontEdgeException(500, LambdaError.EdgeNoOrigin)
78-
79-
return {
80-
"uri": uri,
81-
"querystring": querystring,
82-
"headers": headers,
83-
"domain_name": domain_name,
84-
}
44+
def _extract_request_id(self, request_values: dict) -> str:
45+
return request_values.get("uri", "").lstrip("/")
8546

86-
@staticmethod
87-
def filter_domain_for_env(domain_name: str) -> str:
47+
def _extract_domain_name(self, request_values: dict) -> str:
48+
return request_values.get("origin", {}).get("s3", {}).get("domainName", "")
49+
50+
def _update_request_with_presigned_url(
51+
self, request_values: dict, presigned_url: str
52+
):
53+
question_mark_index = presigned_url.find("?")
54+
querystring = (
55+
presigned_url[question_mark_index + 1 :]
56+
if question_mark_index != -1
57+
else ""
58+
)
59+
url_parts = (
60+
presigned_url[:question_mark_index].split("/")
61+
if question_mark_index != -1
62+
else presigned_url.split("/")
63+
)
64+
request_values["querystring"] = querystring
65+
request_values["uri"] = "/" + "/".join(url_parts[3:])
66+
67+
def _filter_domain_for_env(self, domain_name: str) -> str:
8868
match = re.match(r"^[^-]+(?:-[^-]+)?(?=-lloyd)", domain_name)
89-
if match:
90-
return match.group(0)
91-
return ""
69+
return match.group(0) if match else ""
70+
71+
def _get_formatted_table_name(self, environment: str) -> str:
72+
base_table_name = self.ssm_service.get_ssm_parameter(self.table_name_ssm_param)
73+
return f"{environment}_{base_table_name}" if environment else base_table_name
74+
75+
def _update_dynamo_item(self, table_name: str, request_id: str) -> dict:
76+
return self.dynamo_service.update_item(
77+
table_name=table_name,
78+
key_pair={"ID": request_id},
79+
updated_fields={"IsRequested": True},
80+
condition_expression="attribute_not_exists(IsRequested) OR IsRequested = :false",
81+
expression_attribute_values={":false": False},
82+
)
9283

9384
@staticmethod
94-
def extend_table_name(base_table_name: str, environment: str) -> str:
95-
if environment:
96-
return f"{environment}_{base_table_name}"
97-
return base_table_name
85+
def _extract_presigned_url(updated_item: dict) -> str:
86+
return updated_item.get("Attributes", {}).get("presignedUrl", "")

lambdas/services/lloyd_george_stitch_job_service.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
2-
from datetime import datetime, timedelta
2+
import uuid
3+
from datetime import datetime, timedelta, timezone
34

45
from botocore.exceptions import ClientError
56
from enums.dynamo_filter import AttributeOperator
@@ -30,7 +31,7 @@ def __init__(self):
3031
self.document_service = DocumentService()
3132
self.stitch_trace_table = os.environ.get("STITCH_METADATA_DYNAMODB_NAME")
3233
self.lloyd_george_table_name = os.environ.get("LLOYD_GEORGE_DYNAMODB_NAME")
33-
34+
self.cloudfront_table_name = os.environ.get("EDGE_REFERENCE_TABLE")
3435
self.cloudfront_url = os.environ.get("CLOUDFRONT_URL")
3536
self.lloyd_george_bucket_name = os.environ.get("LLOYD_GEORGE_BUCKET_NAME")
3637

@@ -162,7 +163,20 @@ def create_document_stitch_presigned_url(self, stitched_file_location):
162163
s3_bucket_name=self.lloyd_george_bucket_name,
163164
file_key=stitched_file_location,
164165
)
165-
return format_cloudfront_url(presign_url_response, self.cloudfront_url)
166+
presigned_id = str(uuid.uuid4())
167+
deletion_date = datetime.now(timezone.utc)
168+
169+
ttl_half_an_hour_in_seconds = self.s3_service.presigned_url_expiry
170+
dynamo_item_ttl = int(deletion_date.timestamp() + ttl_half_an_hour_in_seconds)
171+
self.dynamo_service.create_item(
172+
self.cloudfront_table_name,
173+
{
174+
"ID": presigned_id,
175+
"presignedUrl": presign_url_response,
176+
"TTL": dynamo_item_ttl,
177+
},
178+
)
179+
return format_cloudfront_url(presigned_id, self.cloudfront_url)
166180

167181
def check_lloyd_george_record_for_patient(self, nhs_number) -> None:
168182
try:

lambdas/tests/unit/enums/test_edge_presign_values.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@
3636
EXPECTED_EDGE_MALFORMED_ERROR_MESSAGE = LambdaError.EdgeMalformed.value["message"]
3737
EXPECTED_EDGE_MALFORMED_ERROR_CODE = LambdaError.EdgeMalformed.value["err_code"]
3838

39-
39+
MOCK_PRESIGNED_URL = "https://presigned-url.com/path/to/resource"
4040
MOCK_S3_EDGE_EVENT = {
4141
"Records": [
4242
{
4343
"cf": {
4444
"request": {
4545
"headers": MOCKED_HEADERS,
46-
"querystring": MOCKED_AUTH_QUERY,
46+
"querystring": "",
4747
"uri": "/some/path",
4848
"origin": {
4949
"s3": {

0 commit comments

Comments
 (0)