Skip to content

Commit 5271e16

Browse files
PRMP 1036 - Fix PDF Intermittence
1 parent 9d9cb03 commit 5271e16

File tree

10 files changed

+413
-236
lines changed

10 files changed

+413
-236
lines changed

app/src/helpers/requests/getLloydGeorgeRecord.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,8 @@ export const pollForPresignedUrl = async ({
8686
if (data.jobStatus === JOB_STATUS.COMPLETED && !data.presignedUrl.startsWith('https://')) {
8787
return Promise.reject({ response: { status: 500 } });
8888
}
89-
90-
return {
91-
...data,
92-
presignedUrl: `${data.presignedUrl}&origin=${
93-
typeof window !== 'undefined' ? window.location.href : ''
94-
}`,
95-
};
89+
const result: LloydGeorgeStitchResult = data;
90+
return result;
9691
};
92+
9793
export default getLloydGeorgeRecord;

lambdas/enums/lambda_error.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,12 +396,29 @@ def to_str(self) -> str:
396396
"""
397397
EdgeMalformed = {
398398
"err_code": "CE_5001",
399-
"message": "Malformed event structure or missing data",
399+
"message": "Malformed cloudfront request",
400400
}
401+
401402
EdgeNoOrigin = {
402403
"err_code": "CE_5002",
403404
"message": "The request is missing an origin",
404405
}
406+
407+
EdgeNoQuery = {
408+
"err_code": "CE_5003",
409+
"message": "The request is missing a querystring",
410+
}
411+
412+
EdgeRequiredQuery = {
413+
"err_code": "CE_5004",
414+
"message": "Missing required querystring values",
415+
}
416+
417+
EdgeRequiredHeaders = {
418+
"err_code": "CE_5005",
419+
"message": "Malformed header structure or missing data",
420+
}
421+
405422
EdgeNoClient = {"err_code": "CE_4001", "message": "Document not found"}
406423

407424
"""
Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,10 @@
1-
import hashlib
2-
import json
31
import logging
4-
from urllib.parse import parse_qs
52

6-
from enums.lambda_error import LambdaError
73
from services.edge_presign_service import EdgePresignService
84
from utils.decorators.handle_edge_exceptions import handle_edge_exceptions
95
from utils.decorators.override_error_check import override_error_check
106
from utils.decorators.set_audit_arg import set_request_context_for_logging
11-
from utils.lambda_exceptions import CloudFrontEdgeException
7+
from utils.decorators.validate_s3_request import validate_s3_request
128

139
logger = logging.getLogger()
1410
logger.setLevel(logging.INFO)
@@ -17,40 +13,18 @@
1713
@set_request_context_for_logging
1814
@override_error_check
1915
@handle_edge_exceptions
16+
@validate_s3_request
2017
def lambda_handler(event, context):
21-
try:
22-
request: dict = event["Records"][0]["cf"]["request"]
23-
logger.info("CloudFront received S3 request", {"Result": {json.dumps(request)}})
24-
uri: str = request.get("uri", "")
25-
presign_query_string: str = request.get("querystring", "")
26-
27-
except (KeyError, IndexError) as e:
28-
logger.error(
29-
f"{str(e)}",
30-
{"Result": {LambdaError.EdgeMalformed.to_str()}},
31-
)
32-
raise CloudFrontEdgeException(500, LambdaError.EdgeMalformed)
33-
34-
s3_presign_credentials = parse_qs(presign_query_string)
35-
origin_url = s3_presign_credentials.get("origin", [""])[0]
36-
if not origin_url:
37-
logger.error(
38-
"No Origin",
39-
{"Result": {LambdaError.EdgeNoOrigin.to_str()}},
40-
)
41-
raise CloudFrontEdgeException(500, LambdaError.EdgeNoOrigin)
42-
43-
presign_string = f"{uri}?{presign_query_string}"
44-
encoded_presign_string: str = presign_string.encode("utf-8")
45-
presign_credentials_hash = hashlib.md5(encoded_presign_string).hexdigest()
18+
request: dict = event["Records"][0]["cf"]["request"]
19+
logger.info("Edge received S3 request")
4620

4721
edge_presign_service = EdgePresignService()
48-
edge_presign_service.attempt_url_update(
49-
uri_hash=presign_credentials_hash, origin_url=origin_url
50-
)
22+
request_values: dict = edge_presign_service.filter_request_values(request)
23+
edge_presign_service.use_presign(request_values)
5124

52-
headers: dict = request.get("headers", {})
53-
if "authorization" in headers:
54-
del headers["authorization"]
25+
forwarded_request: dict = edge_presign_service.update_s3_headers(
26+
request, request_values
27+
)
5528

56-
return request
29+
logger.info("Edge forwarding S3 request")
30+
return forwarded_request

lambdas/services/edge_presign_service.py

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
import re
23

34
from botocore.exceptions import ClientError
@@ -12,23 +13,37 @@
1213

1314

1415
class EdgePresignService:
15-
1616
def __init__(self):
1717
self.dynamo_service = DynamoDBService()
1818
self.s3_service = S3Service()
1919
self.ssm_service = SSMService()
2020
self.table_name_ssm_param = "EDGE_REFERENCE_TABLE"
2121

22-
def attempt_url_update(self, uri_hash, origin_url) -> None:
22+
def use_presign(self, request_values: dict) -> None:
    """Hash the presigned request and hand it to attempt_presign_ingestion.

    The hash is the MD5 hex digest of ``"<uri>?<querystring>"`` encoded as
    UTF-8.

    Args:
        request_values: mapping that must contain the "uri", "querystring"
            and "domain_name" keys.

    Raises:
        KeyError: if any of the three required keys is absent.
    """
    uri: str = request_values["uri"]
    querystring: str = request_values["querystring"]
    domain_name: str = request_values["domain_name"]

    presign_string: str = f"{uri}?{querystring}"
    # str.encode() returns bytes, which is what hashlib consumes.
    encoded_presign_string: bytes = presign_string.encode("utf-8")
    # The digest is passed on as a lookup key (uri_hash), so the algorithm
    # and the exact input string must not change.
    presign_credentials_hash: str = hashlib.md5(encoded_presign_string).hexdigest()

    self.attempt_presign_ingestion(
        uri_hash=presign_credentials_hash,
        domain_name=domain_name,
    )
35+
36+
def attempt_presign_ingestion(self, uri_hash: str, domain_name: str) -> None:
2337
try:
24-
environment = self.extract_environment_from_url(origin_url)
38+
environment = self.filter_domain_for_env(domain_name)
39+
logger.info(f"Environment found: {environment}")
2540
base_table_name: str = self.ssm_service.get_ssm_parameter(
2641
self.table_name_ssm_param
2742
)
2843
formatted_table_name: str = self.extend_table_name(
2944
base_table_name, environment
3045
)
31-
46+
logger.info(f"Table: {formatted_table_name}")
3247
self.dynamo_service.update_item(
3348
table_name=formatted_table_name,
3449
key=uri_hash,
@@ -40,13 +55,43 @@ def attempt_url_update(self, uri_hash, origin_url) -> None:
4055
logger.error(f"{str(e)}", {"Result": LambdaError.EdgeNoClient.to_str()})
4156
raise CloudFrontEdgeException(400, LambdaError.EdgeNoClient)
4257

43-
def extract_environment_from_url(self, url: str) -> str:
44-
match = re.search(r"https://([^.]+)\.[^.]+\.[^.]+\.[^.]+", url)
58+
@staticmethod
def update_s3_headers(request: dict, request_values: dict) -> dict:
    """Prepare the CloudFront request headers before it is forwarded.

    Removes any "authorization" header and overwrites the "host" header
    with the domain name found in ``request_values``. The request is
    modified in place and the same object is returned.

    Args:
        request: request mapping; must contain a "headers" mapping.
        request_values: mapping that must contain "domain_name".

    Returns:
        The same ``request`` object, with its headers updated.

    Raises:
        KeyError: if "domain_name" or "headers" is missing.
    """
    # Read both inputs before mutating anything so that a missing key
    # leaves the request untouched.
    domain_name = request_values["domain_name"]
    headers: dict = request["headers"]

    # pop() with a default removes the header when present and is a no-op
    # otherwise, replacing the explicit membership test + del.
    headers.pop("authorization", None)
    # Header entries use the list-of-{"key", "value"} shape.
    # NOTE(review): presumably the host must match the S3 origin so the
    # presigned signature validates — confirm.
    headers["host"] = [{"key": "Host", "value": domain_name}]

    return request
66+
67+
@staticmethod
def filter_request_values(request: dict) -> dict:
    """Extract the request fields the presign flow needs.

    Args:
        request: request mapping expected to contain "uri", "querystring",
            "headers" and an "origin" mapping holding ``s3.domainName``.

    Returns:
        A dict with the keys "uri", "querystring", "headers" and
        "domain_name".

    Raises:
        CloudFrontEdgeException: status 500 with LambdaError.EdgeNoOrigin
            when a required component is missing or has the wrong shape.
    """
    try:
        uri: str = request["uri"]
        querystring: str = request["querystring"]
        headers: dict = request["headers"]
        origin: dict = request.get("origin", {})
        domain_name: str = origin["s3"]["domainName"]
    except (KeyError, TypeError) as e:
        # TypeError covers a present-but-malformed component (for example
        # "origin" or "s3" being None), which subscripting reports as a
        # TypeError rather than a KeyError.
        logger.error(f"Missing request component: {str(e)}")
        raise CloudFrontEdgeException(500, LambdaError.EdgeNoOrigin) from e

    return {
        "uri": uri,
        "querystring": querystring,
        "headers": headers,
        "domain_name": domain_name,
    }
85+
86+
@staticmethod
def filter_domain_for_env(domain_name: str) -> str:
    """Return the environment prefix that precedes "-lloyd" in a domain name.

    The prefix is one or two hyphen-separated segments at the start of
    ``domain_name`` that are immediately followed by "-lloyd", e.g.
    "test-lloyd-..." gives "test" and "ndr-dev-lloyd-..." gives "ndr-dev".

    Args:
        domain_name: domain name to inspect.

    Returns:
        The matched prefix, or "" when the name does not fit the pattern.
    """
    match = re.match(r"^[^-]+(?:-[^-]+)?(?=-lloyd)", domain_name)
    # The pattern has no capture group, so the prefix is the whole match
    # (group 0); asking for group 1 would raise IndexError.
    return match.group(0) if match else ""
4892

49-
@staticmethod
def extend_table_name(base_table_name: str, environment: str) -> str:
    """Prefix the table name with the environment when one is given.

    Args:
        base_table_name: table name without an environment prefix.
        environment: environment name; may be empty.

    Returns:
        "<environment>_<base_table_name>" when ``environment`` is truthy,
        otherwise ``base_table_name`` unchanged.
    """
    if environment:
        return f"{environment}_{base_table_name}"
    return base_table_name

lambdas/tests/unit/conftest.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414

1515
REGION_NAME = "eu-west-2"
1616

17-
MOCK_CLOUDFRONT_URL = "test-cloudfront-url.com"
1817
MOCK_TABLE_NAME = "test-table"
1918
MOCK_BUCKET = "test-s3-bucket"
20-
19+
MOCK_CLOUDFRONT_URL = "test-cloudfront-url.com"
20+
MOCKED_LG_BUCKET_ENV = "test"
21+
MOCKED_LG_BUCKET_URL = f"{MOCKED_LG_BUCKET_ENV}-lloyd-test-test.com"
2122
MOCK_ARF_TABLE_NAME_ENV_NAME = "DOCUMENT_STORE_DYNAMODB_NAME"
2223
MOCK_ARF_BUCKET_ENV_NAME = "DOCUMENT_STORE_BUCKET_NAME"
2324

lambdas/tests/unit/enums/test_edge_presign_values.py

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,58 @@
1-
# test_enums.py
2-
31
from enums.lambda_error import LambdaError
2+
from tests.unit.conftest import MOCKED_LG_BUCKET_URL
43

5-
ENV = "test"
4+
MOCKED_AUTH_QUERY = (
5+
"X-Amz-Algorithm=algo&X-Amz-Credential=cred&X-Amz-Date=date"
6+
"&X-Amz-Expires=3600&X-Amz-SignedHeaders=signed"
7+
"&X-Amz-Signature=sig&X-Amz-Security-Token=token"
8+
)
9+
MOCKED_PARTIAL_QUERY = (
10+
"X-Amz-Algorithm=algo&X-Amz-Credential=cred&X-Amz-Date=date" "&X-Amz-Expires=3600"
11+
)
612

7-
TABLE_NAME = "CloudFrontEdgeReference"
13+
MOCKED_HEADERS = {
14+
"cloudfront-viewer-country": [{"key": "CloudFront-Viewer-Country", "value": "US"}],
15+
"x-forwarded-for": [{"key": "X-Forwarded-For", "value": "1.2.3.4"}],
16+
"host": [{"key": "Host", "value": MOCKED_LG_BUCKET_URL}],
17+
}
818

9-
NHS_DOMAIN = "example.gov.uk"
19+
EXPECTED_EDGE_NO_QUERY_MESSAGE = LambdaError.EdgeNoQuery.value["message"]
20+
EXPECTED_EDGE_NO_QUERY_ERROR_CODE = LambdaError.EdgeNoQuery.value["err_code"]
21+
EXPECTED_EDGE_MALFORMED_QUERY_MESSAGE = LambdaError.EdgeRequiredQuery.value["message"]
22+
EXPECTED_EDGE_MALFORMED_QUERY_ERROR_CODE = LambdaError.EdgeRequiredQuery.value[
23+
"err_code"
24+
]
25+
EXPECTED_EDGE_MALFORMED_HEADER_MESSAGE = LambdaError.EdgeRequiredHeaders.value[
26+
"message"
27+
]
28+
EXPECTED_EDGE_MALFORMED_HEADER_ERROR_CODE = LambdaError.EdgeRequiredHeaders.value[
29+
"err_code"
30+
]
31+
EXPECTED_EDGE_NO_ORIGIN_ERROR_MESSAGE = LambdaError.EdgeNoOrigin.value["message"]
32+
EXPECTED_EDGE_NO_ORIGIN_ERROR_CODE = LambdaError.EdgeNoOrigin.value["err_code"]
1033

1134
EXPECTED_EDGE_NO_CLIENT_ERROR_MESSAGE = LambdaError.EdgeNoClient.value["message"]
12-
1335
EXPECTED_EDGE_NO_CLIENT_ERROR_CODE = LambdaError.EdgeNoClient.value["err_code"]
36+
EXPECTED_EDGE_MALFORMED_ERROR_MESSAGE = LambdaError.EdgeMalformed.value["message"]
37+
EXPECTED_EDGE_MALFORMED_ERROR_CODE = LambdaError.EdgeMalformed.value["err_code"]
1438

15-
EXPECTED_DYNAMO_DB_CONDITION_EXPRESSION = (
16-
"attribute_not_exists(IsRequested) OR IsRequested = :false"
17-
)
18-
EXPECTED_DYNAMO_DB_EXPRESSION_ATTRIBUTE_VALUES = {":false": False}
19-
20-
EXPECTED_SSM_PARAMETER_KEY = "EDGE_REFERENCE_TABLE"
21-
22-
EXPECTED_SUCCESS_RESPONSE = None
2339

24-
VALID_EVENT_MODEL = {
40+
MOCK_S3_EDGE_EVENT = {
2541
"Records": [
2642
{
2743
"cf": {
2844
"request": {
29-
"headers": {
30-
"authorization": [
31-
{"key": "Authorization", "value": "Bearer token"}
32-
],
33-
"host": [{"key": "Host", "value": NHS_DOMAIN}],
34-
},
35-
"querystring": f"origin=https://test.{NHS_DOMAIN}&other=param",
45+
"headers": MOCKED_HEADERS,
46+
"querystring": MOCKED_AUTH_QUERY,
3647
"uri": "/some/path",
37-
}
38-
}
39-
}
40-
]
41-
}
42-
43-
MISSING_ORIGIN_EVENT_MODEL = {
44-
"Records": [
45-
{
46-
"cf": {
47-
"request": {
48-
"headers": {
49-
"authorization": [
50-
{"key": "Authorization", "value": "Bearer token"}
51-
],
52-
"host": [{"key": "Host", "value": NHS_DOMAIN}],
48+
"origin": {
49+
"s3": {
50+
"authMethod": "none",
51+
"customHeaders": {},
52+
"domainName": MOCKED_LG_BUCKET_URL,
53+
"path": "",
54+
}
5355
},
54-
"querystring": "other=param",
55-
"uri": "/some/path",
5656
}
5757
}
5858
}

0 commit comments

Comments
 (0)