Skip to content

Commit b49f3fb

Browse files
authored
[NDR-100] Document Reference metadata changes (#654)
* [NDR-100] migrate NhsDocumentReference to use DocumentReference
* [NDR-100] migrate SearchDocumentReference to use DocumentReference
1 parent d6d8e2d commit b49f3fb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1620
-1457
lines changed

lambdas/enums/dynamo_filter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ class AttributeOperator(Enum):
99
LESS_THAN = "lt"
1010
LESS_THAN_OR_EQUAL = "lte"
1111
IN = "is_in"
12+
NOT_EXISTS = "not_exists"
1213

1314

1415
class ConditionOperator(Enum):

lambdas/enums/lambda_error.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,22 @@
88

99
class LambdaError(Enum):
1010
def create_error_response(
11-
self, params: Optional[dict] = None, **kwargs) -> ErrorResponse:
11+
self, params: Optional[dict] = None, **kwargs
12+
) -> ErrorResponse:
1213
err_code = self.value["err_code"]
1314
message = self.value["message"]
1415
if "%" in message and params:
1516
message = message % params
1617
interaction_id = getattr(request_context, "request_id", None)
1718
error_response = ErrorResponse(
18-
err_code=err_code,
19-
message=message,
20-
interaction_id=interaction_id,
21-
**kwargs
19+
err_code=err_code, message=message, interaction_id=interaction_id, **kwargs
2220
)
2321
return error_response
2422

2523
def to_str(self) -> str:
2624
return f"[{self.value['err_code']}] {self.value['message']}"
2725

28-
def create_error_body(
29-
self, params: Optional[dict] = None, **kwargs
30-
) -> str:
26+
def create_error_body(self, params: Optional[dict] = None, **kwargs) -> str:
3127
return self.create_error_response(params, **kwargs).create()
3228

3329
"""

lambdas/enums/snomed_codes.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from enum import Enum
2+
from typing import Optional
23

34
from pydantic import BaseModel
45

@@ -16,3 +17,19 @@ class SnomedCodes(Enum):
1617
GENERAL_MEDICAL_PRACTICE = SnomedCode(
1718
code="1060971000000108", display_name="General practice service"
1819
)
20+
21+
@classmethod
22+
def find_by_code(cls, code: str) -> Optional["SnomedCode"]:
23+
"""
24+
Find the SnomedCode held by a SnomedCodes member, looked up by its code string.
25+
26+
Args:
27+
code: The SNOMED code string to search for (e.g. "16521000000101")
28+
29+
Returns:
30+
The matching SnomedCode (the enum member's value), or None if not found
31+
"""
32+
for snomed_enum in cls:
33+
if snomed_enum.value.code == code:
34+
return snomed_enum.value
35+
return None

lambdas/enums/virus_scan_result.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from enum import Enum
1+
from enum import StrEnum
22

33

4-
class VirusScanResult(str, Enum):
4+
class VirusScanResult(StrEnum):
55
CLEAN = "Clean"
66
INFECTED = "Infected"
77
INFECTED_ALLOWED = "InfectedAllowed"

lambdas/models/access_audit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717

1818
class AccessAuditReason(BaseModel):
19-
model_config = ConfigDict(populate_by_name=True, alias_generator=to_pascal)
19+
model_config = ConfigDict(validate_by_name=True, alias_generator=to_pascal)
2020
nhs_number: str = Field(exclude=True)
2121
request_type: AccessAuditRequestType = Field(exclude=True)
2222
user_session_id: str
Lines changed: 126 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,27 @@
11
import pathlib
22
from datetime import datetime, timezone
3-
from typing import Optional
3+
from typing import Literal, Optional
44

55
from enums.metadata_field_names import DocumentReferenceMetadataFields
6+
from enums.snomed_codes import SnomedCodes
67
from enums.supported_document_types import SupportedDocumentTypes
7-
from pydantic import BaseModel, ConfigDict, Field
8+
from pydantic import BaseModel, ConfigDict, Field, model_validator
89
from pydantic.alias_generators import to_camel, to_pascal
9-
from utils.exceptions import InvalidDocumentReferenceException
10+
11+
# Constants
12+
DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
13+
DEFAULT_CONTENT_TYPE = "application/pdf"
14+
S3_PREFIX = "s3://"
15+
THREE_MINUTES_IN_SECONDS = 60 * 3
16+
17+
18+
class UploadRequestDocument(BaseModel):
19+
model_config = ConfigDict(use_enum_values=True)
20+
21+
fileName: str
22+
contentType: str
23+
docType: SupportedDocumentTypes
24+
clientId: str
1025

1126

1227
class UploadDocumentReference(BaseModel):
@@ -19,43 +34,115 @@ class UploadDocumentReferences(BaseModel):
1934
files: list[UploadDocumentReference] = Field(...)
2035

2136

22-
class SearchDocumentReference(BaseModel):
23-
model_config = ConfigDict(
24-
alias_generator=to_camel,
25-
populate_by_name=True,
26-
)
27-
id: str
28-
created: str
29-
file_name: str
30-
virus_scanner_result: str
31-
file_size: int
32-
33-
3437
class DocumentReference(BaseModel):
3538
model_config = ConfigDict(
39+
validate_by_alias=True,
40+
validate_by_name=True,
3641
alias_generator=to_pascal,
3742
use_enum_values=True,
38-
populate_by_name=True,
3943
)
4044

4145
id: str = Field(..., alias=str(DocumentReferenceMetadataFields.ID.value))
42-
content_type: str
43-
created: str
44-
deleted: str
45-
file_location: str
46+
author: str = Field(default=None, exclude=True)
47+
content_type: str = Field(default=DEFAULT_CONTENT_TYPE)
48+
created: str = Field(
49+
default_factory=lambda: datetime.now(timezone.utc).strftime(DATE_FORMAT)
50+
)
51+
document_scan_creation: str = Field(
52+
default_factory=lambda: datetime.date(datetime.now()).isoformat(),
53+
)
54+
current_gp_ods: str = Field(default=None)
55+
custodian: str = Field(default=None)
56+
deleted: str = Field(default=None)
57+
doc_status: Literal[
58+
"registered",
59+
"partial",
60+
"preliminary",
61+
"final",
62+
"amended",
63+
"corrected",
64+
"appended",
65+
"cancelled",
66+
"entered-in-error",
67+
"deprecated",
68+
"unknown",
69+
] = Field(default="preliminary")
70+
doc_type: str = Field(default=None, exclude=True)
71+
document_snomed_code_type: Optional[str] = Field(
72+
default=SnomedCodes.LLOYD_GEORGE.value.code
73+
)
74+
file_location: str = ""
4675
file_name: str
76+
file_size: int = Field(default=None)
77+
last_updated: int = Field(
78+
default_factory=lambda: int(datetime.now(timezone.utc).timestamp()),
79+
)
4780
nhs_number: str
81+
s3_bucket_name: str = Field(exclude=True, default=None)
82+
s3_file_key: str = Field(exclude=True, default=None)
83+
status: Literal["current", "superseded", "entered-in-error"] = Field(
84+
default="current"
85+
)
86+
sub_folder: str = Field(default=None, exclude=True)
4887
ttl: Optional[int] = Field(
4988
alias=str(DocumentReferenceMetadataFields.TTL.value), default=None
5089
)
51-
virus_scanner_result: str
52-
# Allow current_gp_ods to be nullable so that we can cope with existing records.
53-
# After we updated all existing records with this field, consider to set this as non-Optional
54-
current_gp_ods: Optional[str] = None
55-
uploaded: bool
56-
uploading: bool
57-
last_updated: int
90+
uploaded: bool = Field(default=False)
91+
uploading: bool = Field(default=False)
92+
version: str = Field(default="1")
93+
virus_scanner_result: str = Field(default=None)
94+
95+
def model_dump_camel_case(self, *args, **kwargs):
96+
model_dump_results = self.model_dump(*args, **kwargs)
97+
camel_case_model_dump_results = {}
98+
for key in model_dump_results:
99+
camel_case_model_dump_results[to_camel(key)] = model_dump_results[key]
100+
return camel_case_model_dump_results
101+
102+
@model_validator(mode="before")
103+
@classmethod
104+
def set_location_properties(cls, data, *args, **kwargs):
105+
"""Set S3 location properties based on available data."""
106+
if "file_location" in data or "FileLocation" in data:
107+
file_location = data.get("file_location") or data.get("FileLocation")
108+
bucket, key = cls._parse_s3_location(file_location)
109+
data["s3_bucket_name"] = bucket
110+
data["s3_file_key"] = key
111+
elif "s3_bucket_name" in data:
112+
data["s3_file_key"] = cls._build_s3_key(data)
113+
data["file_location"] = cls._build_s3_location(
114+
data["s3_bucket_name"], data["s3_file_key"]
115+
)
116+
return data
58117

118+
@staticmethod
119+
def _parse_s3_location(file_location: str) -> list[str]:
120+
"""Parse S3 location into bucket and key components."""
121+
location_without_prefix = file_location.replace(S3_PREFIX, "")
122+
return location_without_prefix.split("/", 1)
123+
124+
@staticmethod
125+
def _build_s3_key(data: dict) -> str:
126+
"""Build the S3 key from document data."""
127+
key_parts = []
128+
129+
if "sub_folder" in data:
130+
key_parts.append(data["sub_folder"])
131+
if "doc_type" in data:
132+
key_parts.append(data["doc_type"])
133+
134+
key_parts.extend([data["nhs_number"], data["id"]])
135+
s3_key = "/".join(key_parts)
136+
137+
return s3_key
138+
139+
@staticmethod
140+
def _build_s3_location(bucket: str, key: str) -> str:
141+
"""Build a complete S3 location from bucket and key."""
142+
normalized_key = key[1:] if key.startswith("/") else key
143+
return f"{S3_PREFIX}{bucket}/{normalized_key}"
144+
145+
# File path handling methods
59146
def get_file_name_path(self):
60147
return pathlib.Path(self.file_name)
61148

@@ -65,54 +152,21 @@ def get_base_name(self):
65152
def get_file_extension(self):
66153
return self.get_file_name_path().suffix
67154

68-
def get_file_bucket(self):
69-
try:
70-
file_bucket = self.file_location.replace("s3://", "").split("/")[0]
71-
if file_bucket:
72-
return file_bucket
73-
raise InvalidDocumentReferenceException(
74-
"Failed to parse bucket from file location"
75-
)
76-
except IndexError:
77-
raise InvalidDocumentReferenceException(
78-
"Failed to parse bucket from file location"
79-
)
80-
81-
def get_file_key(self):
82-
try:
83-
file_key = self.file_location.replace("s3://", "").split("/", 1)[1]
84-
if file_key:
85-
return file_key
86-
raise InvalidDocumentReferenceException(
87-
"Failed to parse object key from file location"
88-
)
89-
except IndexError:
90-
raise InvalidDocumentReferenceException(
91-
"Failed to parse object key from file location"
92-
)
93-
94155
def create_unique_filename(self, duplicates: int):
95156
return f"{self.get_base_name()}({duplicates}){self.get_file_extension()}"
96157

158+
# Status methods
97159
def last_updated_within_three_minutes(self) -> bool:
98-
three_minutes_ago = datetime.now(timezone.utc).timestamp() - 60 * 3
160+
three_minutes_ago = (
161+
datetime.now(timezone.utc).timestamp() - THREE_MINUTES_IN_SECONDS
162+
)
99163
return self.last_updated >= three_minutes_ago
100164

101-
def __eq__(self, other):
102-
if isinstance(other, DocumentReference):
103-
return (
104-
self.id == other.id
105-
and self.content_type == other.content_type
106-
and self.created == other.created
107-
and self.deleted == other.deleted
108-
and self.file_location == other.file_location
109-
and self.file_name == other.file_name
110-
and self.nhs_number == other.nhs_number
111-
and self.ttl == other.ttl
112-
and self.virus_scanner_result == other.virus_scanner_result
113-
and self.current_gp_ods == other.current_gp_ods
114-
and self.uploaded == other.uploaded
115-
and self.uploading == other.uploading
116-
and self.last_updated == other.last_updated
117-
)
118-
return False
165+
def set_deleted(self) -> None:
166+
self.deleted = datetime.now(timezone.utc).strftime(DATE_FORMAT)
167+
168+
def set_virus_scanner_result(self, updated_virus_scanner_result) -> None:
169+
self.virus_scanner_result = updated_virus_scanner_result
170+
171+
def set_uploaded_to_true(self):
172+
self.uploaded = True

lambdas/models/feedback_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
class Feedback(BaseModel):
8-
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
8+
model_config = ConfigDict(validate_by_name=True, alias_generator=to_camel)
99

1010
feedback_content: str
1111
experience: str = Field(alias="howSatisfied")

lambdas/models/fhir/R4/fhir_document_reference.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import datetime
21
from typing import Any, Dict, List, Literal, Optional
32

43
from enums.snomed_codes import SnomedCode, SnomedCodes
@@ -113,7 +112,7 @@ class DocumentReference(BaseModel):
113112
"deprecated",
114113
"unknown",
115114
] = "final"
116-
status: Literal["current"] = "current"
115+
status: Literal["current", "superseded"] = "current"
117116
type: Optional[CodeableConcept] = None
118117
category: Optional[List[CodeableConcept]] = None
119118
subject: Optional[Reference] = None
@@ -228,18 +227,23 @@ def create_fhir_document_reference_object(
228227

229228
return DocumentReference(
230229
id=document.id,
230+
docStatus=document.doc_status,
231231
type=CodeableConcept(
232232
coding=self._create_snomed_coding(self.snomed_code_doc_type)
233233
),
234234
subject=Reference(**self._create_identifier("nhs-number", self.nhs_number)),
235235
content=[DocumentReferenceContent(attachment=self.attachment)],
236-
date=datetime.datetime.now().isoformat(),
236+
date=document.created,
237237
author=[
238238
Reference(
239-
**self._create_identifier("ods-organization-code", self.custodian)
239+
**self._create_identifier(
240+
"ods-organization-code", document.author or self.custodian
241+
)
240242
)
241243
],
242244
custodian=Reference(
243-
**self._create_identifier("ods-organization-code", self.custodian)
245+
**self._create_identifier(
246+
"ods-organization-code", document.custodian or self.custodian
247+
)
244248
),
245249
)

0 commit comments

Comments
 (0)