11import pathlib
22from datetime import datetime , timezone
3- from typing import Optional
3+ from typing import Literal , Optional
44
55from enums .metadata_field_names import DocumentReferenceMetadataFields
6+ from enums .snomed_codes import SnomedCodes
67from enums .supported_document_types import SupportedDocumentTypes
7- from pydantic import BaseModel , ConfigDict , Field
8+ from pydantic import BaseModel , ConfigDict , Field , model_validator
89from pydantic .alias_generators import to_camel , to_pascal
9- from utils .exceptions import InvalidDocumentReferenceException
10+
11+ # Constants
12+ DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
13+ DEFAULT_CONTENT_TYPE = "application/pdf"
14+ S3_PREFIX = "s3://"
15+ THREE_MINUTES_IN_SECONDS = 60 * 3
16+
17+
class UploadRequestDocument(BaseModel):
    """Pydantic model with the four camelCase fields of one uploaded document.

    NOTE(review): presumably one entry of an upload request payload, judging
    by the class name - confirm against the caller.

    ``use_enum_values`` is enabled, so after validation ``docType`` holds the
    enum member's value rather than the ``SupportedDocumentTypes`` member.
    """

    model_config = ConfigDict(use_enum_values=True)

    fileName: str
    contentType: str
    docType: SupportedDocumentTypes
    clientId: str
1025
1126
1227class UploadDocumentReference (BaseModel ):
@@ -19,43 +34,115 @@ class UploadDocumentReferences(BaseModel):
1934 files : list [UploadDocumentReference ] = Field (...)
2035
2136
22- class SearchDocumentReference (BaseModel ):
23- model_config = ConfigDict (
24- alias_generator = to_camel ,
25- populate_by_name = True ,
26- )
27- id : str
28- created : str
29- file_name : str
30- virus_scanner_result : str
31- file_size : int
32-
33-
class DocumentReference(BaseModel):
    """Pydantic model describing a stored document and its S3 location.

    Field aliases are generated with ``to_pascal`` and input is accepted both
    by alias and by field name (``validate_by_alias`` / ``validate_by_name``).
    Enum inputs are stored as their raw values (``use_enum_values``).

    Fields declared with ``exclude=True`` are validated and kept on the
    instance but omitted from ``model_dump`` output.

    Every field whose default is ``None`` is annotated ``Optional`` so that an
    explicit null in the input validates the same way as a missing key.
    """

    model_config = ConfigDict(
        validate_by_alias=True,
        validate_by_name=True,
        alias_generator=to_pascal,
        use_enum_values=True,
    )

    # Explicit alias taken from the metadata field-name enum instead of the
    # generated PascalCase alias.
    id: str = Field(..., alias=str(DocumentReferenceMetadataFields.ID.value))
    author: Optional[str] = Field(default=None, exclude=True)
    content_type: str = Field(default=DEFAULT_CONTENT_TYPE)
    # Creation timestamp: current UTC time rendered with DATE_FORMAT.
    created: str = Field(
        default_factory=lambda: datetime.now(timezone.utc).strftime(DATE_FORMAT)
    )
    # ISO calendar date. Taken in UTC, like `created` and `last_updated`, so
    # the defaults cannot disagree across a day boundary on a non-UTC host.
    document_scan_creation: str = Field(
        default_factory=lambda: datetime.now(timezone.utc).date().isoformat(),
    )
    # Nullable so that records stored without a GP ODS code still validate.
    current_gp_ods: Optional[str] = Field(default=None)
    custodian: Optional[str] = Field(default=None)
    deleted: Optional[str] = Field(default=None)
    # Closed set of allowed document status values.
    doc_status: Literal[
        "registered",
        "partial",
        "preliminary",
        "final",
        "amended",
        "corrected",
        "appended",
        "cancelled",
        "entered-in-error",
        "deprecated",
        "unknown",
    ] = Field(default="preliminary")
    doc_type: Optional[str] = Field(default=None, exclude=True)
    document_snomed_code_type: Optional[str] = Field(
        default=SnomedCodes.LLOYD_GEORGE.value.code
    )
    file_location: str = ""
    file_name: str
    file_size: Optional[int] = Field(default=None)
    # Epoch seconds (UTC) at model creation unless supplied.
    last_updated: int = Field(
        default_factory=lambda: int(datetime.now(timezone.utc).timestamp()),
    )
    nhs_number: str
    s3_bucket_name: Optional[str] = Field(exclude=True, default=None)
    s3_file_key: Optional[str] = Field(exclude=True, default=None)
    status: Literal["current", "superseded", "entered-in-error"] = Field(
        default="current"
    )
    sub_folder: Optional[str] = Field(default=None, exclude=True)
    ttl: Optional[int] = Field(
        alias=str(DocumentReferenceMetadataFields.TTL.value), default=None
    )
    uploaded: bool = Field(default=False)
    uploading: bool = Field(default=False)
    version: str = Field(default="1")
    virus_scanner_result: Optional[str] = Field(default=None)
94+
def model_dump_camel_case(self, *args, **kwargs):
    """Return the ``model_dump`` result with each top-level key run through ``to_camel``.

    All positional and keyword arguments are forwarded unchanged to
    ``model_dump``. Values are passed through untouched; only the top-level
    keys are renamed.
    """
    dumped = self.model_dump(*args, **kwargs)
    return {to_camel(name): value for name, value in dumped.items()}
101+
@model_validator(mode="before")
@classmethod
def set_location_properties(cls, data, *args, **kwargs):
    """Derive the S3 location fields from whichever form the input provides.

    * If a non-empty ``file_location`` / ``FileLocation`` is present, it is
      split into ``s3_bucket_name`` and ``s3_file_key``.
    * Otherwise, if ``s3_bucket_name`` is present, ``s3_file_key`` and
      ``file_location`` are built from the other input values.

    Returns:
        A new dict with the derived keys added, or ``data`` unchanged when it
        is not a dict.

    Raises:
        ValueError: if the file location has no object key part.
        KeyError: if ``nhs_number`` or ``id`` is missing when the key has to
            be built from ``s3_bucket_name``.
    """
    # A "before" validator receives the raw input, which is not guaranteed
    # to be a dict; leave anything else for pydantic to validate.
    if not isinstance(data, dict):
        return data

    # Work on a shallow copy so the caller's dict is not modified.
    data = dict(data)

    # An empty or null location is treated the same as a missing one, so it
    # falls through to the bucket-based branch instead of raising.
    file_location = data.get("file_location") or data.get("FileLocation")
    if file_location:
        parts = cls._parse_s3_location(file_location)
        if len(parts) != 2:
            # The value is deliberately left out of the message because the
            # location may embed patient identifiers.
            raise ValueError(
                "file location must contain both a bucket and an object key"
            )
        data["s3_bucket_name"], data["s3_file_key"] = parts
    elif "s3_bucket_name" in data:
        data["s3_file_key"] = cls._build_s3_key(data)
        data["file_location"] = cls._build_s3_location(
            data["s3_bucket_name"], data["s3_file_key"]
        )
    return data
58117
@staticmethod
def _parse_s3_location(file_location: str) -> list[str]:
    """Split an S3 location into ``[bucket, key]``.

    Only a leading ``S3_PREFIX`` is removed, so an object key that happens to
    contain the prefix text is left intact. The remainder is split at the
    first ``/``.

    Returns:
        ``[bucket, key]``, or a single-element ``[bucket]`` list when the
        location contains no ``/`` after the prefix.
    """
    location_without_prefix = file_location.removeprefix(S3_PREFIX)
    return location_without_prefix.split("/", 1)
123+
@staticmethod
def _build_s3_key(data: dict) -> str:
    """Build the S3 object key ``[sub_folder/][doc_type/]nhs_number/id``.

    ``sub_folder`` and ``doc_type`` are optional: a missing key or a ``None``
    value is skipped, so an explicit null cannot break the join.

    Raises:
        KeyError: if ``nhs_number`` or ``id`` is absent from ``data``.
    """
    optional_parts = (data.get("sub_folder"), data.get("doc_type"))
    key_parts = [part for part in optional_parts if part is not None]
    key_parts.extend([data["nhs_number"], data["id"]])
    return "/".join(key_parts)
138+
@staticmethod
def _build_s3_location(bucket: str, key: str) -> str:
    """Return ``S3_PREFIX`` + bucket + ``/`` + key.

    A single leading ``/`` on the key is dropped first so the result does not
    contain a doubled separator.
    """
    if key.startswith("/"):
        key = key[1:]
    return f"{S3_PREFIX}{bucket}/{key}"
144+
145+ # File path handling methods
def get_file_name_path(self):
    """Return ``file_name`` wrapped in a ``pathlib.Path``."""
    file_name_path = pathlib.Path(self.file_name)
    return file_name_path
61148
@@ -65,54 +152,21 @@ def get_base_name(self):
def get_file_extension(self):
    """Return the suffix of the file name path (the last extension, dot included)."""
    file_name_path = self.get_file_name_path()
    return file_name_path.suffix
67154
68- def get_file_bucket (self ):
69- try :
70- file_bucket = self .file_location .replace ("s3://" , "" ).split ("/" )[0 ]
71- if file_bucket :
72- return file_bucket
73- raise InvalidDocumentReferenceException (
74- "Failed to parse bucket from file location"
75- )
76- except IndexError :
77- raise InvalidDocumentReferenceException (
78- "Failed to parse bucket from file location"
79- )
80-
81- def get_file_key (self ):
82- try :
83- file_key = self .file_location .replace ("s3://" , "" ).split ("/" , 1 )[1 ]
84- if file_key :
85- return file_key
86- raise InvalidDocumentReferenceException (
87- "Failed to parse object key from file location"
88- )
89- except IndexError :
90- raise InvalidDocumentReferenceException (
91- "Failed to parse object key from file location"
92- )
93-
def create_unique_filename(self, duplicates: int):
    """Return the file name with ``(duplicates)`` inserted before the extension."""
    base_name = self.get_base_name()
    extension = self.get_file_extension()
    return f"{base_name}({duplicates}){extension}"
96157
158+ # Status methods
def last_updated_within_three_minutes(self) -> bool:
    """Return True when ``last_updated`` is no older than ``THREE_MINUTES_IN_SECONDS``.

    ``last_updated`` is compared against the current UTC epoch timestamp
    minus the window; a value exactly on the cutoff counts as within.
    """
    now_timestamp = datetime.now(timezone.utc).timestamp()
    cutoff = now_timestamp - THREE_MINUTES_IN_SECONDS
    return self.last_updated >= cutoff
100164
101- def __eq__ (self , other ):
102- if isinstance (other , DocumentReference ):
103- return (
104- self .id == other .id
105- and self .content_type == other .content_type
106- and self .created == other .created
107- and self .deleted == other .deleted
108- and self .file_location == other .file_location
109- and self .file_name == other .file_name
110- and self .nhs_number == other .nhs_number
111- and self .ttl == other .ttl
112- and self .virus_scanner_result == other .virus_scanner_result
113- and self .current_gp_ods == other .current_gp_ods
114- and self .uploaded == other .uploaded
115- and self .uploading == other .uploading
116- and self .last_updated == other .last_updated
117- )
118- return False
def set_deleted(self) -> None:
    """Set ``deleted`` to the current UTC time formatted with ``DATE_FORMAT``."""
    deleted_at = datetime.now(timezone.utc)
    self.deleted = deleted_at.strftime(DATE_FORMAT)
167+
def set_virus_scanner_result(self, updated_virus_scanner_result) -> None:
    """Overwrite ``virus_scanner_result`` with the supplied value."""
    new_result = updated_virus_scanner_result
    self.virus_scanner_result = new_result
170+
def set_uploaded_to_true(self):
    """Mark the document as uploaded by setting ``uploaded`` to ``True``."""
    uploaded_flag = True
    self.uploaded = uploaded_flag
0 commit comments