# Handles the transformation logic for each field based on the schema
# Root and base type expression checker functions
import re
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo

import ExceptionMessages
from LookUpData import LookUpData
@@ -72,10 +72,18 @@ def convertData(self, expressionType, expressionRule, fieldName, fieldValue):
7272 return self ._convertToChangeTo (
7373 expressionRule , fieldName , fieldValue , self .summarise , self .report_unexpected_exception
7474 )
75+ case "BOOLEAN" :
76+ return self ._convertToBoolean (
77+ expressionRule , fieldName , fieldValue , self .summarise , self .report_unexpected_exception
78+ )
7579 case "LOOKUP" :
7680 return self ._convertToLookUp (
7781 expressionRule , fieldName , fieldValue , self .summarise , self .report_unexpected_exception
7882 )
83+ case "SNOMED" :
84+ return self ._convertToSnomed (
85+ expressionRule , fieldName , fieldValue , self .summarise , self .report_unexpected_exception
86+ )
7987 case "DEFAULT" :
8088 return self ._convertToDefaultTo (
8189 expressionRule , fieldName , fieldValue , self .summarise , self .report_unexpected_exception
@@ -85,25 +93,116 @@ def convertData(self, expressionType, expressionRule, fieldName, fieldValue):
8593 expressionRule , fieldName , fieldValue , self .summarise , self .report_unexpected_exception
8694 )
8795 case _:
88- return "Schema expression not found! Check your expression type : " + expressionType
96+ raise ValueError ("Schema expression not found! Check your expression type : " + expressionType )
97+
# Utility function for logging errors
def _log_error(self, fieldName, fieldValue, e, code=ExceptionMessages.RECORD_CHECK_FAILED):
    """
    Append one error record to ``self.errorRecords``.

    :param fieldName: name of the field the error relates to.
    :param fieldValue: the offending value, stored in the record unchanged.
    :param e: either an ``Exception`` instance, which is rendered through the
        ``UNEXPECTED_EXCEPTION`` message template, or any other object, which
        is stored as ``str(e)``.
    :param code: error code stored in the record; defaults to
        ``ExceptionMessages.RECORD_CHECK_FAILED``.
    """
    if not isinstance(e, Exception):
        # A plain message was supplied: store its text as given.
        message = str(e)
    else:
        template = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION]
        message = template % (e.__class__.__name__, str(e))

    record = {
        "code": code,
        "field": fieldName,
        "value": fieldValue,
        "message": message,
    }
    self.errorRecords.append(record)
89111
90- # Convert ISO date string to a specific format (e.g. YYYYMMDD)
91112 def _convertToDate (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
92113 if not fieldValue :
93114 return ""
94115
95116 if not isinstance (fieldValue , str ):
117+ if report_unexpected_exception :
118+ self ._log_error (fieldName , fieldValue , "Value is not a string" )
96119 return ""
97- # Reject partial dates like "2024" or "2024-05"
98- if re .match (r"^\d{4}(-\d{2})?$" , fieldValue ):
120+
121+ # Normalize expression rule
122+ format_str = expressionRule .replace ("format:" , "" ).strip ()
123+
124+ # Reject partial ISO dates like "2024" or "2024-05"
125+ if format_str == "%Y%m%d" and re .match (r"^\d{4}(-\d{2})?$" , fieldValue ):
126+ if report_unexpected_exception :
127+ self ._log_error (fieldName , fieldValue , "Partial date not accepted" )
99128 return ""
129+
130+ # Handle only the recorded field with extended ISO + timezone support
131+ if fieldName == "recorded" :
132+ # Accept "YYYY-MM-DD" and return as is
133+ if re .match (r"^\d{4}-\d{2}-\d{2}$" , fieldValue ):
134+ try :
135+ dt = datetime .strptime (fieldValue , "%Y-%m-%d" )
136+ if dt .date () > datetime .now (ZoneInfo ("UTC" )).date ():
137+ if report_unexpected_exception :
138+ self ._log_error (fieldName , fieldValue , "Date cannot be in the future" )
139+ return ""
140+ return fieldValue
141+ except ValueError :
142+ if report_unexpected_exception :
143+ self ._log_error (fieldName , fieldValue , "Invalid date format" )
144+ return ""
145+ try :
146+ # Parse ISO format with or without microseconds and TZ
147+ dt = datetime .fromisoformat (fieldValue )
148+ except ValueError :
149+ if report_unexpected_exception :
150+ self ._log_error (fieldName , fieldValue , "Invalid date format" )
151+ return ""
152+
153+ # Assign UTC if tzinfo is missing
154+ if dt .tzinfo is None :
155+ dt = dt .replace (tzinfo = ZoneInfo ("UTC" ))
156+
157+ now_utc = datetime .now (ZoneInfo ("UTC" ))
158+ if dt .astimezone (ZoneInfo ("UTC" )) > now_utc :
159+ if report_unexpected_exception :
160+ self ._log_error (fieldName , fieldValue , "Date cannot be in the future" )
161+ return ""
162+
163+ # Validate timezone offset
164+ offset = dt .utcoffset ()
165+ allowed_offsets = [
166+ ZoneInfo ("UTC" ).utcoffset (dt ),
167+ ZoneInfo ("Europe/London" ).utcoffset (dt ),
168+ ]
169+
170+ if offset not in allowed_offsets :
171+ if report_unexpected_exception :
172+ self ._log_error (fieldName , fieldValue , f"Unsupported offset: { offset } " )
173+ return ""
174+
175+ dt_utc = dt .astimezone (ZoneInfo ("UTC" )).replace (microsecond = 0 )
176+
177+ # Format and return with custom suffix
178+ formatted = dt_utc .strftime ("%Y%m%dT%H%M%S%z" )
179+ return formatted .replace ("+0000" , "00" ).replace ("+0100" , "01" )
180+
181+ # For all other fields, apply standard %Y%m%d processing
182+ if format_str == "%Y%m%d" :
183+ fieldValue = fieldValue .replace ("-" , "" ).replace ("/" , "" )
184+ # Validate expected raw input format if using %Y%m%d
185+ if not re .match (r"^\d{8}$" , fieldValue ):
186+ if report_unexpected_exception :
187+ self ._log_error (fieldName , fieldValue , "Date must be in YYYYMMDD format" )
188+ return ""
189+
100190 try :
101- dt = datetime .fromisoformat (fieldValue )
102- format_str = expressionRule .replace ("format:" , "" )
191+ dt = datetime .strptime (fieldValue , format_str )
192+
193+ # Reject future dates if the field is BirthDate
194+ if fieldName in "contained|#:Patient|birthDate" :
195+ today_utc = datetime .now (ZoneInfo ("UTC" )).date ()
196+ if dt .date () > today_utc :
197+ if report_unexpected_exception :
198+ self ._log_error (fieldName , fieldValue , "Birthdate cannot be in the future" )
199+ return ""
200+
103201 return dt .strftime (format_str )
104- except ValueError :
202+ except ValueError as e :
105203 if report_unexpected_exception :
106- return f"Unexpected format: { fieldValue } "
204+ self ._log_error (fieldName , fieldValue , e )
205+ return ""
107206
108207 # Convert FHIR datetime into CSV-safe UTC format
109208 def _convertToDateTime (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
@@ -148,13 +247,15 @@ def _convertToDateTime(self, expressionRule, fieldName, fieldValue, summarise, r
148247 # Not Empty Validate - Returns exactly what is in the extracted fields no parsing or logic needed
149248 def _convertToNotEmpty (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
150249 try :
151- if len ( str ( fieldValue )) > 0 :
250+ if isinstance ( fieldValue , str ) and fieldValue . strip () :
152251 return fieldValue
252+ self ._log_error (fieldName , fieldValue , "Value not a String" )
153253 return ""
154254 except Exception as e :
155255 if report_unexpected_exception :
156256 message = ExceptionMessages .MESSAGES [ExceptionMessages .UNEXPECTED_EXCEPTION ] % (e .__class__ .__name__ , e )
157- return message
257+ self ._log_error (fieldName , fieldValue , message )
258+ return
158259
159260 # NHSNumber Validate
160261 def _convertToNHSNumber (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
@@ -164,7 +265,7 @@ def _convertToNHSNumber(self, expressionRule, fieldName, fieldValue, summarise,
164265 # If it is outright empty, return back an empty string
165266 if not fieldValue :
166267 return ""
167-
268+
168269 try :
169270 regexRule = r"^\d{10}$"
170271 if isinstance (fieldValue , str ) and re .fullmatch (regexRule , fieldValue ):
@@ -197,7 +298,7 @@ def _convertToGender(self, expressionRule, fieldName, fieldValue, summarise, rep
197298 "other" : "9" ,
198299 "unknown" : "0"
199300 }
200-
301+
201302 # Normalize input
202303 normalized_gender = str (fieldValue ).lower ()
203304
@@ -223,21 +324,20 @@ def _convertToDose(self, expressionRule, fieldName, fieldValue, summarise, repor
223324 return fieldValue
224325 return ""
225326
226- # Change to Lookup
327+ # Change to Lookup (loads expected data as is but if empty use lookup extraction to populate value)
227328 def _convertToLookUp (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
329+ if isinstance (fieldValue , str ) and any (char .isalpha () for char in fieldValue ) and not fieldValue .isdigit ():
330+ return fieldValue
228331 try :
229- if fieldValue != "" :
230- return fieldValue
231- try :
232332 lookUpValue = self .dataParser .getKeyValue (expressionRule )
233333 IdentifiedLookup = self .dataLookUp .findLookUp (lookUpValue [0 ])
234334 return IdentifiedLookup
235- except :
236- return ""
335+
237336 except Exception as e :
238337 if report_unexpected_exception :
239338 message = ExceptionMessages .MESSAGES [ExceptionMessages .UNEXPECTED_EXCEPTION ] % (e .__class__ .__name__ , e )
240- return message
339+ self ._log_error (fieldName , fieldValue , message )
340+ return ""
241341
242342 # Default to Validate
243343 def _convertToDefaultTo (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
@@ -266,3 +366,41 @@ def _convertToOnlyIfTo(self, expressionRule, fieldName, fieldValue, summarise, r
266366 if report_unexpected_exception :
267367 message = ExceptionMessages .MESSAGES [ExceptionMessages .UNEXPECTED_EXCEPTION ] % (e .__class__ .__name__ , e )
268368 return message
369+
370+ # Check if Snomed code is numeric and reject other forms
371+ def _convertToSnomed (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
372+ """
373+ Validates that a SNOMED code is a non-empty string containing only digits.
374+ """
375+ try :
376+ if not fieldValue :
377+ return fieldValue
378+ if not isinstance (fieldValue , str ) or not fieldValue .isdigit ():
379+ raise ValueError (f"Invalid SNOMED code: { fieldValue } " )
380+ return fieldValue
381+ except Exception as e :
382+ if report_unexpected_exception :
383+ message = ExceptionMessages .MESSAGES [ExceptionMessages .UNEXPECTED_EXCEPTION ] % (e .__class__ .__name__ , e )
384+ self ._log_error (fieldName , fieldValue , message )
385+ return ""
386+
387+ # Check if Input is boolean or if input is a string with true or false, convert to Boolean
388+ def _convertToBoolean (self , expressionRule , fieldName , fieldValue , summarise , report_unexpected_exception ):
389+ try :
390+ if isinstance (fieldValue , bool ):
391+ return fieldValue
392+
393+ if str (fieldValue ).strip ().lower () == "true" :
394+ return True
395+ if str (fieldValue ).strip ().lower () == "false" :
396+ return False
397+ elif report_unexpected_exception :
398+ self ._log_error (fieldName , fieldValue , "Invalid String Data" )
399+ return ""
400+ except Exception as e :
401+ if report_unexpected_exception :
402+ self ._log_error (fieldName , fieldValue , e )
403+ return ""
404+
def get_error_records(self):
    """Return ``self.errorRecords``, the list of error records (the list itself, not a copy)."""
    collected = self.errorRecords
    return collected
0 commit comments