Skip to content

Commit f10088d

Browse files
authored
VED-165-FHIR-TO-FLAT-JSON3-SNOMED (#386)
* snomed validation added * modifying primarysource field and unit test fixes * finalize edge cases * new field and extended error handling * birthdate validation * recorded date validation * unit test patch * remove skipped test * remove whitespace * correct unit test * improve coverage * whitespaces removed * reviewed changes * primary source logic * coverage test * overview check * review changes2 * remove whitespace * error logs
1 parent 54f63fa commit f10088d

File tree

7 files changed

+464
-88
lines changed

7 files changed

+464
-88
lines changed

delta_backend/.coverage

-52 KB
Binary file not shown.

delta_backend/src/ConversionChecker.py

Lines changed: 158 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Handles the transformation logic for each field based on the schema
33
# Root and base type expression checker functions
44
import ExceptionMessages
5-
from datetime import datetime,timedelta
5+
from datetime import datetime,timezone
66
from zoneinfo import ZoneInfo
77
import re
88
from LookUpData import LookUpData
@@ -72,10 +72,18 @@ def convertData(self, expressionType, expressionRule, fieldName, fieldValue):
7272
return self._convertToChangeTo(
7373
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
7474
)
75+
case "BOOLEAN":
76+
return self._convertToBoolean(
77+
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
78+
)
7579
case "LOOKUP":
7680
return self._convertToLookUp(
7781
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
7882
)
83+
case "SNOMED":
84+
return self._convertToSnomed(
85+
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
86+
)
7987
case "DEFAULT":
8088
return self._convertToDefaultTo(
8189
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
@@ -85,25 +93,116 @@ def convertData(self, expressionType, expressionRule, fieldName, fieldValue):
8593
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
8694
)
8795
case _:
88-
return "Schema expression not found! Check your expression type : " + expressionType
96+
raise ValueError("Schema expression not found! Check your expression type : " + expressionType)
97+
98+
# Utility function for logging errors
99+
def _log_error(self, fieldName, fieldValue, e, code=ExceptionMessages.RECORD_CHECK_FAILED):
    """Append one error record to ``self.errorRecords``.

    Args:
        fieldName: Name of the field the problem was found on.
        fieldValue: The offending value, stored as given.
        e: Either an exception instance, which is rendered through the
            UNEXPECTED_EXCEPTION message template using its class name and
            text, or any other object, which is stored as ``str(e)``.
        code: Error code stored on the record.
    """
    if not isinstance(e, Exception):
        # A plain message (or any non-exception object) is recorded verbatim.
        message = str(e)
    else:
        template = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION]
        message = template % (e.__class__.__name__, str(e))

    record = {
        "code": code,
        "field": fieldName,
        "value": fieldValue,
        "message": message,
    }
    self.errorRecords.append(record)
89111

90-
# Convert ISO date string to a specific format (e.g. YYYYMMDD)
91112
def _convertToDate(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
92113
if not fieldValue:
93114
return ""
94115

95116
if not isinstance(fieldValue, str):
117+
if report_unexpected_exception:
118+
self._log_error(fieldName, fieldValue, "Value is not a string")
96119
return ""
97-
# Reject partial dates like "2024" or "2024-05"
98-
if re.match(r"^\d{4}(-\d{2})?$", fieldValue):
120+
121+
# Normalize expression rule
122+
format_str = expressionRule.replace("format:", "").strip()
123+
124+
# Reject partial ISO dates like "2024" or "2024-05"
125+
if format_str == "%Y%m%d" and re.match(r"^\d{4}(-\d{2})?$", fieldValue):
126+
if report_unexpected_exception:
127+
self._log_error(fieldName, fieldValue, "Partial date not accepted")
99128
return ""
129+
130+
# Handle only the recorded field with extended ISO + timezone support
131+
if fieldName == "recorded":
132+
# Accept "YYYY-MM-DD" and return as is
133+
if re.match(r"^\d{4}-\d{2}-\d{2}$", fieldValue):
134+
try:
135+
dt = datetime.strptime(fieldValue, "%Y-%m-%d")
136+
if dt.date() > datetime.now(ZoneInfo("UTC")).date():
137+
if report_unexpected_exception:
138+
self._log_error(fieldName, fieldValue, "Date cannot be in the future")
139+
return ""
140+
return fieldValue
141+
except ValueError:
142+
if report_unexpected_exception:
143+
self._log_error(fieldName, fieldValue, "Invalid date format")
144+
return ""
145+
try:
146+
# Parse ISO format with or without microseconds and TZ
147+
dt = datetime.fromisoformat(fieldValue)
148+
except ValueError:
149+
if report_unexpected_exception:
150+
self._log_error(fieldName, fieldValue, "Invalid date format")
151+
return ""
152+
153+
# Assign UTC if tzinfo is missing
154+
if dt.tzinfo is None:
155+
dt = dt.replace(tzinfo=ZoneInfo("UTC"))
156+
157+
now_utc = datetime.now(ZoneInfo("UTC"))
158+
if dt.astimezone(ZoneInfo("UTC")) > now_utc:
159+
if report_unexpected_exception:
160+
self._log_error(fieldName, fieldValue, "Date cannot be in the future")
161+
return ""
162+
163+
# Validate timezone offset
164+
offset = dt.utcoffset()
165+
allowed_offsets = [
166+
ZoneInfo("UTC").utcoffset(dt),
167+
ZoneInfo("Europe/London").utcoffset(dt),
168+
]
169+
170+
if offset not in allowed_offsets:
171+
if report_unexpected_exception:
172+
self._log_error(fieldName, fieldValue, f"Unsupported offset: {offset}")
173+
return ""
174+
175+
dt_utc = dt.astimezone(ZoneInfo("UTC")).replace(microsecond=0)
176+
177+
# Format and return with custom suffix
178+
formatted = dt_utc.strftime("%Y%m%dT%H%M%S%z")
179+
return formatted.replace("+0000", "00").replace("+0100", "01")
180+
181+
# For all other fields, apply standard %Y%m%d processing
182+
if format_str == "%Y%m%d":
183+
fieldValue = fieldValue.replace("-", "").replace("/", "")
184+
# Validate expected raw input format if using %Y%m%d
185+
if not re.match(r"^\d{8}$", fieldValue):
186+
if report_unexpected_exception:
187+
self._log_error(fieldName, fieldValue, "Date must be in YYYYMMDD format")
188+
return ""
189+
100190
try:
101-
dt = datetime.fromisoformat(fieldValue)
102-
format_str = expressionRule.replace("format:", "")
191+
dt = datetime.strptime(fieldValue, format_str)
192+
193+
# Reject future dates if the field is BirthDate
194+
if fieldName in "contained|#:Patient|birthDate":
195+
today_utc = datetime.now(ZoneInfo("UTC")).date()
196+
if dt.date() > today_utc:
197+
if report_unexpected_exception:
198+
self._log_error(fieldName, fieldValue, "Birthdate cannot be in the future")
199+
return ""
200+
103201
return dt.strftime(format_str)
104-
except ValueError:
202+
except ValueError as e:
105203
if report_unexpected_exception:
106-
return f"Unexpected format: {fieldValue}"
204+
self._log_error(fieldName, fieldValue, e)
205+
return ""
107206

108207
# Convert FHIR datetime into CSV-safe UTC format
109208
def _convertToDateTime(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
@@ -148,13 +247,15 @@ def _convertToDateTime(self, expressionRule, fieldName, fieldValue, summarise, r
148247
# Not Empty Validate - Returns exactly what is in the extracted fields no parsing or logic needed
149248
def _convertToNotEmpty(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
150249
try:
151-
if len(str(fieldValue)) > 0:
250+
if isinstance(fieldValue, str) and fieldValue.strip():
152251
return fieldValue
252+
self._log_error(fieldName, fieldValue, "Value not a String")
153253
return ""
154254
except Exception as e:
155255
if report_unexpected_exception:
156256
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
157-
return message
257+
self._log_error(fieldName, fieldValue, message)
258+
return
158259

159260
# NHSNumber Validate
160261
def _convertToNHSNumber(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
@@ -164,7 +265,7 @@ def _convertToNHSNumber(self, expressionRule, fieldName, fieldValue, summarise,
164265
# If it is outright empty, return back an empty string
165266
if not fieldValue:
166267
return ""
167-
268+
168269
try:
169270
regexRule = r"^\d{10}$"
170271
if isinstance(fieldValue, str) and re.fullmatch(regexRule, fieldValue):
@@ -197,7 +298,7 @@ def _convertToGender(self, expressionRule, fieldName, fieldValue, summarise, rep
197298
"other": "9",
198299
"unknown": "0"
199300
}
200-
301+
201302
# Normalize input
202303
normalized_gender = str(fieldValue).lower()
203304

@@ -223,21 +324,20 @@ def _convertToDose(self, expressionRule, fieldName, fieldValue, summarise, repor
223324
return fieldValue
224325
return ""
225326

226-
# Change to Lookup
327+
# Change to Lookup (loads expected data as is but if empty use lookup extraction to populate value)
227328
def _convertToLookUp(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
329+
if isinstance(fieldValue, str) and any(char.isalpha() for char in fieldValue) and not fieldValue.isdigit():
330+
return fieldValue
228331
try:
229-
if fieldValue != "":
230-
return fieldValue
231-
try:
232332
lookUpValue = self.dataParser.getKeyValue(expressionRule)
233333
IdentifiedLookup = self.dataLookUp.findLookUp(lookUpValue[0])
234334
return IdentifiedLookup
235-
except:
236-
return ""
335+
237336
except Exception as e:
238337
if report_unexpected_exception:
239338
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
240-
return message
339+
self._log_error(fieldName, fieldValue, message)
340+
return ""
241341

242342
# Default to Validate
243343
def _convertToDefaultTo(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
@@ -266,3 +366,41 @@ def _convertToOnlyIfTo(self, expressionRule, fieldName, fieldValue, summarise, r
266366
if report_unexpected_exception:
267367
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
268368
return message
369+
370+
# Check if Snomed code is numeric and reject other forms
371+
def _convertToSnomed(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
372+
"""
373+
Validates that a SNOMED code is a non-empty string containing only digits.
374+
"""
375+
try:
376+
if not fieldValue:
377+
return fieldValue
378+
if not isinstance(fieldValue, str) or not fieldValue.isdigit():
379+
raise ValueError(f"Invalid SNOMED code: {fieldValue}")
380+
return fieldValue
381+
except Exception as e:
382+
if report_unexpected_exception:
383+
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
384+
self._log_error(fieldName, fieldValue, message)
385+
return ""
386+
387+
# Check if Input is boolean or if input is a string with true or false, convert to Boolean
388+
def _convertToBoolean(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
389+
try:
390+
if isinstance(fieldValue, bool):
391+
return fieldValue
392+
393+
if str(fieldValue).strip().lower() == "true":
394+
return True
395+
if str(fieldValue).strip().lower() == "false":
396+
return False
397+
elif report_unexpected_exception:
398+
self._log_error(fieldName, fieldValue, "Invalid String Data")
399+
return ""
400+
except Exception as e:
401+
if report_unexpected_exception:
402+
self._log_error(fieldName, fieldValue, e)
403+
return ""
404+
405+
def get_error_records(self):
    """Return the list held in ``self.errorRecords`` (the same object, not a copy)."""
    records = self.errorRecords
    return records

delta_backend/src/ConversionLayout.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,15 @@
140140
"expression": {
141141
"expressionName": "Date Convert",
142142
"expressionType": "DATECONVERT",
143-
"expressionRule": "format:%Y%m%d"
143+
"expressionRule": "%Y%m%d"
144144
}
145145
},
146146
{
147147
"fieldNameFHIR": "primarySource",
148148
"fieldNameFlat": "PRIMARY_SOURCE",
149149
"expression": {
150150
"expressionName": "Not Empty",
151-
"expressionType": "NOTEMPTY",
151+
"expressionType": "BOOLEAN",
152152
"expressionRule": ""
153153
}
154154
},
@@ -157,7 +157,7 @@
157157
"fieldNameFlat": "VACCINATION_PROCEDURE_CODE",
158158
"expression": {
159159
"expressionName": "Not Empty",
160-
"expressionType": "NOTEMPTY",
160+
"expressionType": "SNOMED",
161161
"expressionRule": ""
162162
}
163163
},
@@ -184,7 +184,7 @@
184184
"fieldNameFlat": "VACCINE_PRODUCT_CODE",
185185
"expression": {
186186
"expressionName": "Not Empty",
187-
"expressionType": "NOTEMPTY",
187+
"expressionType": "SNOMED",
188188
"expressionRule": ""
189189
}
190190
},
@@ -221,15 +221,15 @@
221221
"expression": {
222222
"expressionName": "Date Convert",
223223
"expressionType": "DATECONVERT",
224-
"expressionRule": "format:%Y%m%d"
224+
"expressionRule": "%Y%m%d"
225225
}
226226
},
227227
{
228228
"fieldNameFHIR": "site|coding|#:http://snomed.info/sct|code",
229229
"fieldNameFlat": "SITE_OF_VACCINATION_CODE",
230230
"expression": {
231231
"expressionName": "Not Empty",
232-
"expressionType": "NOTEMPTY",
232+
"expressionType": "SNOMED",
233233
"expressionRule": ""
234234
}
235235
},
@@ -247,7 +247,7 @@
247247
"fieldNameFlat": "ROUTE_OF_VACCINATION_CODE",
248248
"expression": {
249249
"expressionName": "Not Empty",
250-
"expressionType": "NOTEMPTY",
250+
"expressionType": "SNOMED",
251251
"expressionRule": ""
252252
}
253253
},
@@ -265,7 +265,7 @@
265265
"fieldNameFlat": "DOSE_AMOUNT",
266266
"expression": {
267267
"expressionName": "Not Empty",
268-
"expressionType": "NOTEMPTY",
268+
"expressionType": "DEFAULT",
269269
"expressionRule": ""
270270
}
271271
},
@@ -292,7 +292,7 @@
292292
"fieldNameFlat": "INDICATION_CODE",
293293
"expression": {
294294
"expressionName": "Not Empty",
295-
"expressionType": "NOTEMPTY",
295+
"expressionType": "SNOMED",
296296
"expressionRule": ""
297297
}
298298
},

0 commit comments

Comments
 (0)