Skip to content

Commit 51d6be7

Browse files
authored
VED-227- FHIR-FLAT-JSON (#396)
* refactor test * changed specs * snake_case * corrections * empty string * date conversion * coverage * add milliseconds logic
1 parent be10923 commit 51d6be7

File tree

5 files changed

+67
-150
lines changed

5 files changed

+67
-150
lines changed

delta_backend/src/ConversionChecker.py

Lines changed: 15 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -110,100 +110,27 @@ def _log_error(self, fieldName, fieldValue, e, code=ExceptionMessages.RECORD_CHE
110110
})
111111

112112
def _convertToDate(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
113-
if not fieldValue:
114-
return ""
115-
116-
if not isinstance(fieldValue, str):
117-
if report_unexpected_exception:
118-
self._log_error(fieldName, fieldValue, "Value is not a string")
119-
return ""
120-
121-
# Normalize expression rule
122-
format_str = expressionRule.replace("format:", "").strip()
123-
124-
# Reject partial ISO dates like "2024" or "2024-05"
125-
if format_str == "%Y%m%d" and re.match(r"^\d{4}(-\d{2})?$", fieldValue):
126-
if report_unexpected_exception:
127-
self._log_error(fieldName, fieldValue, "Partial date not accepted")
128-
return ""
129-
130-
# Handle only the recorded field with extended ISO + timezone support
131-
if fieldName == "recorded":
132-
# Accept "YYYY-MM-DD" and return as is
133-
if re.match(r"^\d{4}-\d{2}-\d{2}$", fieldValue):
134-
try:
135-
dt = datetime.strptime(fieldValue, "%Y-%m-%d")
136-
if dt.date() > datetime.now(ZoneInfo("UTC")).date():
137-
if report_unexpected_exception:
138-
self._log_error(fieldName, fieldValue, "Date cannot be in the future")
139-
return ""
140-
return fieldValue
141-
except ValueError:
142-
if report_unexpected_exception:
143-
self._log_error(fieldName, fieldValue, "Invalid date format")
144-
return ""
145-
try:
146-
# Parse ISO format with or without microseconds and TZ
147-
dt = datetime.fromisoformat(fieldValue)
148-
except ValueError:
149-
if report_unexpected_exception:
150-
self._log_error(fieldName, fieldValue, "Invalid date format")
151-
return ""
152-
153-
# Assign UTC if tzinfo is missing
154-
if dt.tzinfo is None:
155-
dt = dt.replace(tzinfo=ZoneInfo("UTC"))
156-
157-
now_utc = datetime.now(ZoneInfo("UTC"))
158-
if dt.astimezone(ZoneInfo("UTC")) > now_utc:
159-
if report_unexpected_exception:
160-
self._log_error(fieldName, fieldValue, "Date cannot be in the future")
113+
"""
114+
Convert an ISO 8601 date/datetime string to the strftime format given by expressionRule (e.g. YYYYMMDD).
115+
"""
116+
if not fieldValue:
161117
return ""
162118

163-
# Validate timezone offset
164-
offset = dt.utcoffset()
165-
allowed_offsets = [
166-
ZoneInfo("UTC").utcoffset(dt),
167-
ZoneInfo("Europe/London").utcoffset(dt),
168-
]
169-
170-
if offset not in allowed_offsets:
119+
# 1. Data type must be a string
120+
if not isinstance(fieldValue, str):
171121
if report_unexpected_exception:
172-
self._log_error(fieldName, fieldValue, f"Unsupported offset: {offset}")
122+
self._log_error(fieldName, fieldValue, "Value is not a string")
173123
return ""
174-
175-
dt_utc = dt.astimezone(ZoneInfo("UTC")).replace(microsecond=0)
176-
177-
# Format and return with custom suffix
178-
formatted = dt_utc.strftime("%Y%m%dT%H%M%S%z")
179-
return formatted.replace("+0000", "00").replace("+0100", "01")
180-
181-
# For all other fields, apply standard %Y%m%d processing
182-
if format_str == "%Y%m%d":
183-
fieldValue = fieldValue.replace("-", "").replace("/", "")
184-
# Validate expected raw input format if using %Y%m%d
185-
if not re.match(r"^\d{8}$", fieldValue):
124+
try:
125+
fieldValue = re.sub(r"\.\d+(?=[+-]\d{2}:\d{2}$)", "", fieldValue) # Remove milliseconds
126+
dt = datetime.fromisoformat(fieldValue)
127+
return dt.strftime(expressionRule)
128+
except ValueError as e:
129+
# 5. Unexpected parsing errors
186130
if report_unexpected_exception:
187-
self._log_error(fieldName, fieldValue, "Date must be in YYYYMMDD format")
131+
self._log_error(fieldName, fieldValue, e)
188132
return ""
189133

190-
try:
191-
dt = datetime.strptime(fieldValue, format_str)
192-
193-
# Reject future dates if the field is BirthDate
194-
if fieldName in "contained|#:Patient|birthDate":
195-
today_utc = datetime.now(ZoneInfo("UTC")).date()
196-
if dt.date() > today_utc:
197-
if report_unexpected_exception:
198-
self._log_error(fieldName, fieldValue, "Birthdate cannot be in the future")
199-
return ""
200-
201-
return dt.strftime(format_str)
202-
except ValueError as e:
203-
if report_unexpected_exception:
204-
self._log_error(fieldName, fieldValue, e)
205-
return ""
206-
207134
# Convert FHIR datetime into CSV-safe UTC format
208135
def _convertToDateTime(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
209136
if not fieldValue:
@@ -255,7 +182,7 @@ def _convertToNotEmpty(self, expressionRule, fieldName, fieldValue, summarise, r
255182
if report_unexpected_exception:
256183
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
257184
self._log_error(fieldName, fieldValue, message)
258-
return
185+
return ""
259186

260187
# NHSNumber Validate
261188
def _convertToNHSNumber(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):

delta_backend/src/ConversionLayout.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@
157157
"fieldNameFlat": "VACCINATION_PROCEDURE_CODE",
158158
"expression": {
159159
"expressionName": "Not Empty",
160-
"expressionType": "SNOMED",
160+
"expressionType": "NOTEMPTY",
161161
"expressionRule": ""
162162
}
163163
},
@@ -184,7 +184,7 @@
184184
"fieldNameFlat": "VACCINE_PRODUCT_CODE",
185185
"expression": {
186186
"expressionName": "Not Empty",
187-
"expressionType": "SNOMED",
187+
"expressionType": "NOTEMPTY",
188188
"expressionRule": ""
189189
}
190190
},
@@ -229,7 +229,7 @@
229229
"fieldNameFlat": "SITE_OF_VACCINATION_CODE",
230230
"expression": {
231231
"expressionName": "Not Empty",
232-
"expressionType": "SNOMED",
232+
"expressionType": "NOTEMPTY",
233233
"expressionRule": ""
234234
}
235235
},
@@ -247,7 +247,7 @@
247247
"fieldNameFlat": "ROUTE_OF_VACCINATION_CODE",
248248
"expression": {
249249
"expressionName": "Not Empty",
250-
"expressionType": "SNOMED",
250+
"expressionType": "NOTEMPTY",
251251
"expressionRule": ""
252252
}
253253
},
@@ -292,7 +292,7 @@
292292
"fieldNameFlat": "INDICATION_CODE",
293293
"expression": {
294294
"expressionName": "Not Empty",
295-
"expressionType": "SNOMED",
295+
"expressionType": "NOTEMPTY",
296296
"expressionRule": ""
297297
}
298298
},

delta_backend/tests/sample_data/fhir_sample.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
}
2929
],
3030
"gender": "other",
31-
"birthDate": "2023-03-01",
31+
"birthDate": "2026-03-10",
3232
"address": [
3333
{
3434
"use": "home",
@@ -73,7 +73,7 @@
7373
"reference": "#Pat1"
7474
},
7575
"occurrenceDateTime": "2021-02-07T13:28:17+00:00",
76-
"recorded": "2029-04-09",
76+
"recorded": "2025-02-07",
7777
"primarySource": "True",
7878
"manufacturer": {
7979
"display": "AstraZeneca Ltd"
@@ -86,7 +86,7 @@
8686
}
8787
},
8888
"lotNumber": "4120Z001",
89-
"expirationDate": "2026-07-02",
89+
"expirationDate": "2024-10-12",
9090
"site": {
9191
"coding": [
9292
{
@@ -153,4 +153,4 @@
153153
"doseNumberPositiveInt": 2
154154
}
155155
]
156-
}
156+
}

delta_backend/tests/test_convert_to_flat_json.py

Lines changed: 39 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def test_conversion_exceptions(self, mock_get_key_value, mock_get_conversions):
315315
def test_log_error(self, MockLookUpData):
316316
# Instantiate ConversionChecker
317317
checker = ConversionChecker(dataParser=None, summarise=False, report_unexpected_exception=True)
318-
318+
319319
# Simulate an exception
320320
exception = ValueError("Invalid value")
321321

@@ -349,6 +349,38 @@ def test_convert_to_not_empty(self, MockLookUpData):
349349
result = checker._convertToNotEmpty(None, "fieldName", "", False, True)
350350
self.assertEqual(result, "")
351351

352+
# Test for value that is not a string
353+
checker._log_error = Mock()
354+
result = checker._convertToNotEmpty(None, "fieldName", 12345, False, True)
355+
self.assertEqual(result, "")
356+
357+
checker._log_error.assert_called_once()
358+
359+
field, value, err = checker._log_error.call_args[0]
360+
self.assertEqual((field, value), ("fieldName",12345))
361+
self.assertIsInstance(err, str)
362+
self.assertIn("Value not a String", err)
363+
364+
checker._log_error.reset_mock()
365+
366+
# Simulate a fieldValue whose .strip() crashes to test exception handling
367+
checker._log_error = Mock()
368+
369+
class BadString(str):
370+
def strip(self):
371+
raise RuntimeError("Simulated crash during strip")
372+
373+
bad_value = BadString("some bad string")
374+
375+
# Make the .strip() method crash
376+
result = checker._convertToNotEmpty(None, "fieldName", bad_value, False, True)
377+
self.assertEqual(result, "") # Should return empty string on exception
378+
checker._log_error.assert_called_once()
379+
field, value, message = checker._log_error.call_args[0]
380+
self.assertEqual((field, value), ("fieldName", bad_value))
381+
self.assertIsInstance(message, str)
382+
self.assertIn("RuntimeError", message)
383+
352384
@patch("ConversionChecker.LookUpData")
353385
def test_convert_to_nhs_number(self, MockLookUpData):
354386

@@ -381,9 +413,9 @@ def test_convert_to_date(self, MockLookUpData):
381413
result = checker._convertToDate("%Y%m%d", "fieldName", "2022-01-01", False, True)
382414
self.assertEqual(result, "20220101")
383415

384-
# 2. Partial ISO date (should trigger "Partial date not accepted")
385-
result = checker._convertToDate("%Y%m%d", "fieldName", "2022-01", False, True)
386-
self.assertEqual(result, "")
416+
# 2. Full ISO datetime (with time and UTC offset) should be transformed to YYYYMMDD
417+
result = checker._convertToDate("%Y%m%d", "fieldName", "2022-01-01T12:00:00+00:00", False, True)
418+
self.assertEqual(result, "20220101")
387419

388420
# 3. Invalid string date format (should log the ValueError raised while parsing the ISO string)
389421
result = checker._convertToDate("%Y%m%d", "fieldName", "invalid_date", False, True)
@@ -397,60 +429,18 @@ def test_convert_to_date(self, MockLookUpData):
397429
result = checker._convertToDate("%Y%m%d", "fieldName", 12345678, False, True)
398430
self.assertEqual(result, "")
399431

400-
# 6. Future date for birthDate (should trigger "Date cannot be in the future")
401-
future_date = "20991231"
402-
result = checker._convertToDate("%Y%m%d", "contained|#:Patient|birthDate", future_date, False, True)
403-
self.assertEqual(result, "")
404-
405-
# 8. Empty string
432+
# 6 Empty string
406433
result = checker._convertToDate("%Y%m%d", "fieldName", "", False, True)
407434
self.assertEqual(result, "")
408435

409-
# 9. Valid recorded date with timezone
410-
valid_recorded = "2021-02-07T13:28:17+00:00"
411-
result = checker._convertToDate("format:%Y-%m-%d", "recorded", valid_recorded, False, True)
412-
self.assertEqual(result, "20210207T13281700")
413-
414-
# 10. Recorded field: unsupported timezone offset (+02:00)
415-
result = checker._convertToDate("%Y%m%d", "recorded", "2022-01-01T12:00:00+02:00", False, True)
416-
self.assertEqual(result, "")
417-
418-
# 11. Recorded date with invalid format
419-
result = checker._convertToDate("format:%Y-%m-%d", "recorded", "invalid_date", False, True)
420-
self.assertEqual(result, "")
421-
422-
# 12. Recorded date with invalid format
423-
result = checker._convertToDate("format:%Y-%m-%d", "recorded", "invalid_date", False, True)
424-
self.assertEqual(result, "")
425-
426-
# recorded datetime (no tz) treated as UTC and formatted “YYYYMMDDTHHMMSS00”
427-
past_date = "2023-04-15T10:30:00"
428-
format = "format:%Y-%m-%dT%H:%M:%S"
429-
result = checker._convertToDate(format,"recorded",past_date,False,True)
430-
431-
# 13 expect to parse as UTC, then emit YYYYMMDDTHHMMSS and “00” for +0000
432-
expected = "20230415T103000"
433-
self.assertTrue(result.endswith("00"),f"Expected prefix {expected}, got {result!r}")
434-
435-
# 14. Recorded timestamp without tzinfo in the future → rejected
436-
future_naive = "2099-12-31T23:59:59"
437-
result = checker._convertToDate(format,"recorded",future_naive,False,True)
438-
self.assertEqual(result, "")
439-
440-
# 15 Validate all error logs of various responses
436+
# 7 Validate all error logs of various responses
441437
messages = [err["message"] for err in checker.errorRecords]
442438
print(f"Error Test Case, {messages}")
443439

444-
self.assertIn("Date must be in YYYYMMDD format", messages)
445440
self.assertIn("Value is not a string", messages)
446-
self.assertIn("Partial date not accepted", messages)
447-
self.assertIn("Date cannot be in the future", messages)
448-
self.assertIn("Birthdate cannot be in the future", messages)
449-
self.assertTrue(any(m.startswith("Unsupported offset") for m in messages))
450-
self.assertIn("Invalid date format", messages)
451441

452442
# Confirm Total Errors Per conversion
453-
self.assertEqual(len(checker.errorRecords), 8)
443+
self.assertEqual(len(checker.errorRecords), 2)
454444

455445
# Test for value Error
456446
checker._log_error = Mock()

delta_backend/tests/utils_for_converter_tests.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def get_expected_imms(expected_action_flag):
183183
"ACTION_FLAG": expected_action_flag,
184184
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
185185
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
186-
"RECORDED_DATE": "20210207T13281700",
186+
"RECORDED_DATE": "20210207",
187187
"PRIMARY_SOURCE": True,
188188
"VACCINATION_PROCEDURE_CODE": "13246814444444",
189189
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
@@ -223,7 +223,7 @@ def get_expected_imms(expected_action_flag):
223223
"ACTION_FLAG": "update",
224224
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
225225
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
226-
"RECORDED_DATE": "20210207T13281700",
226+
"RECORDED_DATE": "20210207",
227227
"PRIMARY_SOURCE": True,
228228
"VACCINATION_PROCEDURE_CODE": "13246814444444",
229229
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
@@ -263,7 +263,7 @@ def get_expected_imms(expected_action_flag):
263263
"ACTION_FLAG": "update",
264264
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
265265
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
266-
"RECORDED_DATE": "20210207T13281700",
266+
"RECORDED_DATE": "20210207",
267267
"PRIMARY_SOURCE": True,
268268
"VACCINATION_PROCEDURE_CODE": "13246814444444",
269269
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
@@ -417,7 +417,7 @@ def get_expected_imms_error_output(expected_action_flag):
417417
"ACTION_FLAG": "UPDATE",
418418
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
419419
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
420-
"RECORDED_DATE": "20210207T13281700",
420+
"RECORDED_DATE": "20210207",
421421
"PRIMARY_SOURCE": True,
422422
"VACCINATION_PROCEDURE_CODE": "13246814444444",
423423
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",

0 commit comments

Comments
 (0)