Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 24 additions & 86 deletions delta_backend/src/ConversionChecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,99 +110,35 @@ def _log_error(self, fieldName, fieldValue, e, code=ExceptionMessages.RECORD_CHE
})

def _convertToDate(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
if not fieldValue:
return ""

if not isinstance(fieldValue, str):
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Value is not a string")
return ""

# Normalize expression rule
format_str = expressionRule.replace("format:", "").strip()

# Reject partial ISO dates like "2024" or "2024-05"
if format_str == "%Y%m%d" and re.match(r"^\d{4}(-\d{2})?$", fieldValue):
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Partial date not accepted")
return ""

# Handle only the recorded field with extended ISO + timezone support
if fieldName == "recorded":
# Accept "YYYY-MM-DD" and return as is
if re.match(r"^\d{4}-\d{2}-\d{2}$", fieldValue):
try:
dt = datetime.strptime(fieldValue, "%Y-%m-%d")
if dt.date() > datetime.now(ZoneInfo("UTC")).date():
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Date cannot be in the future")
return ""
return fieldValue
except ValueError:
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Invalid date format")
return ""
try:
# Parse ISO format with or without microseconds and TZ
dt = datetime.fromisoformat(fieldValue)
except ValueError:
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Invalid date format")
return ""

# Assign UTC if tzinfo is missing
if dt.tzinfo is None:
dt = dt.replace(tzinfo=ZoneInfo("UTC"))

now_utc = datetime.now(ZoneInfo("UTC"))
if dt.astimezone(ZoneInfo("UTC")) > now_utc:
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Date cannot be in the future")
return ""

# Validate timezone offset
offset = dt.utcoffset()
allowed_offsets = [
ZoneInfo("UTC").utcoffset(dt),
ZoneInfo("Europe/London").utcoffset(dt),
]

if offset not in allowed_offsets:
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, f"Unsupported offset: {offset}")
"""
Convert a date string according to match YYYYMMDD format.
"""
if not fieldValue:
return ""

dt_utc = dt.astimezone(ZoneInfo("UTC")).replace(microsecond=0)

# Format and return with custom suffix
formatted = dt_utc.strftime("%Y%m%dT%H%M%S%z")
return formatted.replace("+0000", "00").replace("+0100", "01")

# For all other fields, apply standard %Y%m%d processing
if format_str == "%Y%m%d":
fieldValue = fieldValue.replace("-", "").replace("/", "")
# Validate expected raw input format if using %Y%m%d
if not re.match(r"^\d{8}$", fieldValue):
# 1. Data type must be a string
if not isinstance(fieldValue, str):
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Date must be in YYYYMMDD format")
self._log_error(fieldName, fieldValue, "Value is not a string")
return ""

try:
dt = datetime.strptime(fieldValue, format_str)

# Reject future dates if the field is BirthDate
if fieldName in "contained|#:Patient|birthDate":
today_utc = datetime.now(ZoneInfo("UTC")).date()
if dt.date() > today_utc:
# 2. Use Expression Rule Format to parse the date, remove dashes and slashes
if expressionRule == "%Y%m%d":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As below, it seems odd that we only handle YYYYMMDD, YYYY-MM-DD or YYYY-MM-DDT... here. Why not just parse any valid ISO-formatted date / datetime? Will check with Paul.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E.g. what about partial dates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorted

fieldValue = fieldValue.split("T")[0]
fieldValue = fieldValue.replace("-", "").replace("/", "")
if not re.match(r"^\d{8}$", fieldValue):
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, "Birthdate cannot be in the future")
self._log_error(fieldName, fieldValue, "Date must be in YYYYMMDD format")
return ""

return dt.strftime(format_str)
except ValueError as e:
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, e)
return ""
try:
# Converts raw fieldvalue without delimiters to a date-time object
dt = datetime.strptime(fieldValue, expressionRule)
return dt.strftime(expressionRule)
except ValueError as e:
# 5. Unexpected parsing errors
if report_unexpected_exception:
self._log_error(fieldName, fieldValue, e)
return ""

# Convert FHIR datetime into CSV-safe UTC format
def _convertToDateTime(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
Expand Down Expand Up @@ -246,6 +182,8 @@ def _convertToDateTime(self, expressionRule, fieldName, fieldValue, summarise, r

# Not Empty Validate - Returns exactly what is in the extracted fields no parsing or logic needed
def _convertToNotEmpty(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
if not fieldValue:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why has this been added? Maybe I'm missing the point of NOTEMPTY, but I'd expect None to be an error (which would already be picked up by the logic below)

Copy link
Contributor Author

@Akol125 Akol125 Apr 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking that if it's empty, the overall data from the other data sources is also empty, which means it's simply empty data and hence not exactly an error. What would you suggest?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might just be a naming issue - convertToNotEmpty makes me think it will never return an empty string, but I might be wrong. Will check with Paul what his intention was. Don't think this is a blocker for getting this MR merged

return ""
try:
if isinstance(fieldValue, str) and fieldValue.strip():
return fieldValue
Expand Down
10 changes: 5 additions & 5 deletions delta_backend/src/ConversionLayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
"fieldNameFlat": "VACCINATION_PROCEDURE_CODE",
"expression": {
"expressionName": "Not Empty",
"expressionType": "SNOMED",
"expressionType": "NOTEMPTY",
"expressionRule": ""
}
},
Expand All @@ -184,7 +184,7 @@
"fieldNameFlat": "VACCINE_PRODUCT_CODE",
"expression": {
"expressionName": "Not Empty",
"expressionType": "SNOMED",
"expressionType": "NOTEMPTY",
"expressionRule": ""
}
},
Expand Down Expand Up @@ -229,7 +229,7 @@
"fieldNameFlat": "SITE_OF_VACCINATION_CODE",
"expression": {
"expressionName": "Not Empty",
"expressionType": "SNOMED",
"expressionType": "NOTEMPTY",
"expressionRule": ""
}
},
Expand All @@ -247,7 +247,7 @@
"fieldNameFlat": "ROUTE_OF_VACCINATION_CODE",
"expression": {
"expressionName": "Not Empty",
"expressionType": "SNOMED",
"expressionType": "NOTEMPTY",
"expressionRule": ""
}
},
Expand Down Expand Up @@ -292,7 +292,7 @@
"fieldNameFlat": "INDICATION_CODE",
"expression": {
"expressionName": "Not Empty",
"expressionType": "SNOMED",
"expressionType": "NOTEMPTY",
"expressionRule": ""
}
},
Expand Down
8 changes: 4 additions & 4 deletions delta_backend/tests/sample_data/fhir_sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
}
],
"gender": "other",
"birthDate": "2023-03-01",
"birthDate": "2024-03-10",
"address": [
{
"use": "home",
Expand Down Expand Up @@ -73,7 +73,7 @@
"reference": "#Pat1"
},
"occurrenceDateTime": "2021-02-07T13:28:17+00:00",
"recorded": "2029-04-09",
"recorded": "2029-01-01",
"primarySource": "True",
"manufacturer": {
"display": "AstraZeneca Ltd"
Expand All @@ -86,7 +86,7 @@
}
},
"lotNumber": "4120Z001",
"expirationDate": "2026-07-02",
"expirationDate": "2024-10-12",
"site": {
"coding": [
{
Expand Down Expand Up @@ -153,4 +153,4 @@
"doseNumberPositiveInt": 2
}
]
}
}
55 changes: 7 additions & 48 deletions delta_backend/tests/test_convert_to_flat_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def test_conversion_exceptions(self, mock_get_key_value, mock_get_conversions):
def test_log_error(self, MockLookUpData):
# Instantiate ConversionChecker
checker = ConversionChecker(dataParser=None, summarise=False, report_unexpected_exception=True)

# Simulate an exception
exception = ValueError("Invalid value")

Expand Down Expand Up @@ -381,9 +381,9 @@ def test_convert_to_date(self, MockLookUpData):
result = checker._convertToDate("%Y%m%d", "fieldName", "2022-01-01", False, True)
self.assertEqual(result, "20220101")

# 2. Partial ISO date (should trigger "Partial date not accepted")
result = checker._convertToDate("%Y%m%d", "fieldName", "2022-01", False, True)
self.assertEqual(result, "")
# 2.Full ISO date should be transformed to YYYYMMDD
result = checker._convertToDate("%Y%m%d", "fieldName", "2022-01-01T12:00:00+00:0", False, True)
self.assertEqual(result, "20220101")

# 3. Invalid string date format (should trigger "Date must be in YYYYMMDD format")
result = checker._convertToDate("%Y%m%d", "fieldName", "invalid_date", False, True)
Expand All @@ -397,60 +397,19 @@ def test_convert_to_date(self, MockLookUpData):
result = checker._convertToDate("%Y%m%d", "fieldName", 12345678, False, True)
self.assertEqual(result, "")

# 6. Future date for birthDate (should trigger "Date cannot be in the future")
future_date = "20991231"
result = checker._convertToDate("%Y%m%d", "contained|#:Patient|birthDate", future_date, False, True)
self.assertEqual(result, "")

# 8. Empty string
# 6 Empty string
result = checker._convertToDate("%Y%m%d", "fieldName", "", False, True)
self.assertEqual(result, "")

# 9. Valid recorded date with timezone
valid_recorded = "2021-02-07T13:28:17+00:00"
result = checker._convertToDate("format:%Y-%m-%d", "recorded", valid_recorded, False, True)
self.assertEqual(result, "20210207T13281700")

# 10. Recorded field: unsupported timezone offset (+02:00)
result = checker._convertToDate("%Y%m%d", "recorded", "2022-01-01T12:00:00+02:00", False, True)
self.assertEqual(result, "")

# 11. Recorded date with invalid format
result = checker._convertToDate("format:%Y-%m-%d", "recorded", "invalid_date", False, True)
self.assertEqual(result, "")

# 12. Recorded date with invalid format
result = checker._convertToDate("format:%Y-%m-%d", "recorded", "invalid_date", False, True)
self.assertEqual(result, "")

# recorded datetime (no tz) treated as UTC and formatted “YYYYMMDDTHHMMSS00”
past_date = "2023-04-15T10:30:00"
format = "format:%Y-%m-%dT%H:%M:%S"
result = checker._convertToDate(format,"recorded",past_date,False,True)

# 13 expect to parse as UTC, then emit YYYYMMDDTHHMMSS and “00” for +0000
expected = "20230415T103000"
self.assertTrue(result.endswith("00"),f"Expected prefix {expected}, got {result!r}")

# 14. Recorded timestamp without tzinfo in the future → rejected
future_naive = "2099-12-31T23:59:59"
result = checker._convertToDate(format,"recorded",future_naive,False,True)
self.assertEqual(result, "")

# 15 Validate all error logs of various responses
# 7 Validate all error logs of various responses
messages = [err["message"] for err in checker.errorRecords]
print(f"Error Test Case, {messages}")

self.assertIn("Date must be in YYYYMMDD format", messages)
self.assertIn("Value is not a string", messages)
self.assertIn("Partial date not accepted", messages)
self.assertIn("Date cannot be in the future", messages)
self.assertIn("Birthdate cannot be in the future", messages)
self.assertTrue(any(m.startswith("Unsupported offset") for m in messages))
self.assertIn("Invalid date format", messages)

# Confirm Total Errors Per conversion
self.assertEqual(len(checker.errorRecords), 8)
self.assertEqual(len(checker.errorRecords), 2)

# Test for value Error
checker._log_error = Mock()
Expand Down
8 changes: 4 additions & 4 deletions delta_backend/tests/utils_for_converter_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def get_expected_imms(expected_action_flag):
"ACTION_FLAG": expected_action_flag,
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
"RECORDED_DATE": "20210207T13281700",
"RECORDED_DATE": "20210207",
"PRIMARY_SOURCE": True,
"VACCINATION_PROCEDURE_CODE": "13246814444444",
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
Expand Down Expand Up @@ -223,7 +223,7 @@ def get_expected_imms(expected_action_flag):
"ACTION_FLAG": "update",
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
"RECORDED_DATE": "20210207T13281700",
"RECORDED_DATE": "20210207",
"PRIMARY_SOURCE": True,
"VACCINATION_PROCEDURE_CODE": "13246814444444",
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
Expand Down Expand Up @@ -263,7 +263,7 @@ def get_expected_imms(expected_action_flag):
"ACTION_FLAG": "update",
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
"RECORDED_DATE": "20210207T13281700",
"RECORDED_DATE": "20210207",
"PRIMARY_SOURCE": True,
"VACCINATION_PROCEDURE_CODE": "13246814444444",
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
Expand Down Expand Up @@ -417,7 +417,7 @@ def get_expected_imms_error_output(expected_action_flag):
"ACTION_FLAG": "UPDATE",
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
"RECORDED_DATE": "20210207T13281700",
"RECORDED_DATE": "20210207",
"PRIMARY_SOURCE": True,
"VACCINATION_PROCEDURE_CODE": "13246814444444",
"VACCINATION_PROCEDURE_TERM": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
Expand Down
Loading