Skip to content

Commit 78932d9

Browse files
committed
Merge branch 'master' into VED-187-e2e-search-pagination
2 parents da6cfd7 + 1525013 commit 78932d9

19 files changed

+240
-46
lines changed

delta_backend/README.md

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,76 @@ This project is designed to convert FHIR-compliant JSON data (e.g., Immunization
66

77
## 📁 File Structure Overview
88

9+
| File Name | What It Does |
10+
|------------------------|---------------|
11+
| **`converter.py`** | 🧠 The main brain — applies the schema, runs conversions, handles errors. |
12+
| **`FHIRParser.py`** | 🪜 Knows how to dig into nested FHIR structures and pull out values like dates, IDs, and patient names. |
13+
| **`SchemaParser.py`** | Reads your schema layout and tells the converter which FHIR fields to extract and how to rename/format them. |
14+
| **`ConversionLayout.py`** | A plain Python list that defines which fields you want, and how they should be formatted (e.g. date format, renaming rules). |
15+
| **`ConversionChecker.py`** | 🔧 Handles transformation logic — e.g. turning a FHIR datetime into `YYYY-MM-DD`, applying lookups, gender codes, defaults, etc. |
16+
| **`Extractor.py`** | Specialized logic to pull practitioner names, site codes, addresses, and apply time-aware rules. |
17+
| **`ExceptionMessages.py`** | Holds reusable error messages and codes for clean debugging and validation feedback. |
18+
19+
---
20+
21+
22+
## 🛠️ Key Features
23+
24+
- Schema-driven field extraction and formatting
25+
- Support for custom date formats like `YYYYMMDD`, and CSV-safe UTC timestamps
26+
- Robust handling of patient, practitioner, and address data using time-aware logic
27+
- Extendable structure with static helper methods and modular architecture
28+
29+
---
30+
31+
## Example Use Case
32+
33+
- Input: FHIR `Immunization` resource (with nested fields)
34+
- Output: Flat JSON object with 34 standardized key-value pairs
35+
- Purpose: To export into CSV or push into downstream ETL systems
36+
37+
---
38+
39+
## ✅ Getting Started with `check_conversion.py`
40+
41+
To quickly test your conversion, use the provided `check_conversion.py` script.
42+
This script loads sample FHIR data, runs it through the converter, and automatically saves the output in both JSON and CSV formats.
43+
44+
### 🔄 How to Use It
45+
46+
1. Add your FHIR data (e.g., a dictionary or sample JSON) into the `fhir_sample` variable inside `check_conversion.py`
47+
2. Ensure the field mapping in `ConversionLayout.py` matches your desired output
48+
3. Run the script from the `tests` folder:
49+
50+
```bash
51+
python check_conversion.py
52+
```
53+
54+
### Output Location
55+
When the script runs, it will automatically:
56+
- Save a **flat JSON file** as `output.json`
57+
- Save a **CSV file** as `output.csv`
58+
59+
These will be located one level up from the `tests/` folder:
60+
61+
```
62+
delta_backend/output.json
63+
delta_backend/output.csv
64+
```
65+
66+
### Visualization
67+
You can now:
68+
- Open `output.csv` in Excel or Google Sheets to view cleanly structured records
69+
- Inspect `output.json` to validate the flat key-value output programmatically
70+
71+
---

# 🩺 FHIR to Flat JSON Conversion Engine
72+
73+
This project is designed to convert FHIR-compliant JSON data (e.g., Immunization records) into a flat JSON format based on a configurable schema layout. It is intended to support synchronization of Immunisation API-generated data from external sources to the DPS (Data Processing System).
74+
75+
---
76+
77+
## 📁 File Structure Overview
78+
979
| File Name | What It Does |
1080
|------------------------|---------------|
1181
| **`converter.py`** | 🧠 The main brain — applies the schema, runs conversions, handles errors. |

delta_backend/src/ConversionChecker.py

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def __init__(self, dataParser, summarise, report_unexpected_exception):
3939
self.dataLookUp = LookUpData() # used for generic look up
4040
self.summarise = summarise # instance attribute
4141
self.report_unexpected_exception = report_unexpected_exception # instance attribute
42+
self.errorRecords = [] # Store all errors here
4243

4344
# Main entry point called by converter.py
4445
def convertData(self, expressionType, expressionRule, fieldName, fieldValue):
@@ -55,6 +56,10 @@ def convertData(self, expressionType, expressionRule, fieldName, fieldValue):
5556
return self._convertToNotEmpty(
5657
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
5758
)
59+
case "DOSESEQUENCE":
60+
return self._convertToDose(
61+
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
62+
)
5863
case "GENDER":
5964
return self._convertToGender(
6065
expressionRule, fieldName, fieldValue, self.summarise, self.report_unexpected_exception
@@ -88,18 +93,10 @@ def _convertToDate(self, expressionRule, fieldName, fieldValue, summarise, repor
8893
return ""
8994

9095
if not isinstance(fieldValue, str):
91-
raise RecordError(
92-
ExceptionMessages.RECORD_CHECK_FAILED,
93-
f"{fieldName} rejected: not a string.",
94-
f"Received: {type(fieldValue)}",
95-
)
96+
return ""
9697
# Reject partial dates like "2024" or "2024-05"
9798
if re.match(r"^\d{4}(-\d{2})?$", fieldValue):
98-
raise RecordError(
99-
ExceptionMessages.RECORD_CHECK_FAILED,
100-
f"{fieldName} rejected: partial date not accepted.",
101-
f"Invalid partial date: {fieldValue}",
102-
)
99+
return ""
103100
try:
104101
dt = datetime.fromisoformat(fieldValue)
105102
format_str = expressionRule.replace("format:", "")
@@ -148,7 +145,7 @@ def _convertToDateTime(self, expressionRule, fieldName, fieldValue, summarise, r
148145

149146
return dt_utc.strftime(format_str)
150147

151-
# Not Empty Validate
148+
# Not Empty Validate - returns the extracted field value exactly as-is; no parsing or conversion logic is applied
152149
def _convertToNotEmpty(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
153150
try:
154151
if len(str(fieldValue)) > 0:
@@ -161,39 +158,70 @@ def _convertToNotEmpty(self, expressionRule, fieldName, fieldValue, summarise, r
161158

162159
# NHSNumber Validate
163160
def _convertToNHSNumber(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
161+
"""
162+
Validates that the NHS Number is exactly 10 digits.
163+
"""
164+
# If it is outright empty, return back an empty string
165+
if not fieldValue:
166+
return ""
167+
164168
try:
165-
regexRule = "^6[0-9]{10}$"
166-
result = re.search(regexRule, fieldValue)
167-
if not result:
168-
raise RecordError(
169-
ExceptionMessages.RECORD_CHECK_FAILED,
170-
"NHS Number check failed",
171-
"NHS Number does not meet regex rules, data- " + fieldValue,
172-
)
169+
regexRule = r"^\d{10}$"
170+
if isinstance(fieldValue, str) and re.fullmatch(regexRule, fieldValue):
171+
return fieldValue
172+
raise ValueError(f"NHS Number must be exactly 10 digits: {fieldValue}")
173173
except Exception as e:
174174
if report_unexpected_exception:
175175
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
176-
return message
176+
self.errorRecords.append({
177+
"field": fieldName,
178+
"value": fieldValue,
179+
"message": message
180+
})
181+
return ""
177182

178183
# Gender Validate
179184
def _convertToGender(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
185+
"""
186+
Converts gender string to numeric representation.
187+
Mapping:
188+
- "male" → "1"
189+
- "female" → "2"
190+
- "other" → "9"
191+
- "unknown" → "0"
192+
"""
180193
try:
181-
genderlist = {"male": "1", "female": "2", "other": "9", "unknown": "0"}
182-
genderNumber = genderlist[fieldValue]
183-
return genderNumber
194+
gender_map = {
195+
"male": "1",
196+
"female": "2",
197+
"other": "9",
198+
"unknown": "0"
199+
}
200+
201+
# Normalize input
202+
normalized_gender = str(fieldValue).lower()
203+
204+
if normalized_gender not in gender_map:
205+
return ""
206+
return gender_map[normalized_gender]
207+
184208
except Exception as e:
185209
if report_unexpected_exception:
186-
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
187-
return message
210+
return f"Unexpected exception [{e.__class__.__name__}]: {str(e)}"
188211

189-
# Change to Validate
212+
# Code for converting Action Flag
190213
def _convertToChangeTo(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
191214
try:
192215
return expressionRule
193216
except Exception as e:
194217
if report_unexpected_exception:
195218
message = ExceptionMessages.MESSAGES[ExceptionMessages.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e)
196219
return message
220+
# Code for converting Dose Sequence
221+
def _convertToDose(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):
222+
if isinstance(fieldValue, (int, float)) and 1 <= fieldValue <= 9:
223+
return fieldValue
224+
return ""
197225

198226
# Change to Lookup
199227
def _convertToLookUp(self, expressionRule, fieldName, fieldValue, summarise, report_unexpected_exception):

delta_backend/src/ConversionLayout.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
"fieldNameFHIR": "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value",
1313
"fieldNameFlat": "NHS_NUMBER",
1414
"expression": {
15-
"expressionName": "Not Empty",
16-
"expressionType": "NOTEMPTY",
15+
"expressionName": "NHS NUMBER",
16+
"expressionType": "NHSNUMBER",
1717
"expressionRule": ""
1818
}
1919
},
@@ -175,7 +175,7 @@
175175
"fieldNameFlat": "DOSE_SEQUENCE",
176176
"expression": {
177177
"expressionName": "Not Empty",
178-
"expressionType": "NOTEMPTY",
178+
"expressionType": "DOSESEQUENCE",
179179
"expressionRule": ""
180180
}
181181
},

delta_backend/tests/.coverage

68 KB
Binary file not shown.

delta_backend/tests/sample_data/fhir_sample.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,14 @@
2727
"given": ["Sarah"]
2828
}
2929
],
30-
"gender": "unknown",
30+
"gender": "other",
3131
"birthDate": "1965-02-28",
3232
"address": [
3333
{
34-
"postalCode": "EC1A 1BB"
34+
"use": "home",
35+
"line": ["123 High Street"],
36+
"city": "London",
37+
"postalCode": "SW1A 1AA"
3538
}
3639
]
3740
}
@@ -147,7 +150,7 @@
147150
]
148151
}
149152
],
150-
"doseNumberPositiveInt": 1
153+
"doseNumberPositiveInt": 2
151154
}
152155
]
153156
}

delta_backend/tests/test_convert_to_flat_json.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
@mock_dynamodb
3232
@mock_sqs
3333
class TestConvertToFlatJson(unittest.TestCase):
34+
maxDiff = None
3435
maxDiff = None
3536
def setUp(self):
3637
"""Set up mock DynamoDB table."""
@@ -312,13 +313,21 @@ def test_convert_to_nhs_number(self, MockLookUpData):
312313
dataParser = Mock()
313314

314315
checker = ConversionChecker(dataParser, summarise=False, report_unexpected_exception=True)
316+
317+
# Test empty NHS number
318+
empty_nhs_number = ""
319+
result = checker._convertToNHSNumber(None, "fieldName", empty_nhs_number, False, True)
320+
self.assertEqual(result, "", "Expected empty string for empty NHS number input")
315321

322+
# Test valid NHS number
316323
valid_nhs_number = "6000000000"
317-
result = checker._convertToNHSNumber(None, "fieldName", valid_nhs_number, False, True)
318-
self.assertTrue("NHS Number does not meet regex " in result)
324+
result = checker._convertToNHSNumber("NHSNUMBER", "fieldName", valid_nhs_number, False, True)
325+
self.assertEqual(result, "6000000000", "Valid NHS number should be returned as-is")
319326

320-
invalid_nhs_number = "1234567890"
321-
result = checker._convertToNHSNumber(None, "fieldName", invalid_nhs_number, False, True)
327+
# Test invalid NHS number
328+
invalid_nhs_number = "1234567890243"
329+
result = checker._convertToNHSNumber("NHSNUMBER","fieldName", invalid_nhs_number, False, True)
330+
self.assertEqual(result, "", "Invalid NHS number should return empty string")
322331

323332
@patch("ConversionChecker.LookUpData")
324333
def test_convert_to_date(self, MockLookUpData):
@@ -360,6 +369,25 @@ def test_convert_to_date_time(self, MockLookUpData):
360369
result = checker._convertToDateTime("format:%Y%m%dT%H%M%S", "fieldName", "", False, True)
361370
self.assertEqual(result, "")
362371

372+
#check for dose sequence
373+
@patch("ConversionChecker.LookUpData")
374+
def test_convert_to_dose(self, MockLookUpData):
375+
dataParser = Mock()
376+
377+
checker = ConversionChecker(dataParser, summarise=False, report_unexpected_exception=True)
378+
# Valid dose
379+
for dose in [1, 4, 9]:
380+
with self.subTest(dose=dose):
381+
result = checker._convertToDose("DOSESEQUENCE", "DOSE_AMOUNT", dose, False, True)
382+
self.assertEqual(result, dose)
383+
384+
# Invalid dose
385+
invalid_doses = [10, 10.1, 100, 9.0001]
386+
for dose in invalid_doses:
387+
with self.subTest(dose=dose):
388+
result = checker._convertToDose("DOSESEQUENCE", "DOSE_AMOUNT", dose, False, True)
389+
self.assertEqual(result, "", f"Expected empty string for invalid dose {dose}")
390+
363391
def clear_table(self):
364392
scan = self.table.scan()
365393
with self.table.batch_writer() as batch:

0 commit comments

Comments
 (0)