Skip to content

Commit dbd034f

Browse files
committed
NRL-1665 WIP add test data generation logic
1 parent 3ef1892 commit dbd034f

File tree

4 files changed

+345
-3
lines changed

4 files changed

+345
-3
lines changed

layer/nrlf/core/constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44
class Source(Enum):
55
NRLF = "NRLF"
6-
LEGACY = "NRL"
6+
LEGACY = "NRL" # not actually used
7+
PERFTEST = "NFT-SEED"
78

89

910
VALID_SOURCES = frozenset(item.value for item in Source.__members__.values())

layer/nrlf/core/dynamodb/model.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,10 @@ def dict(self, **kwargs) -> dict[str, Any]:
9191

9292
@classmethod
9393
def from_document_reference(
94-
cls, resource: DocumentReference, created_on: Optional[str] = None
94+
cls,
95+
resource: DocumentReference,
96+
created_on: Optional[str] = None,
97+
source: str = "NRLF",
9598
) -> "DocumentPointer":
9699
resource_id = getattr(resource, "id")
97100

@@ -144,7 +147,7 @@ def from_document_reference(
144147
type_id=type_id,
145148
category=pointer_category,
146149
category_id=category_id,
147-
source="NRLF",
150+
source=source,
148151
version=1,
149152
document=resource.model_dump_json(exclude_none=True),
150153
created_on=created_on or create_fhir_instant(),

scripts/seed_nft_tables.py

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
from datetime import datetime, timedelta, timezone
2+
from itertools import cycle
3+
from math import gcd
4+
from random import shuffle
5+
from typing import Any
6+
7+
import boto3
8+
import fire
9+
10+
from nrlf.consumer.fhir.r4.model import DocumentReference
11+
from nrlf.core.constants import (
12+
CATEGORY_ATTRIBUTES,
13+
SNOMED_SYSTEM_URL,
14+
TYPE_ATTRIBUTES,
15+
TYPE_CATEGORIES,
16+
Categories,
17+
PointerTypes,
18+
)
19+
from nrlf.core.dynamodb.model import DocumentPointer
20+
from nrlf.core.logger import logger
21+
from nrlf.core.validators import DocumentReferenceValidator
22+
from nrlf.tests.data import load_document_reference
23+
24+
# Low-level DynamoDB client. Nothing in the code visible in this script
# references it — NOTE(review): confirm it is still needed.
dynamodb = boto3.client("dynamodb")
# DynamoDB service resource; _populate_seed_table uses it to look up the target table.
resource = boto3.resource("dynamodb")

# Only let ERROR and above through the shared nrlf logger — presumably to keep
# per-pointer log output from flooding the console during bulk seeding.
logger.setLevel("ERROR")

# DocumentReference template loaded once at import time from the "NFT-template"
# test data; _make_seed_pointer uses it as the basis for every generated pointer.
DOC_REF_TEMPLATE = load_document_reference("NFT-template")
30+
31+
# Weights 10 down to 2, paired position-by-position with the first nine digits
# of a candidate NHS number when computing its check digit.
CHECKSUM_WEIGHTS = list(range(10, 1, -1))
32+
33+
# These are based on the Nov 7th 2025 pointer stats report
# Maps a pointer type code to its relative weight. The weights below sum to 100,
# so they can be read as percentages; _set_up_cyclical_iterator divides them by
# their greatest common divisor before building the cycle, so only the ratios matter.
DEFAULT_TYPE_DISTRIBUTIONS = {
    "736253002": 65,  # mental health crisis plan
    "1382601000000107": 5,  # respect form
    "887701000000100": 15,  # emergency healthcare plan
    "861421000000109": 5,  # eol care coordination summary
    "735324008": 5,  # treatment escalation plan
    "824321000000109": 5,  # summary record
}
42+
43+
# Maps each pointer type code (the same keys as DEFAULT_TYPE_DISTRIBUTIONS) to a
# dict of custodian code -> relative weight. One cyclical iterator is built per
# pointer type from these weights; the custodian code it yields is written into
# the generated pointer's custodian identifier and used as the prefix of its id.
DEFAULT_CUSTODIAN_DISTRIBUTIONS = {
    "736253002": {
        "TRPG": 9,
        "TRHA": 1,
        "TRRE": 20,
        "TRAT": 10,
        "TWR4": 4,
        "TRKL": 9,
        "TRW1": 5,
        "TRH5": 1,
        "TRP7": 13,
        "TRWK": 8,
        "TRQY": 3,
        "TRV5": 3,
        "TRJ8": 2,
        "TRXA": 4,
        "T11X": 1,
        "TG6V": 2,
    },  # mental health crisis plan
    "1382601000000107": {"T8GX8": 3, "TQUY": 2},  # respect form
    "887701000000100": {
        "TV1": 1,
        "TV2": 2,
        "TV3": 1,
        "TV4": 1,
        "TV5": 3,
        "TV6": 1,
    },  # emergency healthcare plan
    "861421000000109": {
        "TV1": 2,
        "TV2": 2,
        "TV3": 1,
        "TV4": 1,
        "TV5": 3,
        "TV6": 1,
    },  # eol care coordination summary
    "735324008": {
        "TV1": 1,
        "TV2": 1,
        "TV3": 1,
        "TV4": 2,
        "TV5": 2,
        "TV6": 1,
    },  # treatment escalation plan
    "824321000000109": {
        "TRXT": 1,
    },  # summary record currently has only one supplier
}
91+
92+
93+
class TestNhsNumbersIterator:
    """
    Iterator over synthetic 10-digit NHS numbers with a valid check digit.

    Candidates are built from consecutive 9-digit prefixes, starting just after
    900000000 and ending at 999999999. The tenth digit is the check digit derived
    from CHECKSUM_WEIGHTS; prefixes whose check digit would be 10 cannot be
    expressed as a single digit and are skipped.

    Calling iter() on an instance restarts the sequence from the beginning.
    """

    FIRST_PREFIX = 900000000
    LAST_PREFIX = 999999999

    def __init__(self):
        # Initialise here as well as in __iter__ so next() works on a fresh
        # instance without a prior iter() call.
        self.first9 = self.FIRST_PREFIX

    def __iter__(self):
        self.first9 = self.FIRST_PREFIX
        return self

    def __next__(self):
        """
        Return the next valid NHS number as a 10-character string.

        Raises:
            StopIteration: once every prefix up to LAST_PREFIX has been consumed.
        """
        # Bound the loop on the prefix itself so the increment can never step
        # past the last 9-digit prefix and produce an 11-character value.
        while self.first9 < self.LAST_PREFIX:
            self.first9 += 1
            weighted_sum = sum(
                weight * int(digit)
                for weight, digit in zip(CHECKSUM_WEIGHTS, str(self.first9))
            )
            # Same as (11 - weighted_sum % 11) % 11: a result of 11 becomes 0,
            # while 10 marks the prefix as unusable.
            checksum = -weighted_sum % 11
            if checksum != 10:
                return f"{self.first9}{checksum}"
        raise StopIteration
115+
116+
117+
def _make_seed_pointer(
    type_code: str, custodian: str, nhs_number: str, counter: int
) -> DocumentPointer:
    """
    Populates a copy of the example pointer template with test data to create a valid NRL 3.0 pointer

    Args:
        type_code: Pointer type code; combined with SNOMED_SYSTEM_URL to look up
            the type display text and the matching category.
        custodian: Custodian code, written to the custodian identifier and used
            as the prefix of the pointer id.
        nhs_number: Value written to the subject identifier.
        counter: Sequence number, zero-padded to 12 digits to form the pointer id.

    Returns:
        A DocumentPointer built from the populated document with source "NFT-SEED".

    Raises:
        ValueError: if the type code, or the category it maps to, has no entry
            in the constants tables.
    """
    type_key = f"{SNOMED_SYSTEM_URL}|{type_code}"
    type_attributes = TYPE_ATTRIBUTES.get(type_key)
    category_key = TYPE_CATEGORIES.get(type_key)
    if type_attributes is None or category_key is None:
        raise ValueError(f"Unknown pointer type: {type_key}")
    category_attributes = CATEGORY_ATTRIBUTES.get(category_key)
    if category_attributes is None:
        raise ValueError(f"Unknown pointer category: {category_key}")

    # Work on a deep copy so the shared module-level template is never mutated.
    doc_ref = DOC_REF_TEMPLATE.model_copy(deep=True)
    doc_ref.id = f"{custodian}-{str(counter).zfill(12)}"  # deterministic to aid perftest script retrieval
    doc_ref.subject.identifier.value = nhs_number
    doc_ref.custodian.identifier.value = custodian
    doc_ref.author[0].identifier.value = "X26NFT"
    doc_ref.type.coding[0].code = type_code
    doc_ref.type.coding[0].display = type_attributes.get("display")
    doc_ref.category[0].coding[0].code = category_key.split("|")[-1]
    doc_ref.category[0].coding[0].display = category_attributes.get("display")
    return DocumentPointer.from_document_reference(doc_ref, source="NFT-SEED")
140+
141+
142+
def _populate_seed_table(
    table_name: str,
    px_with_pointers: int,
    pointers_per_px: float = 1.0,
    type_dists=DEFAULT_TYPE_DISTRIBUTIONS,
    custodian_dists=DEFAULT_CUSTODIAN_DISTRIBUTIONS,
):
    """
    Seeds a DynamoDB table with generated test pointers.

    Every patient receives at least one pointer. When pointers_per_px is not a
    whole number, the extra pointers are spread across patients so that the
    running total tracks pointers_per_px * patients-so-far.

    Args:
        table_name: Name of the DynamoDB table to write to.
        px_with_pointers: Number of distinct test patients to create pointers for.
        pointers_per_px: Average number of pointers per patient; must be >= 1.
        type_dists: Pointer type code -> relative weight.
        custodian_dists: Pointer type code -> {custodian code -> relative weight}.

    Raises:
        ValueError: if pointers_per_px is below 1.
    """
    if pointers_per_px < 1:
        raise ValueError("pointers_per_px must be at least 1")

    table = resource.Table(table_name)

    # set up iterations
    type_iter = _set_up_cyclical_iterator(type_dists)
    custodian_iters = _set_up_custodian_iterators(custodian_dists)
    testnum_iter = iter(TestNhsNumbersIterator())

    doc_ref_target = round(pointers_per_px * px_with_pointers)
    print(
        f"Will upsert {doc_ref_target} test pointers for {px_with_pointers} patients."
    )
    doc_ref_counter = 0
    failed_counter = 0

    start_time = datetime.now(tz=timezone.utc)

    for px_counter in range(1, px_with_pointers + 1):
        new_px = next(testnum_iter)
        # Cumulative number of pointers that should exist once this patient is done.
        px_quota = round(pointers_per_px * px_counter)
        while doc_ref_counter < px_quota:
            new_type = next(type_iter)
            new_custodian = next(custodian_iters[new_type])
            doc_ref_counter += 1
            try:
                print(f"Putting item {doc_ref_counter}....")
                pointer = _make_seed_pointer(
                    new_type, new_custodian, new_px, doc_ref_counter
                )
                table.put_item(Item=pointer.model_dump())
            except Exception as e:
                # Best effort: report the failure and carry on with the next pointer.
                failed_counter += 1
                print(
                    f"Unable to upsert pointer for item {doc_ref_counter}. Error: {e}"
                )

    elapsed_seconds = (datetime.now(tz=timezone.utc) - start_time).total_seconds()
    print(
        f"Created {doc_ref_counter - failed_counter} pointers in {elapsed_seconds} seconds."
    )
    if failed_counter:
        print(f"Failed to upsert {failed_counter} pointers.")
186+
187+
188+
def _set_up_cyclical_iterator(dists: dict[str, int]) -> iter:
189+
"""
190+
Given a dict of values and their relative frequencies,
191+
returns an iterator that will cycle through a the reduced and shuffled set of values.
192+
This should result in more live-like data than e.g. creating a bulk amount of each pointer type/custodian in series.
193+
It also means each batch will contain a representative sample of the distribution.
194+
"""
195+
d = gcd(*dists.values())
196+
value_list = []
197+
for entry in dists:
198+
value_list.extend([entry] * (dists[entry] // d))
199+
shuffle(value_list)
200+
return cycle(value_list)
201+
202+
203+
def _set_up_custodian_iterators(
    custodian_dists: dict[str, dict[str, int]]
) -> dict[str, cycle]:
    """
    Builds one cyclical custodian iterator per pointer type.

    Args:
        custodian_dists: Pointer type code -> {custodian code -> relative weight}.

    Returns:
        Pointer type code -> iterator cycling through that type's custodians
        in proportion to their weights.
    """
    return {
        pointer_type: _set_up_cyclical_iterator(dists)
        for pointer_type, dists in custodian_dists.items()
    }
212+
213+
214+
# Expose _populate_seed_table as a command-line interface via python-fire;
# its parameters become the command's arguments.
if __name__ == "__main__":
    fire.Fire(_populate_seed_table)
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
{
2+
"resourceType": "DocumentReference",
3+
"id": "X26-00000000000000000000",
4+
"status": "current",
5+
"docStatus": "final",
6+
"type": {
7+
"coding": [
8+
{
9+
"system": "http://snomed.info/sct",
10+
"code": "736253002",
11+
"display": "Mental health crisis plan"
12+
}
13+
]
14+
},
15+
"category": [
16+
{
17+
"coding": [
18+
{
19+
"system": "http://snomed.info/sct",
20+
"code": "734163000",
21+
"display": "Care plan"
22+
}
23+
]
24+
}
25+
],
26+
"subject": {
27+
"identifier": {
28+
"system": "https://fhir.nhs.uk/Id/nhs-number",
29+
"value": "9999999999"
30+
}
31+
},
32+
"author": [
33+
{
34+
"identifier": {
35+
"system": "https://fhir.nhs.uk/Id/ods-organization-code",
36+
"value": "X26"
37+
}
38+
}
39+
],
40+
"custodian": {
41+
"identifier": {
42+
"system": "https://fhir.nhs.uk/Id/ods-organization-code",
43+
"value": "X26"
44+
}
45+
},
46+
"description": "This is a bulk generated test pointer created to seed a table for performance testing that otherwise resembles the structure of real NRL3 pointers",
47+
"securityLabel": [
48+
{
49+
"coding": [
50+
{
51+
"system": "http://terminology.hl7.org/CodeSystem/v3-Confidentiality",
52+
"code": "V",
53+
"display": "very restricted"
54+
}
55+
]
56+
}
57+
],
58+
"content": [
59+
{
60+
"attachment": {
61+
"contentType": "application/pdf",
62+
"language": "en-UK",
63+
"url": "ssp://nrl-example.thirdparty.nhs.uk/exampledocument/exampleid.pdf",
64+
"title": "Example document for NFT testing",
65+
"creation": "2025-11-25T10:45:41+11:00"
66+
},
67+
"format": {
68+
"system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode",
69+
"code": "urn:nhs-ic:unstructured",
70+
"display": "Unstructured Document"
71+
},
72+
"extension": [
73+
{
74+
"url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability",
75+
"valueCodeableConcept": {
76+
"coding": [
77+
{
78+
"system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability",
79+
"code": "static",
80+
"display": "Static"
81+
}
82+
]
83+
}
84+
},
85+
{
86+
"url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism",
87+
"valueCodeableConcept": {
88+
"coding": [
89+
{
90+
"system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLRetrievalMechanism",
91+
"code": "SSP",
92+
"display": "Spine Secure Proxy"
93+
}
94+
]
95+
}
96+
}
97+
]
98+
}
99+
],
100+
"context": {
101+
"practiceSetting": {
102+
"coding": [
103+
{
104+
"system": "http://snomed.info/sct",
105+
"code": "788002001",
106+
"display": "Adult mental health service"
107+
}
108+
]
109+
},
110+
"related": [
111+
{
112+
"identifier": {
113+
"system": "https://fhir.nhs.uk/Id/nhsSpineASID",
114+
"value": "012345678910"
115+
}
116+
}
117+
]
118+
},
119+
"masterIdentifier": {
120+
"system": "https://fhir.nhs.uk/Id/NFT-document-reference-master-id",
121+
"value": "NFT-0000000000"
122+
}
123+
}

0 commit comments

Comments
 (0)