Skip to content

Commit f0d01b3

Browse files
committed
[NRL-1559] Add script to find patients with org pointers
1 parent 3376f04 commit f0d01b3

File tree

2 files changed

+116
-3
lines changed

2 files changed

+116
-3
lines changed

reports/calculate_pointer_stats.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import fire
77

88
from nrlf.consumer.fhir.r4.model import DocumentReference
9-
from nrlf.core.constants import PointerTypes
109
from nrlf.core.logger import logger
1110
from nrlf.core.validators import DocumentReferenceValidator
1211

@@ -19,8 +18,6 @@
1918

2019
logger.setLevel("ERROR")
2120

22-
type_to_name = {pointer_type.value: pointer_type.name for pointer_type in PointerTypes}
23-
2421

2522
def _calc_type_stats(producer: str, type_str: str, stats: dict[str, Any]) -> None:
2623
stats["type_counts"] = stats.get("type_counts", {})
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import json
2+
from datetime import datetime, timedelta, timezone
3+
from typing import Any
4+
5+
import boto3
6+
import fire
7+
8+
from nrlf.consumer.fhir.r4.model import DocumentReference
9+
from nrlf.core.constants import PointerTypes
10+
from nrlf.core.logger import logger
11+
12+
# Nested counter aliases (PEP 695 `type` statements): count keyed by int,
# then keyed by a string, then by a further string.
# NOTE(review): none of these aliases is referenced by the code visible in
# this file - confirm whether they are still needed.
type PatientCounter = dict[int, int]
13+
type TypePatientCounter = dict[str, PatientCounter]
14+
type OrgTypePatientCounter = dict[str, TypePatientCounter]
15+
16+
# The DynamoDB client and its "scan" paginator are created once, at import
# time, and used as module globals by _find_patients.
dynamodb = boto3.client("dynamodb")
17+
paginator = dynamodb.get_paginator("scan")
18+
19+
# Set the shared nrlf logger's level to ERROR.
# NOTE(review): presumably to keep library logging out of the script's
# printed output - confirm.
logger.setLevel("ERROR")
20+
21+
# Reverse lookup from a PointerTypes member's value to its member name; used
# to print a readable name for the pointer type being searched for.
type_to_name = {pointer_type.value: pointer_type.name for pointer_type in PointerTypes}
22+
23+
24+
def _find_patients(
    table_name: str,
    number_of_patients: int = 1,
    number_of_pointers: int = 1,
    org_ods_code: str = "X26",
    pointer_type: str = PointerTypes.MENTAL_HEALTH_PLAN.value,
) -> dict[str, float | int | str]:
    """Scan a pointer table for patients with enough pointers from one org.

    Pages through ``table_name`` with the module-level DynamoDB scan
    paginator, parses each item's ``document`` attribute as a
    ``DocumentReference`` and counts, per patient identifier, the pointers
    whose custodian identifier equals ``org_ods_code`` and whose first type
    coding (rendered as ``system|code``) equals ``pointer_type``. Scanning
    stops as soon as ``number_of_patients`` patients have reached
    ``number_of_pointers`` matching pointers.

    Args:
        table_name: Name of the DynamoDB table to scan.
        number_of_patients: How many qualifying patients to look for.
        number_of_pointers: Minimum number of matching pointers a patient
            needs in order to qualify.
        org_ods_code: Custodian identifier value the pointers must carry.
        pointer_type: Pointer type as a ``system|code`` string.

    Returns:
        A dict with ``"scanned_count"`` (sum of ``ScannedCount`` over the
        pages read), ``"took-secs"`` (elapsed seconds) and ``"patients"``
        (``str()`` of the set of qualifying patient identifiers).

    Raises:
        KeyError: If ``pointer_type`` is not a key of ``type_to_name``; this
            happens before any scanning starts.
    """
    print(f"Looking for {number_of_patients} patient(s)")  # noqa
    print(f" with {number_of_pointers} or more pointers")  # noqa
    print(f" of type {type_to_name[pointer_type]} ")  # noqa
    print(f" produced by org {org_ods_code}")  # noqa
    print(f" in table {table_name}")  # noqa

    params: dict[str, Any] = {
        "TableName": table_name,
        "PaginationConfig": {"PageSize": 50},
    }

    total_scanned_count = 0
    start_time = datetime.now(tz=timezone.utc)

    found_patients: set[str] = set()
    patient_counters: dict[str, int] = {}
    enough_found = False

    for page in paginator.paginate(**params):
        for item in page["Items"]:
            document = item.get("document", {}).get("S", "")
            # TODO - Don't need to use doc for these attrs - switch to other attrs

            # Best effort: items whose document does not validate as a
            # DocumentReference are skipped rather than aborting the scan.
            try:
                docref = DocumentReference.model_validate_json(document)
            except Exception:
                continue

            # NOTE(review): pointers without a subject identifier are all
            # counted under the placeholder "unknown", which can therefore
            # be reported as a found patient - confirm this is intended.
            patient_number = (
                docref.subject.identifier.value
                if docref.subject
                and docref.subject.identifier
                and docref.subject.identifier.value
                else "unknown"
            )
            producer = (
                docref.custodian.identifier.value
                if docref.custodian
                and docref.custodian.identifier
                and docref.custodian.identifier.value
                else "unknown"
            )
            type_coding = (
                docref.type.coding[0] if docref.type and docref.type.coding else None
            )
            type_str = (
                f"{type_coding.system}|{type_coding.code}" if type_coding else "unknown"
            )

            if producer != org_ods_code or type_str != pointer_type:
                continue

            patient_counters[patient_number] = (
                patient_counters.get(patient_number, 0) + 1
            )

            if patient_counters[patient_number] >= number_of_pointers:
                found_patients.add(patient_number)

            if len(found_patients) >= number_of_patients:
                print(f"Found {len(found_patients)} patients")  # noqa
                # A bare break only leaves the item loop; the flag lets the
                # page loop stop too, so the rest of the table is not scanned.
                enough_found = True
                break

        previous_scanned_count = total_scanned_count
        total_scanned_count += page["ScannedCount"]

        if enough_found:
            break

        # Report progress whenever a threshold is crossed. An exact
        # `% 1000 == 0` test stops firing as soon as one page returns an
        # item count that moves the total off a round multiple.
        if total_scanned_count // 1000 > previous_scanned_count // 1000:
            print(".", end="", flush=True)  # noqa

        if total_scanned_count // 100000 > previous_scanned_count // 100000:
            print(f"scanned={total_scanned_count}")  # noqa

    end_time = datetime.now(tz=timezone.utc)

    print("Done")  # noqa

    return {
        "scanned_count": total_scanned_count,
        "took-secs": (end_time - start_time).total_seconds(),
        "patients": str(found_patients),
    }
113+
114+
115+
# Script entry point: only when run directly (not on import), hand
# _find_patients to fire.Fire, which per python-fire exposes the function's
# parameters as command-line arguments.
if __name__ == "__main__":
116+
fire.Fire(_find_patients)

0 commit comments

Comments
 (0)