Skip to content

Commit ef1435f

Browse files
Merge pull request #1043 from NHSDigital/feature/made14-NRL-1606-masterids-report
[NRL-1606] Add report to get masterids for pointers
2 parents a4aeccd + 63e5d45 commit ef1435f

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import json
2+
import os
3+
from datetime import datetime, timedelta, timezone
4+
from typing import Any
5+
6+
import boto3
7+
import fire
8+
9+
# Opt-in switch read once at import time: true only when the INCLUDE_PATIENT_IDS
# environment variable is set to "true" (case-insensitive); defaults to false.
# The report function uses it to decide whether nhs_number is fetched and reported.
INCLUDE_PATIENT_IDS = os.environ.get("INCLUDE_PATIENT_IDS", "false").lower() == "true"
10+
11+
# Module-level DynamoDB client, created at import time with no explicit configuration.
dynamodb = boto3.client("dynamodb")
12+
# Paginator for the DynamoDB "scan" operation; the report function iterates its pages.
paginator = dynamodb.get_paginator("scan")
13+
14+
15+
def _get_masterids_for_custodians(table_name: str, custodians: str | tuple[str]) -> Any:
    """
    Get masterids for pointers in the given table for a list of custodians.

    Scans the table with a filter on the ``custodian`` attribute, writes the
    matching pointers as JSON to ./pointer-masterids.txt and returns a summary
    of the run. Progress is printed to stdout while scanning.

    Parameters:
    - table_name: The name of the pointers table to use.
    - custodians: The custodians to report on, given either as a single
      comma-separated string or as a tuple/list of values. Values are
      converted to strings, stripped of surrounding whitespace, and blank or
      duplicate entries are ignored.

    Returns:
    A dict with the keys "output-file", "pointers-found", "scanned-count" and
    "took-secs" (the scan duration in seconds).

    Raises:
    - ValueError: if no non-blank custodian is supplied.
    """
    output_file = "pointer-masterids.txt"

    raw_custodians = (
        custodians.split(",") if isinstance(custodians, str) else list(custodians)
    )
    # Normalise the input: coerce to str, trim whitespace, drop blanks and
    # de-duplicate while keeping the order the caller supplied.
    custodian_list = list(
        dict.fromkeys(
            cleaned
            for cleaned in (str(raw).strip() for raw in raw_custodians)
            if cleaned
        )
    )
    if not custodian_list:
        # An empty list would otherwise produce the invalid filter "custodian IN ()".
        raise ValueError("At least one custodian must be provided")

    print(  # noqa
        f"Getting masterids for custodians {custodian_list} in table {table_name}...."
    )

    required_attributes = ["id", "type_id", "master_identifier", "custodian"]
    if INCLUDE_PATIENT_IDS:
        required_attributes.append("nhs_number")

    # Placeholder names are index-based rather than derived from the custodian
    # value, so values containing characters that are not valid in an
    # expression placeholder (".", "-", spaces, ...) cannot break the filter.
    placeholders = [f":param{index}" for index in range(len(custodian_list))]
    expression_values = {
        placeholder: {"S": custodian}
        for placeholder, custodian in zip(placeholders, custodian_list)
    }

    params: dict[str, Any] = {
        "TableName": table_name,
        "PaginationConfig": {"PageSize": 50},
        "FilterExpression": f"custodian IN ({','.join(placeholders)})",
        "ExpressionAttributeValues": expression_values,
        "ProjectionExpression": ",".join(required_attributes),
    }

    pointers_info: list[dict[str, str]] = []
    total_scanned_count = 0

    start_time = datetime.now(tz=timezone.utc)

    for page in paginator.paginate(**params):
        for item in page["Items"]:
            if INCLUDE_PATIENT_IDS:
                patient_id = item.get("nhs_number", {}).get("S", "no-patient-id")
            else:
                patient_id = "not-included"

            pointers_info.append(
                {
                    "nrl-id": item.get("id", {}).get("S", "no-id"),
                    "pointer-type": item.get("type_id", {}).get("S", "no-type"),
                    "master_identifier": item.get("master_identifier", {}).get(
                        "S", "no-master-id"
                    ),
                    "custodian": item.get("custodian", {}).get("S", "no-custodian"),
                    "patient_id": patient_id,
                }
            )

        previous_scanned_count = total_scanned_count
        total_scanned_count += page["ScannedCount"]

        # Report progress whenever a 1,000 / 100,000 boundary is crossed. An
        # exact-multiple check would be skipped for good as soon as one page
        # scanned fewer items than the page size.
        if total_scanned_count // 1000 > previous_scanned_count // 1000:
            print(".", end="", flush=True)  # noqa

        if total_scanned_count // 100000 > previous_scanned_count // 100000:
            print(f"scanned={total_scanned_count} found={len(pointers_info)} ")  # noqa

    end_time = datetime.now(tz=timezone.utc)

    print(" Done")  # noqa

    print(f"Writing pointers to file ./{output_file} ...")  # noqa
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(pointers_info, f, indent=2)

    return {
        "output-file": output_file,
        "pointers-found": len(pointers_info),
        "scanned-count": total_scanned_count,
        "took-secs": (end_time - start_time).total_seconds(),
    }
93+
94+
95+
# Script entry point: when run directly, hand the report function to fire.Fire
# so it can be invoked from the command line.
if __name__ == "__main__":
96+
fire.Fire(_get_masterids_for_custodians)

0 commit comments

Comments
 (0)