Skip to content

Commit 7cadb31

Browse files
Merge branch 'develop' into feature/made14-NRL-1739-schedule-powerbi-updates
2 parents e2325fa + 08fdf73 commit 7cadb31

File tree

4 files changed

+132
-7
lines changed

4 files changed

+132
-7
lines changed

reports/find_duplicate_pointers.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import json
2+
from datetime import datetime, timedelta, timezone
3+
from typing import Any
4+
5+
import boto3
6+
import fire
7+
8+
from nrlf.core.logger import logger
9+
10+
# DynamoDB client and its "scan" paginator, created once when the module is
# loaded and reused by the scan below.
dynamodb = boto3.client("dynamodb")
paginator = dynamodb.get_paginator("scan")

# Set the imported nrlf logger's level to ERROR for this script.
logger.setLevel("ERROR")

# Attribute names requested from the table; they are joined with commas to form
# the scan's ProjectionExpression.
REQUIRED_ATTRIBUTES = [
    "nhs_number",
    "custodian",
    "id",
    "master_identifier",
    "type_id",
    "created_on",
]
23+
24+
25+
def _get_duplicates(
26+
table_name: str, custodians: str | tuple[str], filename: str = "duplicates"
27+
) -> Any:
28+
"""
29+
Get masterids for duplicate pointers in the given table for a list of custodians.
30+
Parameters:
31+
- table_name: The name of the pointers table to use.
32+
- custodians: The ODS codes of the custodian(s) to check.
33+
- filename: A name for the output text file containing the list of affected pointers.
34+
"""
35+
custodian_list = (
36+
custodians.split(",") if isinstance(custodians, str) else list(custodians)
37+
)
38+
39+
print( # noqa
40+
f"Finding duplicate pointers for custodians {custodian_list} in table {table_name}...."
41+
)
42+
43+
required_attributes = REQUIRED_ATTRIBUTES
44+
45+
expression_names_str = ",".join(
46+
[f":param{custodian}" for custodian in custodian_list]
47+
)
48+
expression_values_list = {
49+
f":param{custodian}": {"S": custodian} for custodian in custodian_list
50+
}
51+
52+
params: dict[str, Any] = {
53+
"TableName": table_name,
54+
"PaginationConfig": {"PageSize": 50},
55+
"FilterExpression": f"custodian IN ({expression_names_str})",
56+
"ExpressionAttributeValues": expression_values_list,
57+
"ProjectionExpression": ",".join(required_attributes),
58+
}
59+
pointers_by_key = {}
60+
total_scanned_count = 0
61+
duplicate_count = 0
62+
duplicates_set = set()
63+
64+
start_time = datetime.now(tz=timezone.utc)
65+
66+
for page in paginator.paginate(**params):
67+
for item in page["Items"]:
68+
pointer_id = item.get("id", {}).get("S", "no-id")
69+
pointer_type = item.get("type_id", {}).get("S", "no-type")
70+
master_id = item.get("master_identifier", {}).get("S", "no-master-id")
71+
custodian = item.get("custodian", {}).get("S", "no-custodian")
72+
patient_id = item.get("nhs_number", {}).get("S", "no-patient-id")
73+
created_on = item.get("created_on", {}).get("S", "no-creation-datetime")
74+
75+
pointer_data = {
76+
"id": pointer_id,
77+
"master_id": master_id,
78+
"datetime": created_on,
79+
}
80+
81+
px_type_ods_key = f"{custodian}-{patient_id}-{pointer_type}"
82+
83+
if px_type_ods_key not in pointers_by_key:
84+
pointers_by_key[px_type_ods_key] = [pointer_data]
85+
else:
86+
pointers_by_key[px_type_ods_key].append(pointer_data)
87+
duplicate_count += 1
88+
duplicates_set.add(px_type_ods_key)
89+
90+
total_scanned_count += page["ScannedCount"]
91+
92+
if total_scanned_count % 1000 == 0:
93+
print(".", end="", flush=True) # noqa
94+
95+
if total_scanned_count % 100000 == 0:
96+
print( # noqa
97+
f"scanned={total_scanned_count} found={duplicate_count} potential duplicates "
98+
)
99+
100+
end_time = datetime.now(tz=timezone.utc)
101+
102+
print(" Table scan completed") # noqa
103+
104+
output_pointers = {}
105+
106+
for key in sorted(duplicates_set):
107+
output_pointers[key] = pointers_by_key[key]
108+
109+
print(f"Writing pointers to file ./{filename}.txt ...") # noqa
110+
with open(f"{filename}.txt", "w") as f:
111+
f.write(json.dumps(output_pointers, indent=2))
112+
113+
return {
114+
"output_file": f"{filename}.txt",
115+
"duplicates-found": duplicate_count,
116+
"scanned-count": total_scanned_count,
117+
"took-secs": timedelta.total_seconds(end_time - start_time),
118+
}
119+
120+
121+
# Script entry point: hand _get_duplicates to Fire so it can be run from the
# command line.
if __name__ == "__main__":
    fire.Fire(_get_duplicates)

terraform/account-wide-infrastructure/prod/aws-backup.tf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,12 @@ module "source" {
128128
"rules" : [
129129
{
130130
"copy_action" : [{
131-
"delete_after" : 4,
131+
"delete_after" : 30,
132132
}],
133133
"lifecycle" : {
134134
"delete_after" : 2
135135
},
136-
"name" : "daily_kept_for_2_days",
136+
"name" : "daily_kept_for_30",
137137
"schedule" : "cron(0 0 * * ? *)"
138138
}
139139
],
@@ -150,7 +150,7 @@ module "source" {
150150
"name" : "daily",
151151
"schedule" : "cron(0 0 * * ? *)",
152152
"copy_action" : [{
153-
"delete_after" : 4,
153+
"delete_after" : 28,
154154
}],
155155

156156
"lifecycle" : {
@@ -161,8 +161,8 @@ module "source" {
161161
"name" : "monthly"
162162
"schedule" : "cron(30 0 ? * 4#1)" # first Thursday each month from 00:30
163163
"copy_action" : [{
164-
"cold_storage_after" : 3,
165-
"delete_after" : 100 # ensures there will always be min 3
164+
"cold_storage_after" : 35,
165+
"delete_after" : 400 # ensures 1 from previous restore test
166166
}],
167167
"lifecycle" : {
168168
"delete_after" : 2

terraform/account-wide-infrastructure/prod/dynamodb__pointers-table.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ module "prod-pointers-table" {
44
enable_deletion_protection = true
55
enable_pitr = true
66
kms_deletion_window_in_days = 30
7+
enable_backups = true
78
}
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
module "prod-permissions-store-bucket" {
2-
source = "../modules/permissions-store-bucket"
3-
name_prefix = "nhsd-nrlf--prod"
2+
source = "../modules/permissions-store-bucket"
3+
name_prefix = "nhsd-nrlf--prod"
4+
enable_backups = true
45
}
56

67
module "prod-truststore-bucket" {
78
source = "../modules/truststore-bucket"
89
name_prefix = "nhsd-nrlf--prod"
910
server_certificate_file = "../../../truststore/server/prod.pem"
11+
enable_backups = true
1012
}

0 commit comments

Comments
 (0)