Skip to content

Commit bbcf87f

Browse files
committed
NRL-1705 Save output as JSON in a text file
1 parent 3d8c4e1 commit bbcf87f

File tree

1 file changed

+16
-5
lines changed

1 file changed

+16
-5
lines changed

reports/find_duplicate_pointers.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from datetime import datetime, timedelta, timezone
23
from typing import Any
34

@@ -21,11 +22,15 @@
2122
]
2223

2324

24-
def _get_duplicates(table_name: str, custodians: str | tuple[str]) -> Any:
25+
def _get_duplicates(
26+
table_name: str, custodians: str | tuple[str], filename: str = "duplicates"
27+
) -> Any:
2528
"""
2629
Get masterids for duplicate pointers in the given table for a list of custodians.
2730
Parameters:
2831
- table_name: The name of the pointers table to use.
32+
- custodians: The ODS codes of the custodian(s) to check.
33+
- filename: A name for the output text file containing the list of affected pointers.
2934
"""
3035
custodian_list = (
3136
custodians.split(",") if isinstance(custodians, str) else list(custodians)
@@ -73,7 +78,7 @@ def _get_duplicates(table_name: str, custodians: str | tuple[str]) -> Any:
7378
"datetime": created_on,
7479
}
7580

76-
px_type_ods_key = f"{patient_id}-{custodian}-{pointer_type}"
81+
px_type_ods_key = f"{custodian}-{patient_id}-{pointer_type}"
7782

7883
if px_type_ods_key not in pointers_by_key:
7984
pointers_by_key[px_type_ods_key] = [pointer_data]
@@ -96,11 +101,17 @@ def _get_duplicates(table_name: str, custodians: str | tuple[str]) -> Any:
96101

97102
print(" Table scan completed") # noqa
98103

99-
for key in duplicates_set:
100-
print(f"Duplicates for {key}:") # noqa
101-
print(pointers_by_key[key]) # noqa
104+
output_pointers = dict()
105+
106+
for key in sorted(duplicates_set):
107+
output_pointers[key] = pointers_by_key[key]
108+
109+
print(f"Writing pointers to file ./{filename}.txt ...") # noqa
110+
with open(f"{filename}.txt", "w") as f:
111+
f.write(json.dumps(output_pointers, indent=2))
102112

103113
return {
114+
"output_file": f"{filename}.txt",
104115
"duplicates-found": duplicate_count,
105116
"scanned-count": total_scanned_count,
106117
"took-secs": timedelta.total_seconds(end_time - start_time),

0 commit comments

Comments
 (0)