1+ import json
12from datetime import datetime , timedelta , timezone
23from typing import Any
34
2122]
2223
2324
24- def _get_duplicates (table_name : str , custodians : str | tuple [str ]) -> Any :
25+ def _get_duplicates (
26+ table_name : str , custodians : str | tuple [str ], filename : str = "duplicates"
27+ ) -> Any :
2528 """
2629 Get masterids for duplicate pointers in the given table for a list of custodians.
2730 Parameters:
2831 - table_name: The name of the pointers table to use.
32+ - custodians: The ODS codes of the custodian(s) to check.
33+ - filename: A name for the output text file containing the list of affected pointers.
2934 """
3035 custodian_list = (
3136 custodians .split ("," ) if isinstance (custodians , str ) else list (custodians )
@@ -73,7 +78,7 @@ def _get_duplicates(table_name: str, custodians: str | tuple[str]) -> Any:
7378 "datetime" : created_on ,
7479 }
7580
76- px_type_ods_key = f"{ patient_id } -{ custodian } -{ pointer_type } "
81+ px_type_ods_key = f"{ custodian } -{ patient_id } -{ pointer_type } "
7782
7883 if px_type_ods_key not in pointers_by_key :
7984 pointers_by_key [px_type_ods_key ] = [pointer_data ]
@@ -96,11 +101,17 @@ def _get_duplicates(table_name: str, custodians: str | tuple[str]) -> Any:
96101
97102 print (" Table scan completed" ) # noqa
98103
99- for key in duplicates_set :
100- print (f"Duplicates for { key } :" ) # noqa
101- print (pointers_by_key [key ]) # noqa
104+ output_pointers = dict ()
105+
106+ for key in sorted (duplicates_set ):
107+ output_pointers [key ] = pointers_by_key [key ]
108+
109+ print (f"Writing pointers to file ./{ filename } .txt ..." ) # noqa
110+ with open (f"{ filename } .txt" , "w" ) as f :
111+ f .write (json .dumps (output_pointers , indent = 2 ))
102112
103113 return {
114+ "output_file" : f"{ filename } .txt" ,
104115 "duplicates-found" : duplicate_count ,
105116 "scanned-count" : total_scanned_count ,
106117 "took-secs" : timedelta .total_seconds (end_time - start_time ),
0 commit comments