22import json
33import os
44import tempfile
5+ from dataclasses import dataclass
56from datetime import datetime , timedelta , timezone
67from typing import Any , Dict , List
78
@@ -19,89 +20,109 @@ def _load_pointers_from_file(pointers_file: str) -> list[str]:
1920
2021 Returns a list of pointer id strings. Prints a warning for skipped malformed JSON entries.
2122 """
22- with open (pointers_file , "r" ) as fh :
23- content = fh .read ().strip ()
23+ with open (pointers_file , "r" ) as file :
24+ content = file .read ().strip ()
2425
2526 if not content :
2627 return []
2728
28- # JSON path
2929 if content .startswith ("[" ) or content .startswith ("{" ):
30- try :
31- data = json .loads (content )
32- except json .JSONDecodeError as e :
33- raise ValueError (f"Failed to parse JSON file { pointers_file } : { e } " ) from e
34-
35- if not isinstance (data , list ):
36- raise ValueError ("JSON file must contain an array of objects" )
37-
38- parsed_ids : list [str ] = []
39- skipped_count = 0
40- for item in data :
41- if (
42- isinstance (item , dict )
43- and "id" in item
44- and isinstance (item ["id" ], str )
45- and item ["id" ].strip ()
46- ):
47- parsed_ids .append (item ["id" ].strip ())
48- else :
49- skipped_count += 1
30+ return _parse_json_pointers (content , pointers_file )
31+
32+ return _parse_plain_text_pointers (content )
5033
51- if skipped_count :
52- print (
53- f"Warning: skipped { skipped_count } malformed entries in JSON file { pointers_file } "
54- )
5534
56- return parsed_ids
def _parse_json_pointers(content: str, pointers_file: str) -> list[str]:
    """
    Extract pointer ids from the JSON text of a pointers file.

    `content` must decode to a JSON array. Every entry accepted by
    `_is_valid_pointer` contributes its whitespace-stripped "id"; the
    remaining entries are counted and reported in one warning printed to
    stdout. `pointers_file` is only used to label messages.

    Raises ValueError when `content` is not valid JSON or when its top-level
    value is not an array.
    """
    try:
        entries = json.loads(content)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON file {pointers_file}: {e}") from e

    if not isinstance(entries, list):
        raise ValueError("JSON file must contain an array of objects")

    pointer_ids: list[str] = [
        entry["id"].strip() for entry in entries if _is_valid_pointer(entry)
    ]

    # Whatever did not make it into pointer_ids was rejected as malformed.
    rejected = len(entries) - len(pointer_ids)
    if rejected:
        # TODO should we log these?
        print(
            f"Warning: skipped {rejected} malformed entries in JSON file {pointers_file}"
        )

    return pointer_ids
61+
62+
63+ def _is_valid_pointer (item : Any ) -> bool :
64+ return (
65+ isinstance (item , dict )
66+ and "id" in item
67+ and isinstance (item ["id" ], str )
68+ and item ["id" ].strip ()
69+ )
70+
71+
72+ def _parse_plain_text_pointers (content : str ) -> list [str ]:
5973 return [line .strip () for line in content .splitlines () if line .strip ()]
6074
6175
62- def _build_and_write_result (
63- pointers_to_delete ,
64- ods_code ,
65- matched_pointers ,
66- mismatched_pointers ,
67- not_found_pointers ,
68- pointers_deleted ,
69- failed_deletes ,
70- start_time ,
71- end_time ,
72- output_file_path ,
73- ):
@dataclass
class PointerDeletionContext:
    """
    Everything `_build_and_write_result` needs to produce a run report:
    the id lists gathered during a deletion run, the run's start and end
    times, and the path the result JSON is written to.
    """

    # Ids that were requested for deletion, and the ODS code supplied with them.
    pointers_to_delete: list[str]
    ods_code: str

    # Id lists reported (with counts) under "ods_code_matched",
    # "ods_code_mismatched" and "pointer_not_found" respectively.
    matched_pointers: list[str]
    mismatched_pointers: list[str]
    not_found_pointers: list[str]

    # Id lists reported under "deleted_pointers" and "failed_deletes".
    pointers_deleted: list[str]
    failed_deletes: list[str]

    # end_time - start_time is reported as "deletes-took-secs".
    start_time: datetime
    end_time: datetime

    # Destination of the JSON result file.
    output_file_path: str
88+
89+
def _build_and_write_result(ctx: PointerDeletionContext) -> Dict[str, Any]:
    """
    Assemble the run report from `ctx`, write it to disk and print a summary.

    The report holds the number of requested pointers, the ODS code, a
    {"count", "ids"} entry for each id list carried by `ctx`, and the elapsed
    seconds between `ctx.start_time` and `ctx.end_time`.

    Writing the report to `ctx.output_file_path` is best-effort: if it fails,
    the error text is stored in the report under "_output_error" instead of
    being raised. The summary is printed either way.

    Returns the report dict.
    """

    def _counted(ids):
        # Shared shape for every id-list section of the report.
        return {"count": len(ids), "ids": ids}

    elapsed = ctx.end_time - ctx.start_time

    result: Dict[str, Any] = {
        "pointers_to_delete": len(ctx.pointers_to_delete),
        "ods_code": ctx.ods_code,
        "ods_code_matched": _counted(ctx.matched_pointers),
        "ods_code_mismatched": _counted(ctx.mismatched_pointers),
        "pointer_not_found": _counted(ctx.not_found_pointers),
        "deleted_pointers": _counted(ctx.pointers_deleted),
        "failed_deletes": _counted(ctx.failed_deletes),
        "deletes-took-secs": elapsed.total_seconds(),
    }

    destination = ctx.output_file_path
    try:
        _write_result_file(result, destination)
    except Exception as exc:
        # Keep the in-memory report usable even when it cannot be persisted.
        result["_output_error"] = f"Failed to write result file {destination}: {exc}"

    _print_summary(result)
    return result
98122
99123
100124def _write_result_file (result : Dict [str , Any ], output_file : str ) -> None :
101- """
102- Atomically write result dict to output_file as JSON.
103- Raises on failure.
104- """
125+
105126 out_dir = os .path .dirname (os .path .abspath (output_file )) or "."
106127 with tempfile .NamedTemporaryFile (
107128 "w" , delete = False , dir = out_dir , prefix = ".tmp_delete_results_" , suffix = ".json"
@@ -115,10 +136,7 @@ def _write_result_file(result: Dict[str, Any], output_file: str) -> None:
115136def _check_pointers_match_ods_code (
116137 ods_code : str , pointer_ids : List [str ]
117138) -> tuple [List [str ], List [str ]]:
118- """
119- Validate that pointer IDs are in line with the provided ODS code.
120- Returns (matched_ids, mismatched_ids)
121- """
139+
122140 matched = []
123141 mismatched = []
124142
@@ -173,7 +191,6 @@ def _batch_delete_pointers(
173191) -> tuple [List [str ], List [str ]]:
174192 """
175193 Delete pointers using BatchWriteItem (max 25 items per request).
176- Returns (deleted_ids, failed_ids)
177194 """
178195 pointers_deleted = []
179196 failed_deletes_set : set [str ] = set ()
@@ -212,9 +229,6 @@ def _batch_delete_pointers(
212229
213230
214231def _print_summary (result : Dict [str , Any ]) -> None :
215- """
216- Print a concise summary of the result to stdout.
217- """
218232
219233 def count_from (field ):
220234 val = result .get (field )
@@ -256,38 +270,46 @@ def _delete_pointers_by_id(
256270 - ods_code: ODS code of the organisation that the pointers belong to
257271 - pointers_to_delete: list of pointer ids to delete
258272 - pointers_file: path to JSON file (array of objects with "id" field) or text file (one id per line)
273+
274+ Sample usage:
275+ - Delete by list of ids:
276+ python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_to_delete '["ABC123-12345678910", "ABC123-109876543210"]'
277+ - Delete by JSON file:
278+ python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/pointers.json
279+ - Delete by text file:
280+ python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/ids.txt
259281 """
260282 if pointers_to_delete is None and pointers_file is None :
261283 raise ValueError ("Provide either pointers_to_delete or pointers_file" )
262284
263285 if pointers_to_delete is not None and pointers_file is not None :
264286 raise ValueError ("Provide either pointers_to_delete or pointers_file, not both" )
265287
266- # Load pointers from file if provided
267288 if pointers_file :
268289 pointers_to_delete = _load_pointers_from_file (pointers_file )
269290
270- # establish start_time early so any early-return can use it for filename
271291 start_time = datetime .now (tz = timezone .utc )
272- stamp = start_time .strftime ("%Y%m%dT%H%M%SZ" )
292+ timestamp = start_time .strftime ("%Y%m%dT%H%M%SZ" )
273293 script_dir = os .path .dirname (os .path .abspath (__file__ )) or "."
274294 output_file_path = os .path .join (
275- script_dir , f"delete_results_{ ods_code } _{ stamp } .json"
295+ script_dir , f"delete_results_{ ods_code } _{ timestamp } .json"
276296 )
277297
278298 if not pointers_to_delete :
279299 end_time = datetime .now (tz = timezone .utc )
280300 return _build_and_write_result (
281- pointers_to_delete ,
282- ods_code ,
283- [],
284- [],
285- [],
286- [],
287- [],
288- start_time ,
289- end_time ,
290- output_file_path ,
301+ PointerDeletionContext (
302+ pointers_to_delete = pointers_to_delete ,
303+ ods_code = ods_code ,
304+ matched_pointers = [],
305+ mismatched_pointers = [],
306+ not_found_pointers = [],
307+ pointers_deleted = [],
308+ failed_deletes = [],
309+ start_time = start_time ,
310+ end_time = end_time ,
311+ output_file_path = output_file_path ,
312+ )
291313 )
292314
293315 print (
@@ -305,16 +327,18 @@ def _delete_pointers_by_id(
305327 print (f"None of the pointer IDs are a match for ODS code { ods_code } . Exiting." )
306328 end_time = datetime .now (tz = timezone .utc )
307329 return _build_and_write_result (
308- pointers_to_delete ,
309- ods_code ,
310- matched_pointers ,
311- mismatched_pointers ,
312- [],
313- [],
314- [],
315- start_time ,
316- end_time ,
317- output_file_path ,
330+ PointerDeletionContext (
331+ pointers_to_delete = pointers_to_delete ,
332+ ods_code = ods_code ,
333+ matched_pointers = matched_pointers ,
334+ mismatched_pointers = mismatched_pointers ,
335+ not_found_pointers = [],
336+ pointers_deleted = [],
337+ failed_deletes = [],
338+ start_time = start_time ,
339+ end_time = end_time ,
340+ output_file_path = output_file_path ,
341+ )
318342 )
319343
320344 print (f"Checking existence of { len (matched_pointers )} pointers in { table_name } ..." )
@@ -330,16 +354,18 @@ def _delete_pointers_by_id(
330354 print ("No pointers found to delete. Exiting." )
331355 end_time = datetime .now (tz = timezone .utc )
332356 return _build_and_write_result (
333- pointers_to_delete ,
334- ods_code ,
335- matched_pointers ,
336- mismatched_pointers ,
337- not_found_pointers ,
338- [],
339- [],
340- start_time ,
341- end_time ,
342- output_file_path ,
357+ PointerDeletionContext (
358+ pointers_to_delete = pointers_to_delete ,
359+ ods_code = ods_code ,
360+ matched_pointers = matched_pointers ,
361+ mismatched_pointers = mismatched_pointers ,
362+ not_found_pointers = not_found_pointers ,
363+ pointers_deleted = [],
364+ failed_deletes = [],
365+ start_time = start_time ,
366+ end_time = end_time ,
367+ output_file_path = output_file_path ,
368+ )
343369 )
344370
345371 # Proceed with deletion using BatchWriteItem
@@ -349,16 +375,18 @@ def _delete_pointers_by_id(
349375
350376 end_time = datetime .now (tz = timezone .utc )
351377 result = _build_and_write_result (
352- pointers_to_delete ,
353- ods_code ,
354- matched_pointers ,
355- mismatched_pointers ,
356- not_found_pointers ,
357- pointers_deleted ,
358- failed_deletes ,
359- start_time ,
360- end_time ,
361- output_file_path ,
378+ PointerDeletionContext (
379+ pointers_to_delete = pointers_to_delete ,
380+ ods_code = ods_code ,
381+ matched_pointers = matched_pointers ,
382+ mismatched_pointers = mismatched_pointers ,
383+ not_found_pointers = not_found_pointers ,
384+ pointers_deleted = pointers_deleted ,
385+ failed_deletes = failed_deletes ,
386+ start_time = start_time ,
387+ end_time = end_time ,
388+ output_file_path = output_file_path ,
389+ )
362390 )
363391 print (" Done" )
364392 return result
0 commit comments