Skip to content

Commit 884a3d0

Browse files
NRL-1798 Add dataclass and update tests
1 parent 2f192ac commit 884a3d0

File tree

2 files changed

+236
-126
lines changed

2 files changed

+236
-126
lines changed

scripts/delete_pointers_by_id.py

Lines changed: 137 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import json
33
import os
44
import tempfile
5+
from dataclasses import dataclass
56
from datetime import datetime, timedelta, timezone
67
from typing import Any, Dict, List
78

@@ -19,89 +20,109 @@ def _load_pointers_from_file(pointers_file: str) -> list[str]:
1920
2021
Returns a list of pointer id strings. Prints a warning for skipped malformed JSON entries.
2122
"""
22-
with open(pointers_file, "r") as fh:
23-
content = fh.read().strip()
23+
with open(pointers_file, "r") as file:
24+
content = file.read().strip()
2425

2526
if not content:
2627
return []
2728

28-
# JSON path
2929
if content.startswith("[") or content.startswith("{"):
30-
try:
31-
data = json.loads(content)
32-
except json.JSONDecodeError as e:
33-
raise ValueError(f"Failed to parse JSON file {pointers_file}: {e}") from e
34-
35-
if not isinstance(data, list):
36-
raise ValueError("JSON file must contain an array of objects")
37-
38-
parsed_ids: list[str] = []
39-
skipped_count = 0
40-
for item in data:
41-
if (
42-
isinstance(item, dict)
43-
and "id" in item
44-
and isinstance(item["id"], str)
45-
and item["id"].strip()
46-
):
47-
parsed_ids.append(item["id"].strip())
48-
else:
49-
skipped_count += 1
30+
return _parse_json_pointers(content, pointers_file)
31+
32+
return _parse_plain_text_pointers(content)
5033

51-
if skipped_count:
52-
print(
53-
f"Warning: skipped {skipped_count} malformed entries in JSON file {pointers_file}"
54-
)
5534

56-
return parsed_ids
35+
def _parse_json_pointers(content: str, pointers_file: str) -> list[str]:
36+
37+
try:
38+
data = json.loads(content)
39+
except json.JSONDecodeError as e:
40+
raise ValueError(f"Failed to parse JSON file {pointers_file}: {e}") from e
41+
42+
if not isinstance(data, list):
43+
raise ValueError("JSON file must contain an array of objects")
44+
45+
parsed_ids: list[str] = []
46+
skipped_count = 0
47+
48+
for item in data:
49+
if _is_valid_pointer(item):
50+
parsed_ids.append(item["id"].strip())
51+
else:
52+
skipped_count += 1
5753

58-
# Plain text fallback
54+
if skipped_count:
55+
# TODO should we log these?
56+
print(
57+
f"Warning: skipped {skipped_count} malformed entries in JSON file {pointers_file}"
58+
)
59+
60+
return parsed_ids
61+
62+
63+
def _is_valid_pointer(item: Any) -> bool:
64+
return (
65+
isinstance(item, dict)
66+
and "id" in item
67+
and isinstance(item["id"], str)
68+
and item["id"].strip()
69+
)
70+
71+
72+
def _parse_plain_text_pointers(content: str) -> list[str]:
5973
return [line.strip() for line in content.splitlines() if line.strip()]
6074

6175

62-
def _build_and_write_result(
63-
pointers_to_delete,
64-
ods_code,
65-
matched_pointers,
66-
mismatched_pointers,
67-
not_found_pointers,
68-
pointers_deleted,
69-
failed_deletes,
70-
start_time,
71-
end_time,
72-
output_file_path,
73-
):
76+
@dataclass
class PointerDeletionContext:
    """
    Values gathered during a pointer deletion run, bundled so they can be
    passed to _build_and_write_result as a single argument.
    """

    # Every pointer id requested for deletion; the report records its length.
    pointers_to_delete: list[str]
    # ODS code the run was executed for; copied into the report as-is.
    ods_code: str
    # Ids reported under "ods_code_matched".
    matched_pointers: list[str]
    # Ids reported under "ods_code_mismatched".
    mismatched_pointers: list[str]
    # Ids reported under "pointer_not_found".
    not_found_pointers: list[str]
    # Ids reported under "deleted_pointers".
    pointers_deleted: list[str]
    # Ids reported under "failed_deletes".
    failed_deletes: list[str]
    # start_time and end_time bound the run; their difference is reported
    # as "deletes-took-secs".
    start_time: datetime
    end_time: datetime
    # Path the JSON report is written to.
    output_file_path: str
88+
89+
90+
def _build_and_write_result(ctx: PointerDeletionContext) -> Dict[str, Any]:
    """
    Assemble the result report for a deletion run, write it out and print it.

    The report holds the number of requested pointers, the ODS code, a
    count/ids pair for each pointer category carried by ctx, and the elapsed
    seconds between ctx.start_time and ctx.end_time. It is handed to
    _write_result_file with ctx.output_file_path; if that raises, the error
    is not propagated but recorded in the report under "_output_error".
    The report is then passed to _print_summary and returned.
    """

    def with_count(ids: list[str]) -> Dict[str, Any]:
        # Shared shape for every per-category section of the report.
        return {"count": len(ids), "ids": ids}

    elapsed = ctx.end_time - ctx.start_time
    result: Dict[str, Any] = {
        "pointers_to_delete": len(ctx.pointers_to_delete),
        "ods_code": ctx.ods_code,
        "ods_code_matched": with_count(ctx.matched_pointers),
        "ods_code_mismatched": with_count(ctx.mismatched_pointers),
        "pointer_not_found": with_count(ctx.not_found_pointers),
        "deleted_pointers": with_count(ctx.pointers_deleted),
        "failed_deletes": with_count(ctx.failed_deletes),
        "deletes-took-secs": elapsed.total_seconds(),
    }

    try:
        _write_result_file(result, ctx.output_file_path)
    except Exception as exc:
        # Best effort: a failed write must not lose the in-memory report,
        # so the failure is noted on the report instead of being raised.
        result["_output_error"] = (
            f"Failed to write result file {ctx.output_file_path}: {exc}"
        )

    _print_summary(result)
    return result
98122

99123

100124
def _write_result_file(result: Dict[str, Any], output_file: str) -> None:
101-
"""
102-
Atomically write result dict to output_file as JSON.
103-
Raises on failure.
104-
"""
125+
105126
out_dir = os.path.dirname(os.path.abspath(output_file)) or "."
106127
with tempfile.NamedTemporaryFile(
107128
"w", delete=False, dir=out_dir, prefix=".tmp_delete_results_", suffix=".json"
@@ -115,10 +136,7 @@ def _write_result_file(result: Dict[str, Any], output_file: str) -> None:
115136
def _check_pointers_match_ods_code(
116137
ods_code: str, pointer_ids: List[str]
117138
) -> tuple[List[str], List[str]]:
118-
"""
119-
Validate that pointer IDs are in line with the provided ODS code.
120-
Returns (matched_ids, mismatched_ids)
121-
"""
139+
122140
matched = []
123141
mismatched = []
124142

@@ -173,7 +191,6 @@ def _batch_delete_pointers(
173191
) -> tuple[List[str], List[str]]:
174192
"""
175193
Delete pointers using BatchWriteItem (max 25 items per request).
176-
Returns (deleted_ids, failed_ids)
177194
"""
178195
pointers_deleted = []
179196
failed_deletes_set: set[str] = set()
@@ -212,9 +229,6 @@ def _batch_delete_pointers(
212229

213230

214231
def _print_summary(result: Dict[str, Any]) -> None:
215-
"""
216-
Print a concise summary of the result to stdout.
217-
"""
218232

219233
def count_from(field):
220234
val = result.get(field)
@@ -256,38 +270,46 @@ def _delete_pointers_by_id(
256270
- ods_code: ODS code of the organisation that the pointers belong to
257271
- pointers_to_delete: list of pointer ids to delete
258272
- pointers_file: path to JSON file (array of objects with "id" field) or text file (one id per line)
273+
274+
Sample usage:
275+
- Delete by list of ids:
276+
python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_to_delete '["ABC123-12345678910", "ABC123-109876543210"]'
277+
- Delete by JSON file:
278+
python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/pointers.json
279+
- Delete by text file:
280+
python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/ids.txt
259281
"""
260282
if pointers_to_delete is None and pointers_file is None:
261283
raise ValueError("Provide either pointers_to_delete or pointers_file")
262284

263285
if pointers_to_delete is not None and pointers_file is not None:
264286
raise ValueError("Provide either pointers_to_delete or pointers_file, not both")
265287

266-
# Load pointers from file if provided
267288
if pointers_file:
268289
pointers_to_delete = _load_pointers_from_file(pointers_file)
269290

270-
# establish start_time early so any early-return can use it for filename
271291
start_time = datetime.now(tz=timezone.utc)
272-
stamp = start_time.strftime("%Y%m%dT%H%M%SZ")
292+
timestamp = start_time.strftime("%Y%m%dT%H%M%SZ")
273293
script_dir = os.path.dirname(os.path.abspath(__file__)) or "."
274294
output_file_path = os.path.join(
275-
script_dir, f"delete_results_{ods_code}_{stamp}.json"
295+
script_dir, f"delete_results_{ods_code}_{timestamp}.json"
276296
)
277297

278298
if not pointers_to_delete:
279299
end_time = datetime.now(tz=timezone.utc)
280300
return _build_and_write_result(
281-
pointers_to_delete,
282-
ods_code,
283-
[],
284-
[],
285-
[],
286-
[],
287-
[],
288-
start_time,
289-
end_time,
290-
output_file_path,
301+
PointerDeletionContext(
302+
pointers_to_delete=pointers_to_delete,
303+
ods_code=ods_code,
304+
matched_pointers=[],
305+
mismatched_pointers=[],
306+
not_found_pointers=[],
307+
pointers_deleted=[],
308+
failed_deletes=[],
309+
start_time=start_time,
310+
end_time=end_time,
311+
output_file_path=output_file_path,
312+
)
291313
)
292314

293315
print(
@@ -305,16 +327,18 @@ def _delete_pointers_by_id(
305327
print(f"None of the pointer IDs are a match for ODS code {ods_code}. Exiting.")
306328
end_time = datetime.now(tz=timezone.utc)
307329
return _build_and_write_result(
308-
pointers_to_delete,
309-
ods_code,
310-
matched_pointers,
311-
mismatched_pointers,
312-
[],
313-
[],
314-
[],
315-
start_time,
316-
end_time,
317-
output_file_path,
330+
PointerDeletionContext(
331+
pointers_to_delete=pointers_to_delete,
332+
ods_code=ods_code,
333+
matched_pointers=matched_pointers,
334+
mismatched_pointers=mismatched_pointers,
335+
not_found_pointers=[],
336+
pointers_deleted=[],
337+
failed_deletes=[],
338+
start_time=start_time,
339+
end_time=end_time,
340+
output_file_path=output_file_path,
341+
)
318342
)
319343

320344
print(f"Checking existence of {len(matched_pointers)} pointers in {table_name}...")
@@ -330,16 +354,18 @@ def _delete_pointers_by_id(
330354
print("No pointers found to delete. Exiting.")
331355
end_time = datetime.now(tz=timezone.utc)
332356
return _build_and_write_result(
333-
pointers_to_delete,
334-
ods_code,
335-
matched_pointers,
336-
mismatched_pointers,
337-
not_found_pointers,
338-
[],
339-
[],
340-
start_time,
341-
end_time,
342-
output_file_path,
357+
PointerDeletionContext(
358+
pointers_to_delete=pointers_to_delete,
359+
ods_code=ods_code,
360+
matched_pointers=matched_pointers,
361+
mismatched_pointers=mismatched_pointers,
362+
not_found_pointers=not_found_pointers,
363+
pointers_deleted=[],
364+
failed_deletes=[],
365+
start_time=start_time,
366+
end_time=end_time,
367+
output_file_path=output_file_path,
368+
)
343369
)
344370

345371
# Proceed with deletion using BatchWriteItem
@@ -349,16 +375,18 @@ def _delete_pointers_by_id(
349375

350376
end_time = datetime.now(tz=timezone.utc)
351377
result = _build_and_write_result(
352-
pointers_to_delete,
353-
ods_code,
354-
matched_pointers,
355-
mismatched_pointers,
356-
not_found_pointers,
357-
pointers_deleted,
358-
failed_deletes,
359-
start_time,
360-
end_time,
361-
output_file_path,
378+
PointerDeletionContext(
379+
pointers_to_delete=pointers_to_delete,
380+
ods_code=ods_code,
381+
matched_pointers=matched_pointers,
382+
mismatched_pointers=mismatched_pointers,
383+
not_found_pointers=not_found_pointers,
384+
pointers_deleted=pointers_deleted,
385+
failed_deletes=failed_deletes,
386+
start_time=start_time,
387+
end_time=end_time,
388+
output_file_path=output_file_path,
389+
)
362390
)
363391
print(" Done")
364392
return result

0 commit comments

Comments
 (0)