Skip to content

Commit aae468a

Browse files
NRL-1277 Fix invalid pointers from file
1 parent f1d9045 commit aae468a

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed

scripts/delete_all_invalid_pointers.py

Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -228,9 +228,105 @@ def _fix_invalid_pointers(table_name: str) -> dict[str, Any]:
228228
}
229229

230230

231+
def _apply_docref_fixes(docref: "DocumentReference") -> bool:
    """Apply the known corrections to the first content entry of *docref*.

    The document reference is mutated in place. Only ``content[0]`` is
    inspected; any further content entries are left untouched.

    Returns:
        True if at least one field was changed, otherwise False.
    """
    content = docref.content[0]
    attachment = content.attachment
    changed = False

    # Drop anything trailing the PDF media type (the value starts with
    # "application/pdf" but is longer than it).
    if (
        attachment.contentType.startswith("application/pdf")
        and attachment.contentType != "application/pdf"
    ):
        attachment.contentType = "application/pdf"
        changed = True

    # Attachments addressed through ssp:// are forced to the PDF content type.
    if (
        attachment.url.startswith("ssp://")
        and attachment.contentType != "application/pdf"
    ):
        attachment.contentType = "application/pdf"
        changed = True

    # PDF attachments whose URL ends in "pdf" are moved from https:// to ssp://.
    # NOTE(review): the previous code called str.replace() and discarded the
    # result, so this rewrite was never actually applied or persisted. Confirm
    # it is wanted before running against a live table.
    if (
        attachment.contentType == "application/pdf"
        and attachment.url.endswith("pdf")
        and attachment.url.startswith("https://")
    ):
        # Only the leading scheme is rewritten, not later occurrences.
        attachment.url = attachment.url.replace("https://", "ssp://", 1)
        changed = True

    # Replace the "record contact" format with the unstructured document format.
    doc_format = content.format
    if (
        doc_format.code == "urn:nhs-ic:record-contact"
        or doc_format.display == "Contact details (HTTP Unsecured)"
    ):
        doc_format.code = "urn:nhs-ic:unstructured"
        doc_format.display = "Unstructured Document"
        changed = True

    return changed


def _fix_invalid_pointers_from_file(table_name: str, file_path: str) -> dict[str, Any]:
    """Fix the pointers listed in *file_path* in the DynamoDB table *table_name*.

    Each non-blank line of the file contributes one pointer ID: the text
    before the first ":" with surrounding whitespace removed. For every ID the
    item with pk/sk ``D#<id>`` is fetched, its ``document`` attribute is parsed
    as a DocumentReference, the corrections in ``_apply_docref_fixes`` are
    applied, and the document is written back once if anything changed.

    IDs of updated pointers are also written to
    ``fixed_pointers_from_file.txt`` in the current working directory.

    Args:
        table_name: Name of the DynamoDB table holding the pointers.
        file_path: Path of the text file listing the pointer IDs.

    Returns:
        A dict with ``fixed_pointers`` (IDs that were updated, each listed
        once), ``total_fixed_count`` (how many pointers were updated) and
        ``fix-took-secs`` (elapsed wall-clock seconds).

    Raises:
        OSError: If the input file cannot be read or the output file cannot
            be written. Failures on individual pointers are printed and
            skipped rather than raised.
    """
    print(f"Fixing invalid pointers from file {file_path} in table {table_name}....")

    pointer_ids = []
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            # Strip so a line without ":" does not keep its trailing newline,
            # and skip blank lines instead of querying for an empty ID.
            pointer_id = line.split(":")[0].strip()
            if pointer_id:
                pointer_ids.append(pointer_id)

    table = resource.Table(table_name)
    fixed_pointers = []

    start_time = datetime.now(tz=timezone.utc)

    for processed, pointer_id in enumerate(pointer_ids, start=1):
        key = f"D#{pointer_id}"
        # Best-effort per pointer: one bad document must not abort the run.
        try:
            response = dynamodb.get_item(
                TableName=table_name,
                Key={"pk": {"S": key}, "sk": {"S": key}},
            )
            item = response.get("Item")
            if not item:
                print(f"Pointer {pointer_id} not found.")
            else:
                document = item.get("document", {}).get("S", "")
                docref = DocumentReference.model_validate_json(document)
                if _apply_docref_fixes(docref):
                    # Single write per pointer, however many rules matched.
                    table.update_item(
                        Key={"pk": key, "sk": key},
                        UpdateExpression="SET document = :d",
                        ExpressionAttributeValues={":d": docref.model_dump_json()},
                    )
                    fixed_pointers.append(pointer_id)
        except Exception as exc:
            print(f"Failed to fix document {pointer_id}: {exc}")

        # Progress marker for every 100 pointers processed.
        if processed % 100 == 0:
            print("x", end="", flush=True)

    end_time = datetime.now(tz=timezone.utc)

    print(f" Done. Fixed {len(fixed_pointers)} invalid pointers")

    if fixed_pointers:
        print("Writing fixed pointers IDs to file ./fixed_pointers_from_file.txt ...")
        with open("fixed_pointers_from_file.txt", "w", encoding="utf-8") as f:
            f.writelines(f"{_id}\n" for _id in fixed_pointers)

    return {
        "fixed_pointers": fixed_pointers,
        "total_fixed_count": len(fixed_pointers),
        "fix-took-secs": (end_time - start_time).total_seconds(),
    }
325+
231326
if __name__ == "__main__":
    # Sub-command name -> handler, passed to python-fire as the command table.
    commands = {
        "find_and_delete_invalid_pointers": _find_and_delete_invalid_pointers,
        "fix_invalid_pointers": _fix_invalid_pointers,
        "find_invalid_pointers": _find_invalid_pointers,
        "fix_invalid_pointers_from_file": _fix_invalid_pointers_from_file,
    }
    fire.Fire(commands)

0 commit comments

Comments
 (0)