@@ -21,14 +21,14 @@ def _validate_url(document: str):
2121 """
2222 docref = DocumentReference .model_validate_json (document )
2323
24- validator = DocumentReferenceValidator ()
25- result = validator .validate (data = docref )
26- for i , content in enumerate (result .content ):
24+ for content in docref .content :
2725 if content .attachment .contentType == "application/pdf" :
2826 url = content .attachment .url
2927 if url [- 1 ] == "/" :
3028 raise RuntimeError ("Malformed URL found: " + str (url ))
3129
30+ validator = DocumentReferenceValidator ()
31+ result = validator .validate (data = docref )
3232 if not result .is_valid :
3333 raise RuntimeError ("Failed to validate document: " + str (result .issues ))
3434
@@ -51,22 +51,25 @@ def _find_malformed_urls(
5151 "PaginationConfig" : {"PageSize" : 50 },
5252 }
5353
54- malformed_pointers = []
54+ malformed_pointers : list [ dict [ str , Any ]] = []
5555 total_scanned_count = 0
5656
5757 start_time = datetime .now (tz = timezone .utc )
5858
5959 for page in paginator .paginate (** params ):
6060 for item in page ["Items" ]:
61- pointer_id = item .get ("id" , {}).get ("S" )
61+ pointer_id = item .get ("id" , {}).get ("S" , "" )
6262 custodian , _ = pointer_id .split ("-" , 1 )
6363 # only need to check pointers created by the specified custodians
6464 if custodian in custodian_ods_codes :
65+ patient_number = item .get ("nhs_number" , {}).get ("S" , "" )
6566 document = item .get ("document" , {}).get ("S" , "" )
6667 try :
6768 _validate_url (document )
6869 except Exception as exc :
69- malformed_pointers .append ((pointer_id , exc ))
70+ malformed_pointers .append (
71+ {"id" : pointer_id , "patient_number" : patient_number , "err" : exc }
72+ )
7073
7174 total_scanned_count += page ["ScannedCount" ]
7275
@@ -84,8 +87,8 @@ def _find_malformed_urls(
8487
8588 print ("Writing malformed_pointers to file ./malformed_pointers.txt ..." ) # noqa
8689 with open ("malformed_pointers.txt" , "w" ) as f :
87- for _id , err in malformed_pointers :
88- f .write (f"{ _id } : { err } \n " )
90+ for malformed_pointer_info in malformed_pointers :
91+ f .write (f"{ malformed_pointer_info } \n " )
8992
9093 return {
9194 "malformed_pointers" : len (malformed_pointers ),
0 commit comments