@@ -228,9 +228,105 @@ def _fix_invalid_pointers(table_name: str) -> dict[str, Any]:
228228 }
229229
230230
def _read_pointer_ids(file_path: str) -> list[str]:
    """Return the pointer IDs listed in *file_path*, one per non-blank line.

    Only the text before the first ":" on a line is treated as the ID.
    Surrounding whitespace (including the trailing newline of lines that have
    no ":") is stripped, and lines that yield an empty ID are skipped.
    """
    with open(file_path, encoding="utf-8") as f:
        candidates = (line.split(":", 1)[0].strip() for line in f)
        return [pointer_id for pointer_id in candidates if pointer_id]


def _apply_pointer_fixes(docref: "DocumentReference") -> bool:
    """Normalise the first content entry of *docref* in place.

    Returns:
        True when at least one field was modified, so the caller knows the
        document has to be written back; False when nothing needed fixing.

    Raises:
        Whatever attribute/index access raises when the document lacks a
        content entry, attachment or format; the caller reports it per pointer.
    """
    # NOTE(review): the original also carried a commented-out rewrite of the
    # "NRL-FormatCode-1" code system URL; it was disabled there and is not
    # reinstated here.
    pdf = "application/pdf"
    content = docref.content[0]
    attachment = content.attachment
    changed = False

    # Content type carries something after "application/pdf": trim it back.
    if attachment.contentType.startswith(pdf) and attachment.contentType != pdf:
        attachment.contentType = pdf
        changed = True

    # ssp:// attachments are forced to the PDF content type.
    if attachment.url.startswith("ssp://") and attachment.contentType != pdf:
        attachment.contentType = pdf
        changed = True

    # PDF documents served over https:// are re-pointed at ssp://.
    # str.replace returns a new string, so the result must be assigned back
    # (the original discarded it, which made this step a silent no-op).
    # Only the leading scheme is replaced, never a later occurrence.
    if (
        attachment.contentType == pdf
        and attachment.url.endswith("pdf")
        and attachment.url.startswith("https://")
    ):
        attachment.url = attachment.url.replace("https://", "ssp://", 1)
        changed = True

    fmt = content.format
    if (
        fmt.code == "urn:nhs-ic:record-contact"
        or fmt.display == "Contact details (HTTP Unsecured)"
    ):
        fmt.code = "urn:nhs-ic:unstructured"
        fmt.display = "Unstructured Document"
        changed = True

    return changed


def _fix_invalid_pointers_from_file(table_name: str, file_path: str) -> dict[str, Any]:
    """Fix the pointers whose IDs are listed in *file_path*.

    Each listed pointer is fetched from *table_name*, its document is parsed
    as a DocumentReference, the fixes in ``_apply_pointer_fixes`` are applied,
    and the document is written back at most once, and only if it changed.
    A failure on one pointer is printed and does not stop the run.

    Args:
        table_name: DynamoDB table holding the pointers.
        file_path: Text file with one pointer ID per line; anything after the
            first ":" on a line is ignored.

    Returns:
        A dict with ``fixed_pointers`` (IDs that were rewritten, each listed
        once), ``total_fixed_count`` (how many pointers were rewritten) and
        ``fix-took-secs`` (wall-clock duration of the fix loop).

    Side effects:
        Updates items in DynamoDB and, when anything was fixed, writes the
        fixed IDs to ``fixed_pointers_from_file.txt`` in the working directory.
    """
    print(f"Fixing invalid pointers from file {file_path} in table {table_name}....")

    pointer_ids = _read_pointer_ids(file_path)
    table = resource.Table(table_name)  # built once instead of once per write
    fixed_pointers: list[str] = []

    start_time = datetime.now(tz=timezone.utc)

    for pointer_id in pointer_ids:
        key = f"D#{pointer_id}"
        try:
            response = dynamodb.get_item(
                TableName=table_name,
                Key={"pk": {"S": key}, "sk": {"S": key}},
            )
            item = response.get("Item")
            if not item:
                print(f"Pointer {pointer_id} not found.")
                continue

            document = item.get("document", {}).get("S", "")
            docref = DocumentReference.model_validate_json(document)
            if not _apply_pointer_fixes(docref):
                continue

            # Single write carrying every fix applied to this pointer.
            table.update_item(
                Key={"pk": key, "sk": key},
                UpdateExpression="SET document = :d",
                ExpressionAttributeValues={":d": docref.model_dump_json()},
            )
        except Exception as exc:
            # Best-effort batch job: report the failure and carry on.
            print(f"Failed to fix document {pointer_id}: {exc}")
            continue

        fixed_pointers.append(pointer_id)
        # Progress marker: one "x" per 100 pointers actually fixed.
        if len(fixed_pointers) % 100 == 0:
            print("x", end="", flush=True)

    end_time = datetime.now(tz=timezone.utc)

    print(f" Done. Fixed {len(fixed_pointers)} invalid pointers")

    if fixed_pointers:
        print("Writing fixed pointers IDs to file ./fixed_pointers_from_file.txt ...")
        with open("fixed_pointers_from_file.txt", "w", encoding="utf-8") as f:
            f.writelines(f"{_id}\n" for _id in fixed_pointers)

    return {
        "fixed_pointers": fixed_pointers,
        "total_fixed_count": len(fixed_pointers),
        "fix-took-secs": (end_time - start_time).total_seconds(),
    }

325+
if __name__ == "__main__":
    # Map each CLI sub-command name to the routine python-fire should invoke.
    cli_commands = {
        "find_and_delete_invalid_pointers": _find_and_delete_invalid_pointers,
        "fix_invalid_pointers": _fix_invalid_pointers,
        "find_invalid_pointers": _find_invalid_pointers,
        "fix_invalid_pointers_from_file": _fix_invalid_pointers_from_file,
    }
    fire.Fire(cli_commands)
0 commit comments