Skip to content

Commit 41dd84b

Browse files
committed
[NRL-1279] Add script that can redact live pointer samples
1 parent 9d37f6b commit 41dd84b

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

scripts/redact_live_pointers.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import json
2+
import os
3+
from datetime import datetime, timedelta, timezone
4+
from typing import Any
5+
6+
import boto3
7+
import fire
8+
9+
from nrlf.consumer.fhir.r4.model import DocumentReference
10+
from nrlf.core.constants import PointerTypes
11+
from nrlf.core.logger import logger
12+
from nrlf.core.utils import create_fhir_instant
13+
from nrlf.core.validators import DocumentReferenceValidator
14+
15+
# AWS handles created once at import time.
# NOTE(review): neither `dynamodb` nor `paginator` is referenced anywhere else
# in the visible file - confirm they are still needed before relying on them.
dynamodb = boto3.client("dynamodb")
paginator = dynamodb.get_paginator("scan")

# Restrict the shared logger to errors for the duration of the script.
logger.setLevel("ERROR")

# Maps each PointerTypes member's value to that member's name, so a pointer's
# type coding can be turned into a short label.
type_to_name = {
    member.value: member.name
    for member in PointerTypes
}
21+
22+
23+
def _redact_pointers(src_path: str, dest_path: str) -> None:
    """
    Redact pointers in .json files from the source path and write the redacted pointers to the destination path.

    Each source file is parsed as a DocumentReference. Identifiers, timestamps and
    content URLs are replaced with fixed mock values, and the result is written to
    "<dest_path>/<ods_code>_<pointer_type>_<MonYY>.json".

    The output name depends only on the custodian ODS code, the pointer type and the
    current month, so source pointers sharing those values overwrite one another.
    Source files are processed in sorted order, so the last one by name wins.

    Parameters:
    - src_path: The path to the source directory containing the pointers.
    - dest_path: The path to the destination directory to write the redacted pointers.
      It is created if it does not already exist.

    Raises:
    - KeyError: If a pointer's type coding is not a known PointerTypes value.
    """
    os.makedirs(dest_path, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for a repeatable run.
    src_pointer_files = sorted(f for f in os.listdir(src_path) if f.endswith(".json"))

    for src_pointer_file in src_pointer_files:
        print("Reading", src_pointer_file)
        # Read as UTF-8 explicitly rather than relying on the platform default.
        with open(
            os.path.join(src_path, src_pointer_file), "r", encoding="utf-8"
        ) as f:
            pointer_data = f.read()

        docref = DocumentReference.model_validate_json(pointer_data)

        # Captured before redaction: both values are used to name the output file.
        ods_code = docref.custodian.identifier.value
        type_coding = docref.type.coding[0]
        pointer_type = type_to_name[f"{type_coding.system}|{type_coding.code}"]

        # Timestamps: every date-like field is replaced with the same mock instant.
        mock_timestamp = create_fhir_instant()
        if docref.meta:
            docref.meta.lastUpdated = mock_timestamp
        docref.date = mock_timestamp

        # Identifiers: a fixed mock UUID and a fixed subject identifier value.
        mock_id = "c2a99222-eb50-4451-ad6e-1e951627800e"
        docref.subject.identifier.value = "9999999999"
        docref.id = f"{ods_code}-{mock_id}"
        if docref.masterIdentifier:
            docref.masterIdentifier.value = f"mid_{mock_id}"
        if docref.relatesTo:
            for relates_to in docref.relatesTo:
                relates_to.target.identifier.value = f"rel_{mock_id}"

        # Content: keep the URL scheme (ssp vs https) but point at a test host.
        for content in docref.content:
            if content.attachment.url.startswith("ssp://"):
                content.attachment.url = "ssp://content.test.local/content"
            else:
                content.attachment.url = "https://content.test.local/content"
            content.attachment.creation = mock_timestamp

        # Context: only touch values that are actually present on the pointer.
        if docref.context:
            if docref.context.related:
                for related in docref.context.related:
                    related.identifier.value = "012345678910"
            if docref.context.period:
                if docref.context.period.start:
                    docref.context.period.start = mock_timestamp
                if docref.context.period.end:
                    docref.context.period.end = mock_timestamp

        month_year = datetime.now().strftime("%b%y")
        filename = os.path.join(
            dest_path, f"{ods_code}_{pointer_type}_{month_year}.json"
        )

        print("Writing", filename)
        with open(filename, "w", encoding="utf-8") as f:
            f.write(docref.model_dump_json(indent=2, exclude_unset=True))
77+
78+
79+
# Script entry point: python-fire exposes _redact_pointers on the command line.
if __name__ == "__main__":
    fire.Fire(_redact_pointers)

0 commit comments

Comments
 (0)