
Commit 55cf373

NRL-1342 Merge branch 'develop' into feature/bars-prototype
2 parents: 5d02430 + 01784e4

20 files changed (+5325, -337 lines)

postman_collection.json

Lines changed: 9 additions & 9 deletions
Large diffs are not rendered by default.
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
{
  "resourceType": "CodeSystem",
  "id": "England-RetrievalMechanismNRL",
  "url": "https://fhir.nhs.uk/England/CodeSystem/England-RetrievalMechanismNRL",
  "version": "1.0.0",
  "name": "EnglandRetrievalMechanismNRL",
  "title": "England Retrieval Mechanism NRL",
  "status": "draft",
  "experimental": false,
  "date": "2025-02-28",
  "publisher": "NHS England",
  "contact": [
    {
      "name": "NHS England",
      "telecom": [
        {
          "system": "email",
          "value": "[email protected]",
          "use": "work",
          "rank": 1
        }
      ]
    },
    {
      "name": "NRL Team at NHS Digital",
      "telecom": [
        {
          "system": "email",
          "value": "[email protected]",
          "use": "work"
        }
      ]
    }
  ],
  "description": "A CodeSystem to identify the means by which an NRL DocumentReference can be retrieved via its content.attachment url.",
  "copyright": "Copyright © 2025+ NHS England Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. HL7® FHIR® standard Copyright © 2011+ HL7 The HL7® FHIR® standard is used under the FHIR license. You may obtain a copy of the FHIR license at https://www.hl7.org/fhir/license.html.",
  "content": "complete",
  "hierarchyMeaning": "is-a",
  "concept": [
    {
      "code": "Direct",
      "display": "Direct",
      "definition": "This document can be directly retrieved via HTTP(s) at its public URL."
    },
    {
      "code": "Proxy",
      "display": "Proxy",
      "definition": "This document must be retrieved via a proxy.",
      "concept": [
        {
          "code": "SSP",
          "display": "Spine Secure Proxy",
          "definition": "This document can be retrieved via Spine Secure Proxy by authorised organisations. The custodian's ASID will be needed and can be found in the context.related field."
        },
        {
          "code": "NDR",
          "display": "National Document Repository",
          "definition": "This document can be retrieved via the National Document Repository proxy service."
        }
      ]
    }
  ]
}
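
For orientation: the new codes describe how the document behind a DocumentReference's content.attachment url can be fetched. The sketch below builds a plain FHIR Coding against the new CodeSystem; it is not part of this commit, and exactly where such a Coding is carried on an NRL DocumentReference (and the helper name) is an assumption for illustration only.

RETRIEVAL_MECHANISM_SYSTEM = (
    "https://fhir.nhs.uk/England/CodeSystem/England-RetrievalMechanismNRL"
)


def retrieval_mechanism_coding(code: str, display: str) -> dict:
    # Return a plain-dict FHIR Coding for the given retrieval mechanism.
    # Attaching it to a DocumentReference (e.g. alongside content.attachment)
    # is an assumption, not something this commit defines.
    return {
        "system": RETRIEVAL_MECHANISM_SYSTEM,
        "code": code,
        "display": display,
    }


# Example: a document that must be fetched through the Spine Secure Proxy,
# where the custodian's ASID is expected in context.related.
ssp_coding = retrieval_mechanism_coding("SSP", "Spine Secure Proxy")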
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
{
  "resourceType": "ValueSet",
  "id": "England-RetrievalMechanism",
  "url": "https://fhir.nhs.uk/England/ValueSet/England-RetrievalMechanism",
  "version": "1.0.0",
  "name": "EnglandRetrievalMechanism",
  "status": "draft",
  "date": "2025-02-28",
  "publisher": "NHS Digital",
  "contact": {
    "name": "NRL Team at NHS Digital",
    "telecom": {
      "system": "email",
      "value": "[email protected]",
      "use": "work"
    }
  },
  "description": "A code to represent the means by which the document being referenced can be accessed (via the url in the content.attachment).",
  "copyright": "Copyright 2025 NHS Digital.",
  "compose": {
    "include": [
      {
        "system": "https://fhir.nhs.uk/England/CodeSystem/England-RetrievalMechanismNRL",
        "concept": [
          {
            "code": "Direct",
            "display": "Direct"
          },
          {
            "code": "SSP",
            "display": "Spine Secure Proxy"
          },
          {
            "code": "NDR",
            "display": "National Document Repository"
          }
        ]
      }
    ]
  }
}
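
Note that the ValueSet includes only the selectable leaf codes (Direct, SSP, NDR) and not the parent Proxy grouping. Below is a minimal sketch, not part of this commit, that cross-checks the two resources; the file names are assumed for illustration.

import json


def collect_codes(concepts: list[dict]) -> set[str]:
    # Recursively gather codes from a CodeSystem concept tree,
    # including codes nested under a parent such as "Proxy".
    codes: set[str] = set()
    for concept in concepts:
        codes.add(concept["code"])
        codes |= collect_codes(concept.get("concept", []))
    return codes


with open("England-RetrievalMechanismNRL.json") as f:  # assumed file name
    code_system = json.load(f)
with open("England-RetrievalMechanism.json") as f:  # assumed file name
    value_set = json.load(f)

defined = collect_codes(code_system["concept"])
included = {
    concept["code"]
    for include in value_set["compose"]["include"]
    for concept in include.get("concept", [])
}
missing = included - defined
print("All ValueSet codes are defined" if not missing else f"Undefined codes: {missing}")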

scripts/put_pointers_from_files.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# Put pointers from the provided files into the pointers table
# This will overwrite the pointer if it already exists in the table
import json
import os

import fire
from aws_session_assume import get_boto_session

from nrlf.core.dynamodb.model import DocumentPointer
from nrlf.core.logger import logger
from nrlf.producer.fhir.r4.model import DocumentReference

logger.setLevel("ERROR")

SKIP_PROD_WARNING = os.getenv("SKIP_PROD_WARNING", "false")


def _put_pointers_from_files(
    *filenames, env: str = "dev", table_name: str | None = None
):
    if env == "prod" and SKIP_PROD_WARNING != "true":
        confirmation = input(
            "\nWARNING - This command will modify the PROD environment. Continue? [y/n] "
        )
        if confirmation != "y":
            return "Exiting at user request"

    docrefs: list[DocumentReference] = []
    print("Reading docrefs from files...")
    for filename in filenames:
        with open(filename) as f:
            docref_json = json.load(f)
            docref = DocumentReference.model_validate(docref_json)
            docrefs.append(docref)

    session = get_boto_session(env)
    dynamodb = session.resource("dynamodb")
    if not table_name:
        table_name = f"nhsd-nrlf--{env}-pointers-table"
    table = dynamodb.Table(table_name)

    for docref in docrefs:
        try:
            print(f"Putting {docref.id}....")
            pointer = DocumentPointer.from_document_reference(docref)
            table.put_item(Item=pointer.model_dump())
        except Exception as e:
            print(f"Unable to put pointer for {docref.id}. Error: {e}")


if __name__ == "__main__":
    fire.Fire(_put_pointers_from_files)
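
The script is driven by python-fire, so positional arguments become *filenames and keyword options map to the remaining parameters. Illustrative invocations (file names and the table override are placeholders, not values from this commit):

python scripts/put_pointers_from_files.py docref-1.json docref-2.json --env dev
SKIP_PROD_WARNING=true python scripts/put_pointers_from_files.py docref.json --env prod
python scripts/put_pointers_from_files.py docref.json --env dev --table_name my-pointers-table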

scripts/set_pointer_supersede.py

Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
#!/usr/bin/env python
# Set supersede info on a pointer
import json
import os

import aws_session_assume
import fire

from nrlf.core.dynamodb.model import DocumentPointer
from nrlf.core.logger import logger
from nrlf.producer.fhir.r4.model import (
    DocumentReference,
    DocumentReferenceRelatesTo,
    Identifier,
    Reference,
)

logger.setLevel("ERROR")

SKIP_PROD_WARNING = os.getenv("SKIP_PROD_WARNING", "false")


def _set_pointer_supersede_info(
    pointer_id: str,
    supersede_pointer_id: str,
    delete_superseded: bool = False,
    env: str = "dev",
    table_name: str | None = None,
):
    if env == "prod" and SKIP_PROD_WARNING != "true":
        confirmation = input(
            "\nWARNING - This command will modify the PROD environment. Continue? [y/n] "
        )
        if confirmation != "y":
            return "Exiting at user request"

    session = aws_session_assume.get_boto_session(env)
    dynamodb = session.resource("dynamodb")

    if not table_name:
        table_name = f"nhsd-nrlf--{env}-pointers-table"
    table = dynamodb.Table(table_name)

    print(
        f"Setting pointer {pointer_id} in {table_name} to supersede {supersede_pointer_id}...."
    )

    try:
        doc_key = f"D#{pointer_id}"
        print(f"Getting {pointer_id}...")
        result = table.get_item(
            Key={"pk": doc_key, "sk": doc_key},
        )
    except Exception as e:
        print(f"Unable to get pointer. Error: {e}")
        return

    if "Item" not in result:
        print(f"Unable to set superseded info. Pointer {pointer_id} not found.")
        return

    item = result["Item"]

    try:
        pointer = DocumentPointer.model_validate({"_from_dynamo": True, **item})
        doc_ref = DocumentReference.model_validate_json(pointer.document)
    except Exception as e:
        print(f"Could not validate pointer from table. Error: {e}")
        return

    if not doc_ref.relatesTo:
        doc_ref.relatesTo = []

    existing_supersedes = [
        relates_to for relates_to in doc_ref.relatesTo if relates_to.code == "replaces"
    ]
    if existing_supersedes:
        print(
            f"Unable to add supersede info as pointer is already superseding a pointer: {existing_supersedes}"
        )
        return

    doc_ref.relatesTo.append(
        DocumentReferenceRelatesTo(
            code="replaces",
            target=Reference(
                type="DocumentReference",
                identifier=Identifier(value=supersede_pointer_id),
            ),
        )
    )

    print(f"Adding supersede info to {pointer_id}...")
    updated_pointer = DocumentPointer.from_document_reference(doc_ref)
    table.put_item(
        Item=updated_pointer.dict(exclude_none=True, exclude={"_from_dynamo"})
    )

    if delete_superseded:
        print(f"Deleting superseded {supersede_pointer_id}...")
        table.delete_item(
            Key={"pk": f"D#{supersede_pointer_id}", "sk": f"D#{supersede_pointer_id}"}
        )


if __name__ == "__main__":
    fire.Fire(_set_pointer_supersede_info)
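
Also exposed through python-fire: the first argument is the pointer that gains the relatesTo "replaces" entry, the second is the pointer it supersedes, and --delete_superseded optionally removes the superseded item afterwards. Illustrative invocations (the IDs are placeholders):

python scripts/set_pointer_supersede.py NEW-POINTER-ID OLD-POINTER-ID --env dev
python scripts/set_pointer_supersede.py NEW-POINTER-ID OLD-POINTER-ID --delete_superseded --env dev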

terraform/account-wide-infrastructure/modules/glue/LogSchemaGeneration/LogSchemaGeneration.ipynb

Lines changed: 357 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
# Log Schema Generation

The Glue script uses PySpark to process log data. Because the structure of the JSON documents within a log group varies, the schema needs to account for that variance.

The notebook provides a way to generate a PySpark schema for a log group automatically, with no manual intervention. Point it at the desired log group, hit "run all", then copy and paste the output into either producer_schema.py or consumer_schema.py.
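
For context, the notebook's output is a PySpark StructType that can be pasted into producer_schema.py or consumer_schema.py. The sketch below only shows the shape of such a schema; the field names are illustrative assumptions, and the real fields come from whichever log group the notebook is pointed at.

from pyspark.sql.types import StringType, StructField, StructType

# Illustrative only — field names are assumptions, not the generated schema.
PRODUCER_SCHEMA = StructType(
    [
        StructField("timestamp", StringType(), True),
        StructField("correlation_id", StringType(), True),
        StructField(
            "request",
            StructType(
                [
                    StructField("method", StringType(), True),
                    StructField("path", StringType(), True),
                ]
            ),
            True,
        ),
    ]
)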

terraform/account-wide-infrastructure/modules/glue/glue.tf

Lines changed: 36 additions & 6 deletions
@@ -1,7 +1,7 @@
 # Create Glue Data Catalog Database
 resource "aws_glue_catalog_database" "log_database" {
   name         = "${var.name_prefix}-reporting"
-  location_uri = "${aws_s3_bucket.target-data-bucket.id}/logs/"
+  location_uri = "${aws_s3_bucket.target-data-bucket.id}/"
 }

 # Create Glue Crawler
@@ -10,7 +10,37 @@ resource "aws_glue_crawler" "log_crawler" {
   database_name = aws_glue_catalog_database.log_database.name
   role          = aws_iam_role.glue_service_role.name
   s3_target {
-    path = "${aws_s3_bucket.target-data-bucket.id}/logs/"
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_countDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_readDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_searchDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_searchPostDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_createDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_deleteDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_readDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_searchDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_searchPostDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_updateDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_upsertDocumentReference/"
   }
   schema_change_policy {
     delete_behavior = "LOG"
@@ -34,10 +64,10 @@ resource "aws_glue_job" "glue_job" {
   name        = "${var.name_prefix}-glue-job"
   role_arn    = aws_iam_role.glue_service_role.arn
   description = "Transfer logs from source to bucket"
-  glue_version      = "4.0"
+  glue_version      = "5.0"
   worker_type       = "G.1X"
   timeout           = 2880
-  max_retries       = 1
+  max_retries       = 0
   number_of_workers = 2
   command {
     name = "glueetl"
@@ -49,8 +79,8 @@ resource "aws_glue_job" "glue_job" {
     "--enable-auto-scaling"             = "true"
     "--enable-continous-cloudwatch-log" = "true"
     "--datalake-formats"                = "delta"
-    "--source_path" = "s3://${aws_s3_bucket.source-data-bucket.id}/"     # Specify the source S3 path
-    "--target_path" = "s3://${aws_s3_bucket.target-data-bucket.id}/logs" # Specify the destination S3 path
+    "--source_path" = "s3://${aws_s3_bucket.source-data-bucket.id}/" # Specify the source S3 path
+    "--target_path" = "s3://${aws_s3_bucket.target-data-bucket.id}/" # Specify the destination S3 path
     "--job_name"       = "${var.name_prefix}-glue-job"
     "--partition_cols" = "date"
     "--enable-continuous-log-filter" = "true"

terraform/account-wide-infrastructure/modules/glue/iam.tf

Lines changed: 10 additions & 0 deletions
@@ -80,6 +80,16 @@ data "aws_iam_policy_document" "glue_service" {

     effect = "Allow"
   }
+
+  statement {
+    actions = [
+      "iam:PassRole",
+    ]
+    effect = "Allow"
+    resources = [
+      "*"
+    ]
+  }
 }

 resource "aws_iam_policy" "glue_service" {
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
locals {
  s3 = {
    transition_storage = {
      infrequent_access = {
        storage_class = "STANDARD_IA"
        days          = 150
      }
      glacier = {
        storage_class = "GLACIER"
        days          = 200
      }
    }

    expiration = {
      days = 1095
    }
  }
}
