
Commit 55cf373

NRL-1342 Merge branch 'develop' into feature/bars-prototype
2 parents: 5d02430 + 01784e4

20 files changed (+5325, -337 lines)

postman_collection.json

Lines changed: 9 additions & 9 deletions
Large diffs are not rendered by default.
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
{
  "resourceType": "CodeSystem",
  "id": "England-RetrievalMechanismNRL",
  "url": "https://fhir.nhs.uk/England/CodeSystem/England-RetrievalMechanismNRL",
  "version": "1.0.0",
  "name": "EnglandRetrievalMechanismNRL",
  "title": "England Retrieval Mechanism NRL",
  "status": "draft",
  "experimental": false,
  "date": "2025-02-28",
  "publisher": "NHS England",
  "contact": [
    {
      "name": "NHS England",
      "telecom": [
        {
          "system": "email",
          "value": "[email protected]",
          "use": "work",
          "rank": 1
        }
      ]
    },
    {
      "name": "NRL Team at NHS Digital",
      "telecom": [
        {
          "system": "email",
          "value": "[email protected]",
          "use": "work"
        }
      ]
    }
  ],
  "description": "A CodeSystem to identify the means by which an NRL DocumentReference can be retrieved via its content.attachment url.",
  "copyright": "Copyright © 2025+ NHS England Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. HL7® FHIR® standard Copyright © 2011+ HL7 The HL7® FHIR® standard is used under the FHIR license. You may obtain a copy of the FHIR license at https://www.hl7.org/fhir/license.html.",
  "content": "complete",
  "hierarchyMeaning": "is-a",
  "concept": [
    {
      "code": "Direct",
      "display": "Direct",
      "definition": "This document can be directly retrieved via HTTP(s) at its public URL."
    },
    {
      "code": "Proxy",
      "display": "Proxy",
      "definition": "This document must be retrieved via a proxy.",
      "concept": [
        {
          "code": "SSP",
          "display": "Spine Secure Proxy",
          "definition": "This document can be retrieved via Spine Secure Proxy by authorised organisations. The custodian's ASID will be needed and can be found in the context.related field."
        },
        {
          "code": "NDR",
          "display": "National Document Repository",
          "definition": "This document can be retrieved via the National Document Repository proxy service."
        }
      ]
    }
  ]
}
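
For orientation: the new codes describe how the document behind a DocumentReference's content.attachment url can be fetched. The sketch below builds a plain FHIR Coding against the new CodeSystem; it is not part of this commit, and exactly where such a Coding is carried on an NRL DocumentReference (and the helper name) is an assumption for illustration only.

RETRIEVAL_MECHANISM_SYSTEM = (
    "https://fhir.nhs.uk/England/CodeSystem/England-RetrievalMechanismNRL"
)


def retrieval_mechanism_coding(code: str, display: str) -> dict:
    # Return a plain-dict FHIR Coding for the given retrieval mechanism.
    # Attaching it to a DocumentReference (e.g. alongside content.attachment)
    # is an assumption, not something this commit defines.
    return {
        "system": RETRIEVAL_MECHANISM_SYSTEM,
        "code": code,
        "display": display,
    }


# Example: a document that must be fetched through the Spine Secure Proxy,
# where the custodian's ASID is expected in context.related.
ssp_coding = retrieval_mechanism_coding("SSP", "Spine Secure Proxy")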
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
{
  "resourceType": "ValueSet",
  "id": "England-RetrievalMechanism",
  "url": "https://fhir.nhs.uk/England/ValueSet/England-RetrievalMechanism",
  "version": "1.0.0",
  "name": "EnglandRetrievalMechanism",
  "status": "draft",
  "date": "2025-02-28",
  "publisher": "NHS Digital",
  "contact": {
    "name": "NRL Team at NHS Digital",
    "telecom": {
      "system": "email",
      "value": "[email protected]",
      "use": "work"
    }
  },
  "description": "A code to represent the means by which the document being referenced can be accessed (via the url in the content.attachment).",
  "copyright": "Copyright 2025 NHS Digital.",
  "compose": {
    "include": [
      {
        "system": "https://fhir.nhs.uk/England/CodeSystem/England-RetrievalMechanismNRL",
        "concept": [
          {
            "code": "Direct",
            "display": "Direct"
          },
          {
            "code": "SSP",
            "display": "Spine Secure Proxy"
          },
          {
            "code": "NDR",
            "display": "National Document Repository"
          }
        ]
      }
    ]
  }
}
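
Note that the ValueSet includes only the selectable leaf codes (Direct, SSP, NDR) and not the parent Proxy grouping. Below is a minimal sketch, not part of this commit, that cross-checks the two resources; the file names are assumed for illustration.

import json


def collect_codes(concepts: list[dict]) -> set[str]:
    # Recursively gather codes from a CodeSystem concept tree,
    # including codes nested under a parent such as "Proxy".
    codes: set[str] = set()
    for concept in concepts:
        codes.add(concept["code"])
        codes |= collect_codes(concept.get("concept", []))
    return codes


with open("England-RetrievalMechanismNRL.json") as f:  # assumed file name
    code_system = json.load(f)
with open("England-RetrievalMechanism.json") as f:  # assumed file name
    value_set = json.load(f)

defined = collect_codes(code_system["concept"])
included = {
    concept["code"]
    for include in value_set["compose"]["include"]
    for concept in include.get("concept", [])
}
missing = included - defined
print("All ValueSet codes are defined" if not missing else f"Undefined codes: {missing}")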

scripts/put_pointers_from_files.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# Put pointers from the provided files into the pointers table
# This will overwrite the pointer if it already exists in the table
import json
import os

import fire
from aws_session_assume import get_boto_session

from nrlf.core.dynamodb.model import DocumentPointer
from nrlf.core.logger import logger
from nrlf.producer.fhir.r4.model import DocumentReference

logger.setLevel("ERROR")

SKIP_PROD_WARNING = os.getenv("SKIP_PROD_WARNING", "false")


def _put_pointers_from_files(
    *filenames, env: str = "dev", table_name: str | None = None
):
    if env == "prod" and SKIP_PROD_WARNING != "true":
        confirmation = input(
            "\nWARNING - This command will modify the PROD environment. Continue? [y/n] "
        )
        if confirmation != "y":
            return "Exiting at user request"

    docrefs: list[DocumentReference] = []
    print("Reading docrefs from files...")
    for filename in filenames:
        with open(filename) as f:
            docref_json = json.load(f)
            docref = DocumentReference.model_validate(docref_json)
            docrefs.append(docref)

    session = get_boto_session(env)
    dynamodb = session.resource("dynamodb")
    if not table_name:
        table_name = f"nhsd-nrlf--{env}-pointers-table"
    table = dynamodb.Table(table_name)

    for docref in docrefs:
        try:
            print(f"Putting {docref.id}....")
            pointer = DocumentPointer.from_document_reference(docref)
            table.put_item(Item=pointer.model_dump())
        except Exception as e:
            print(f"Unable to put pointer for {docref.id}. Error: {e}")


if __name__ == "__main__":
    fire.Fire(_put_pointers_from_files)
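
The script is driven by python-fire, so positional arguments become *filenames and keyword options map to the remaining parameters. Illustrative invocations (file names and the table override are placeholders, not values from this commit):

python scripts/put_pointers_from_files.py docref-1.json docref-2.json --env dev
SKIP_PROD_WARNING=true python scripts/put_pointers_from_files.py docref.json --env prod
python scripts/put_pointers_from_files.py docref.json --env dev --table_name my-pointers-table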

scripts/set_pointer_supersede.py

Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
#!/usr/bin/env python
# Set supersede info on a pointer
import json
import os

import aws_session_assume
import fire

from nrlf.core.dynamodb.model import DocumentPointer
from nrlf.core.logger import logger
from nrlf.producer.fhir.r4.model import (
    DocumentReference,
    DocumentReferenceRelatesTo,
    Identifier,
    Reference,
)

logger.setLevel("ERROR")

SKIP_PROD_WARNING = os.getenv("SKIP_PROD_WARNING", "false")


def _set_pointer_supersede_info(
    pointer_id: str,
    supersede_pointer_id: str,
    delete_superseded: bool = False,
    env: str = "dev",
    table_name: str | None = None,
):
    if env == "prod" and SKIP_PROD_WARNING != "true":
        confirmation = input(
            "\nWARNING - This command will modify the PROD environment. Continue? [y/n] "
        )
        if confirmation != "y":
            return "Exiting at user request"

    session = aws_session_assume.get_boto_session(env)
    dynamodb = session.resource("dynamodb")

    if not table_name:
        table_name = f"nhsd-nrlf--{env}-pointers-table"
    table = dynamodb.Table(table_name)

    print(
        f"Setting pointer {pointer_id} in {table_name} to supersede {supersede_pointer_id}...."
    )

    try:
        doc_key = f"D#{pointer_id}"
        print(f"Getting {pointer_id}...")
        result = table.get_item(
            Key={"pk": doc_key, "sk": doc_key},
        )
    except Exception as e:
        print(f"Unable to get pointer. Error: {e}")
        return

    if "Item" not in result:
        print(f"Unable to set superseded info. Pointer {pointer_id} not found.")
        return

    item = result["Item"]

    try:
        pointer = DocumentPointer.model_validate({"_from_dynamo": True, **item})
        doc_ref = DocumentReference.model_validate_json(pointer.document)
    except Exception as e:
        print(f"Could not validate pointer from table. Error: {e}")
        return

    if not doc_ref.relatesTo:
        doc_ref.relatesTo = []

    existing_supersedes = [
        relates_to for relates_to in doc_ref.relatesTo if relates_to.code == "replaces"
    ]
    if existing_supersedes:
        print(
            f"Unable to add supersede info as pointer is already superseding a pointer: {existing_supersedes}"
        )
        return

    doc_ref.relatesTo.append(
        DocumentReferenceRelatesTo(
            code="replaces",
            target=Reference(
                type="DocumentReference",
                identifier=Identifier(value=supersede_pointer_id),
            ),
        )
    )

    print(f"Adding supersede info to {pointer_id}...")
    updated_pointer = DocumentPointer.from_document_reference(doc_ref)
    table.put_item(
        Item=updated_pointer.dict(exclude_none=True, exclude={"_from_dynamo"})
    )

    if delete_superseded:
        print(f"Deleting superseded {supersede_pointer_id}...")
        table.delete_item(
            Key={"pk": f"D#{supersede_pointer_id}", "sk": f"D#{supersede_pointer_id}"}
        )


if __name__ == "__main__":
    fire.Fire(_set_pointer_supersede_info)
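
Also exposed through python-fire: the first argument is the pointer that gains the relatesTo "replaces" entry, the second is the pointer it supersedes, and --delete_superseded optionally removes the superseded item afterwards. Illustrative invocations (the IDs are placeholders):

python scripts/set_pointer_supersede.py NEW-POINTER-ID OLD-POINTER-ID --env dev
python scripts/set_pointer_supersede.py NEW-POINTER-ID OLD-POINTER-ID --delete_superseded --env dev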

terraform/account-wide-infrastructure/modules/glue/LogSchemaGeneration/LogSchemaGeneration.ipynb

Lines changed: 357 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
# Log Schema Generation

The Glue script uses PySpark to process log data. Because the structure of the JSON documents within a log group varies, the schema needs to account for that variance.

The notebook provides a way to generate a PySpark schema for a log group automatically, with no manual intervention. Point it at the desired log group, hit "run all", then copy and paste the output into either producer_schema.py or consumer_schema.py.
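
For context, the notebook's output is a PySpark StructType that can be pasted into producer_schema.py or consumer_schema.py. The sketch below only shows the shape of such a schema; the field names are illustrative assumptions, and the real fields come from whichever log group the notebook is pointed at.

from pyspark.sql.types import StringType, StructField, StructType

# Illustrative only — field names are assumptions, not the generated schema.
PRODUCER_SCHEMA = StructType(
    [
        StructField("timestamp", StringType(), True),
        StructField("correlation_id", StringType(), True),
        StructField(
            "request",
            StructType(
                [
                    StructField("method", StringType(), True),
                    StructField("path", StringType(), True),
                ]
            ),
            True,
        ),
    ]
)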

terraform/account-wide-infrastructure/modules/glue/glue.tf

Lines changed: 36 additions & 6 deletions
@@ -1,7 +1,7 @@
 # Create Glue Data Catalog Database
 resource "aws_glue_catalog_database" "log_database" {
   name         = "${var.name_prefix}-reporting"
-  location_uri = "${aws_s3_bucket.target-data-bucket.id}/logs/"
+  location_uri = "${aws_s3_bucket.target-data-bucket.id}/"
 }

 # Create Glue Crawler
@@ -10,7 +10,37 @@ resource "aws_glue_crawler" "log_crawler" {
   database_name = aws_glue_catalog_database.log_database.name
   role          = aws_iam_role.glue_service_role.name
   s3_target {
-    path = "${aws_s3_bucket.target-data-bucket.id}/logs/"
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_countDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_readDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_searchDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/consumer_searchPostDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_createDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_deleteDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_readDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_searchDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_searchPostDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_updateDocumentReference/"
+  }
+  s3_target {
+    path = "${aws_s3_bucket.target-data-bucket.id}/producer_upsertDocumentReference/"
   }
   schema_change_policy {
     delete_behavior = "LOG"
@@ -34,10 +64,10 @@ resource "aws_glue_job" "glue_job" {
   name        = "${var.name_prefix}-glue-job"
   role_arn    = aws_iam_role.glue_service_role.arn
   description = "Transfer logs from source to bucket"
-  glue_version      = "4.0"
+  glue_version      = "5.0"
   worker_type       = "G.1X"
   timeout           = 2880
-  max_retries       = 1
+  max_retries       = 0
   number_of_workers = 2
   command {
     name = "glueetl"
@@ -49,8 +79,8 @@ resource "aws_glue_job" "glue_job" {
     "--enable-auto-scaling"             = "true"
     "--enable-continous-cloudwatch-log" = "true"
     "--datalake-formats"                = "delta"
-    "--source_path" = "s3://${aws_s3_bucket.source-data-bucket.id}/"     # Specify the source S3 path
-    "--target_path" = "s3://${aws_s3_bucket.target-data-bucket.id}/logs" # Specify the destination S3 path
+    "--source_path" = "s3://${aws_s3_bucket.source-data-bucket.id}/" # Specify the source S3 path
+    "--target_path" = "s3://${aws_s3_bucket.target-data-bucket.id}/" # Specify the destination S3 path
     "--job_name"       = "${var.name_prefix}-glue-job"
     "--partition_cols" = "date"
     "--enable-continuous-log-filter" = "true"

terraform/account-wide-infrastructure/modules/glue/iam.tf

Lines changed: 10 additions & 0 deletions
@@ -80,6 +80,16 @@ data "aws_iam_policy_document" "glue_service" {

     effect = "Allow"
   }
+
+  statement {
+    actions = [
+      "iam:PassRole",
+    ]
+    effect = "Allow"
+    resources = [
+      "*"
+    ]
+  }
 }

 resource "aws_iam_policy" "glue_service" {
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
locals {
  s3 = {
    transition_storage = {
      infrequent_access = {
        storage_class = "STANDARD_IA"
        days          = 150
      }
      glacier = {
        storage_class = "GLACIER"
        days          = 200
      }
    }

    expiration = {
      days = 1095
    }
  }
}
