[PRMT-229] Performance Testing for Full Stitching Process (#662)

PedroSoaresNHS · MohammadIqbalAD-NHS · web-flow · commit 6dccb2e5b6b0 · 2025-06-23T11:31:49.000+01:00
* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - Create workflows to run script

* [PRMT-229] - Rename workflows

* [PRMT-229] - Add pull request parameter to workflow

* [PRMT-229] - Temporarily set workflow sandbox to ndrc

* [PRMT-229] - removed logging

* [PRMT-229] - updated variable name to not clash

* [PRMT-229] - removed no longer necessary variable

* [PRMT-229] - updated variable type for patient number and file number

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] WIP

* [PRMT-229] WIP

* [PRMT-229] WIP

* [PRMT-229] WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - copied updated bulk upload to bulk upload to verify permissions

* [PRMT-229] - reverted

* [PRMT-229] - copied updated_setup_bulk_upload to setup_bulk_upload

* [PRMT-229] - copied setup_bulk_upload to updated_setup_bulk_upload to verify permissions

* [PRMT-229] - updated to continue the test

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - WIP

* [PRMT-229] - reverted code to use updated bulk upload code

* [PRMT-229] - updated num-patients flag

* [PRMT-229] - updated updated to reflect how it was before

* [PRMT-229] - updated logic to try and upload 1 file

* [PRMT-229] - added logging

* [PRMT-229] - added logging

* [PRMT-229] - trying to write default file

* [PRMT-229] - trying to upload several files

* [PRMT-229] - updated nhs number and file paths

* [PRMT-229] - updated how nhs number is created

* [PRMT-229] - updated how nhs number is created

* [PRMT-229] - removed combi-settings

* [PRMT-229] - small efficiency update

* [PRMT-229] - optimized create_test_file_keys

* [PRMT-229] - updated generate_nhs_number

* [PRMT-229] - updated generate_nhs_number

* [PRMT-229] - fixed error

* [PRMT-229] - fixed error

* [PRMT-229] - optimized generate_nhs_number

* [PRMT-229] - testing threads for upload_lg_files_to_staging

* [PRMT-229] - added metadata

* [PRMT-229] - fixed metadata output

* [PRMT-229] - allow file size customization

* [PRMT-229] - updated pypdf dependency

* [PRMT-229] - added new argument to updated_setup_bulk_upload.py

* [PRMT-229] - updated how arguments are collected

* [PRMT-229] - fixed typo

* [PRMT-229] - testing speed with adding blank pages to pdf

* [PRMT-229] - testing speed with empty bytes

* [PRMT-229] - testing different way of changing file size

* [PRMT-229] - removed duplication of upload

* [PRMT-229] - allow file size to be float

* [PRMT-229] - allow file size to be float

* [PRMT-229] - cleaned up the code a bit

* [PRMT-70] - cleaned code

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - testing run bulk upload

* [PRMT-229] - stubbing pds

* [PRMT-229] - added new env variable to try and stub pds

* [PRMT-229] - added logging

* [PRMT-229] - added logging

* [PRMT-229] - updated mock logic

* [PRMT-229] - updated file name to make it pass validation, and changed validation strict mode

* [PRMT-229] - skip validations if we want to always have good pds

* [PRMT-229] - skip validations if we want to always have good pds

* [PRMT-229] - skip validations if we want to always have good pds

* [PRMT-229] - optimization when mocking pds

* [PRMT-229] - removed unnecessary logging

* [PRMT-229] - Added tests

* [PRMT-229] - minor fix

* test using number

* test using number

* test using number

* test using number

* test using number

* test using number

* test using number

* test using number

* reverted number differences in github action

* deleted a method

* deleted a method

* removed unused variables

* fixed variable initialization

* testing with all variables

* testing with only STAGING_BUCKET variable

* delete unused variables

* testing removing copy_to_s3 method

* copy_to_s3 method

* removed wrong access

* added STAGING_BUCKET

* [PRMT-229]-addressed ticket comments

* [PRMT-229]-addressed ticket comments

---------

Co-authored-by: Mohammad Iqbal <mohammad.iqbal27@nhs.net>
diff --git a/.github/workflows/updated-base-run-bulk-upload.yml b/.github/workflows/updated-base-run-bulk-upload.yml
@@ -0,0 +1,91 @@
+name: "Updated Base Bulk Upload - Execute a Bulk Upload"
+
+permissions:
+  pull-requests: write
+  id-token: write # This is required for requesting the JWT
+  contents: read # This is required for actions/checkout
+
+on:
+  workflow_call:
+    inputs:
+      sandbox:
+        description: "Which Sandbox to push to."
+        required: true
+        type: "string"
+        default: "ndr"
+      base_branch:
+        description: "Which Feature Branch for the Bulk Upload Script"
+        required: false
+        type: "string"
+        default: "main"
+      environment:
+        description: "Which Environment settings to use."
+        required: true
+        type: "string"
+        default: "development"
+      num_patients:
+        description: "How many patients to create (default = 1)"
+        required: false
+        type: "string"
+        default: "1"
+      file_count:
+        description: "How many files per patient to generate."
+        required: false
+        type: "string"
+        default: "1"
+      file_size:
+        description: "File size to generate."
+        required: false
+        type: "string"
+        default: "1"
+    secrets:
+      AWS_ASSUME_ROLE:
+        required: true
+
+jobs:
+  perform-bulk-upload:
+    runs-on: ubuntu-latest
+    environment: ${{ inputs.environment }}
+    steps:
+      - name: Configure AWS Credentials for ${{ vars.AWS_REGION }}
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
+          role-skip-session-tagging: true
+          aws-region: ${{ vars.AWS_REGION }}
+          mask-aws-account-id: true
+
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install boto3
+        working-directory: ./tests/bulk-upload/scripts
+
+      - name: Setup Bulk Upload
+        run: |
+          python updated_setup_bulk_upload.py \
+            --environment "${{ inputs.sandbox }}" \
+            --delete-table \
+            --download-data \
+            --build-files \
+            --num-patients "${{ inputs.num_patients }}" \
+            --upload \
+            --num-files "${{ inputs.file_count }}" \
+            --file-size "${{ inputs.file_size }}" \
+            --empty-lloydgeorge-store
+        working-directory: ./tests/bulk-upload/scripts
+
+      - name: Run Bulk Upload
+        run: |
+          python run_bulk_upload.py \
+            --environment "${{ inputs.sandbox }}" \
+            --start-bulk-upload
+        working-directory: ./tests/bulk-upload/scripts
diff --git a/.github/workflows/updated-run-bulk-upload-test.yml b/.github/workflows/updated-run-bulk-upload-test.yml
@@ -0,0 +1,42 @@
+name: Updated Run Bulk Upload - Test
+
+permissions:
+  pull-requests: write
+  id-token: write # This is required for requesting the JWT
+  contents: read # This is required for actions/checkout
+
+on:
+  workflow_dispatch:
+    inputs:
+      sandbox:
+        description: "Which Sandbox to push to."
+        required: true
+        type: "string"
+        default: "ndr"
+      num_patients:
+        description: "How many patients to create (default = 1)"
+        required: true
+        type: "string"
+        default: "1"
+      file_count:
+        description: "How many files per patient"
+        required: true
+        type: "string"
+        default: "1"
+      file_size:
+        description: "File size in MB"
+        required: true
+        type: "string"
+        default: "1"
+
+jobs:
+  bulk_upload:
+    uses: ./.github/workflows/updated-base-run-bulk-upload.yml
+    with:
+      environment: development
+      sandbox: "${{ inputs.sandbox }}"
+      num_patients: "${{ inputs.num_patients }}"
+      file_count: "${{ inputs.file_count }}"
+      file_size: "${{ inputs.file_size }}"
+    secrets:
+      AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}
diff --git a/lambdas/handlers/bulk_upload_handler.py b/lambdas/handlers/bulk_upload_handler.py
@@ -1,3 +1,5 @@
+import os
+
 from enums.feature_flags import FeatureFlags
 from services.bulk_upload_service import BulkUploadService
 from services.feature_flags_service import FeatureFlagService
@@ -25,6 +27,7 @@ def lambda_handler(event, _context):
     validation_strict_mode = validation_strict_mode_flag_object[
         FeatureFlags.LLOYD_GEORGE_VALIDATION_STRICT_MODE_ENABLED.value
     ]
+    pds_fhir_always_true = os.getenv("PDS_FHIR_ALWAYS_TRUE", "false").lower() == "true"
 
     if validation_strict_mode:
         logger.info("Lloyd George validation strict mode is enabled")
@@ -39,7 +42,9 @@ def lambda_handler(event, _context):
             status_code=http_status_code, body=response_body, methods="GET"
         ).create_api_gateway_response()
 
-    bulk_upload_service = BulkUploadService(strict_mode=validation_strict_mode)
+    bulk_upload_service = BulkUploadService(
+        strict_mode=validation_strict_mode, pds_fhir_always_true=pds_fhir_always_true
+    )
 
     try:
         bulk_upload_service.process_message_queue(event["Records"])
diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py
@@ -49,7 +49,7 @@
 
 
 class BulkUploadService:
-    def __init__(self, strict_mode):
+    def __init__(self, strict_mode, pds_fhir_always_true=False):
         self.dynamo_repository = BulkUploadDynamoRepository()
         self.sqs_repository = BulkUploadSqsRepository()
         self.s3_repository = BulkUploadS3Repository()
@@ -58,6 +58,7 @@ def __init__(self, strict_mode):
         self.unhandled_messages = []
         self.file_path_cache = {}
         self.pdf_stitching_queue_url = os.environ["PDF_STITCHING_SQS_URL"]
+        self.pds_fhir_always_true = pds_fhir_always_true
 
     def process_message_queue(self, records: list):
         for index, message in enumerate(records, start=1):
@@ -120,6 +121,7 @@ def handle_sqs_message(self, message: dict):
             raise InvalidMessageException(str(e))
 
         logger.info("SQS event is valid. Validating NHS number and file names")
+
         try:
             file_names = [
                 os.path.basename(metadata.file_path)
@@ -134,43 +136,44 @@ def handle_sqs_message(self, message: dict):
             patient_ods_code = (
                 pds_patient_details.get_ods_code_or_inactive_status_for_gp()
             )
-            if not self.strict_mode:
-                (
-                    name_validation_accepted_reason,
-                    is_name_validation_based_on_historic_name,
-                ) = validate_filename_with_patient_details_lenient(
-                    file_names, pds_patient_details
-                )
-                accepted_reason = self.concatenate_acceptance_reason(
-                    accepted_reason, name_validation_accepted_reason
-                )
-            else:
-                is_name_validation_based_on_historic_name = (
-                    validate_filename_with_patient_details_strict(
+            if not self.pds_fhir_always_true:
+                if not self.strict_mode:
+                    (
+                        name_validation_accepted_reason,
+                        is_name_validation_based_on_historic_name,
+                    ) = validate_filename_with_patient_details_lenient(
                         file_names, pds_patient_details
                     )
-                )
-            if is_name_validation_based_on_historic_name:
-                accepted_reason = self.concatenate_acceptance_reason(
-                    accepted_reason, "Patient matched on historical name"
-                )
+                    accepted_reason = self.concatenate_acceptance_reason(
+                        accepted_reason, name_validation_accepted_reason
+                    )
+                else:
+                    is_name_validation_based_on_historic_name = (
+                        validate_filename_with_patient_details_strict(
+                            file_names, pds_patient_details
+                        )
+                    )
+                if is_name_validation_based_on_historic_name:
+                    accepted_reason = self.concatenate_acceptance_reason(
+                        accepted_reason, "Patient matched on historical name"
+                    )
 
-            if not allowed_to_ingest_ods_code(patient_ods_code):
-                raise LGInvalidFilesException("Patient not registered at your practice")
-            patient_death_notification_status = (
-                pds_patient_details.get_death_notification_status()
-            )
-            if patient_death_notification_status:
-                deceased_accepted_reason = (
-                    f"Patient is deceased - {patient_death_notification_status.name}"
-                )
-                accepted_reason = self.concatenate_acceptance_reason(
-                    accepted_reason, deceased_accepted_reason
-                )
-            if patient_ods_code is PatientOdsInactiveStatus.RESTRICTED:
-                accepted_reason = self.concatenate_acceptance_reason(
-                    accepted_reason, "PDS record is restricted"
+                if not allowed_to_ingest_ods_code(patient_ods_code):
+                    raise LGInvalidFilesException(
+                        "Patient not registered at your practice"
+                    )
+                patient_death_notification_status = (
+                    pds_patient_details.get_death_notification_status()
                 )
+                if patient_death_notification_status:
+                    deceased_accepted_reason = f"Patient is deceased - {patient_death_notification_status.name}"
+                    accepted_reason = self.concatenate_acceptance_reason(
+                        accepted_reason, deceased_accepted_reason
+                    )
+                if patient_ods_code is PatientOdsInactiveStatus.RESTRICTED:
+                    accepted_reason = self.concatenate_acceptance_reason(
+                        accepted_reason, "PDS record is restricted"
+                    )
 
         except (
             InvalidNhsNumberException,
diff --git a/lambdas/services/mock_pds_service.py b/lambdas/services/mock_pds_service.py
@@ -9,7 +9,8 @@
 
 
 class MockPdsApiService(PatientSearch):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, always_pass_mock: bool = False, *args, **kwargs):
+        self.always_pass_mock = always_pass_mock
         pass
 
     def pds_request(self, nhs_number: str, *args, **kwargs) -> Response:
@@ -33,21 +34,24 @@ def pds_request(self, nhs_number: str, *args, **kwargs) -> Response:
             raise PdsErrorException("Error when requesting patient from PDS")
 
         pds_patient: dict = {}
-
-        for result in mock_pds_results:
-            mock_patient_nhs_number = result.get("id")
-            if mock_patient_nhs_number == nhs_number:
-                pds_patient = result
-                break
-
         response = Response()
+        if self.always_pass_mock:
+            pds_patient_index = 3
+            pds_patient = mock_pds_results[pds_patient_index]
+            pds_patient["id"] = nhs_number
+            pds_patient["identifier"][0]["value"] = nhs_number
+        else:
+            for result in mock_pds_results:
+                mock_patient_nhs_number = result.get("id")
+                if mock_patient_nhs_number == nhs_number:
+                    pds_patient = result
+                    break
 
         if bool(pds_patient):
             response.status_code = 200
             response._content = json.dumps(pds_patient).encode("utf-8")
         else:
             response.status_code = 404
-
         return response
 
     def too_many_requests_response(self) -> Response:
diff --git a/lambdas/utils/utilities.py b/lambdas/utils/utilities.py
@@ -71,7 +71,9 @@ def get_pds_service() -> PatientSearch:
         auth_service = NhsOauthService(ssm_service)
         return PdsApiService(ssm_service, auth_service)
     else:
-        return MockPdsApiService()
+        return MockPdsApiService(
+            always_pass_mock=os.getenv("PDS_FHIR_ALWAYS_TRUE") in ["True", "true"]
+        )
 
 
 def redact_id_to_last_4_chars(str_id: str) -> str:
diff --git a/tests/bulk-upload/scripts/run_bulk_upload.py b/tests/bulk-upload/scripts/run_bulk_upload.py
@@ -1,6 +1,8 @@
-import boto3
-import json
 import argparse
+import json
+import sys
+
+import boto3
 
 
 def invoke_lambda(lambda_name, payload={}):
@@ -53,10 +55,17 @@ def update_lambda_environment_variables(lambda_name, new_variables):
 
     bulk_upload_lambda_name = f"{args.environment}_BulkUploadLambda"
     search_lambda_name = f"{args.environment}_SearchPatientDetailsLambda"
-    if args.disable_pds_stub or input("Would you like to disable the FHIR Stub: "):
+    if args.disable_pds_stub or (
+        sys.stdin.isatty()
+        and input("Would you like to disable the FHIR Stub: ").lower() == "y"
+    ):
         new_variables = {"PDS_FHIR_IS_STUBBED": "false"}
         update_lambda_environment_variables(bulk_upload_lambda_name, new_variables)
         update_lambda_environment_variables(search_lambda_name, new_variables)
+    else:
+        new_variables = {"PDS_FHIR_IS_STUBBED": "true", "PDS_FHIR_ALWAYS_TRUE": "true"}
+        update_lambda_environment_variables(bulk_upload_lambda_name, new_variables)
+        update_lambda_environment_variables(search_lambda_name, new_variables)
     if args.start_bulk_upload or input(
         "Would you like to start the Bulk Upload Process:"
     ):
diff --git a/tests/bulk-upload/scripts/test_updated_setup_bulk_upload.py b/tests/bulk-upload/scripts/test_updated_setup_bulk_upload.py
diff --git a/tests/bulk-upload/scripts/updated_setup_bulk_upload.py b/tests/bulk-upload/scripts/updated_setup_bulk_upload.py