Skip to content

Commit 2bec2b3

Browse files
[PRMT-70] Improve efficiency of LG retrieval (#644)
* [PRMT-70] - added some streaming logic * [PRMT-70] - updated tests * [PRMT-70] - reduced delay * [PRMT-70] - download_lloyd_george_files now downloads to memory * [PRMT-70] - updated code to not copy into buffer if there is only 1 file * [PRMT-70] - removed commented code * [PRMT-70] - commented test to try in env * [PRMT-70] - added logging * [PRMT-70] - removed optimization * [PRMT-70] - removed commented test * [PRMT-70] - removed logic for single file * [PRMT-70] - fixed tests * [PRMT-70] - merged with main * [PRMT-70] - addressed comments * [PRMT-70] - fixed typo * [PRMT-70] - addressed comments and fixed tests * [PRMT-70] - merged main * [PRMT-70] - merged main and renamed variable * [PRMT-70] - fixed typo * [PRMT-70] - fixed typo * removed duplicated * [PRMT-70]- added performance changes * PRMT-70 found out pikepdf is faster * [PRMT-70]- removed commented code * [PRMT-70]- added unit tests * [PRMT-70]- fixed comments * [PRMT-70]- fixed comments * [PRMT-70]- fixed test * [PRMT-70]- fixed test
1 parent b49f3fb commit 2bec2b3

12 files changed

+730
-709
lines changed

app/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ services:
1111
ports:
1212
- "${HOST_PORT}:${CONTAINER_PORT}"
1313
env_file:
14-
- .env
14+
- .env

app/src/helpers/requests/getLloydGeorgeRecord.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { JOB_STATUS } from '../../types/generic/downloadManifestJobStatus';
66
import { isRunningInCypress } from '../utils/isLocal';
77
import { StitchRecordError } from '../../types/generic/errors';
88

9-
export const DELAY_BETWEEN_POLLING_IN_SECONDS = isRunningInCypress() ? 0 : 10;
9+
export const DELAY_BETWEEN_POLLING_IN_SECONDS = isRunningInCypress() ? 0 : 3;
1010

1111
type Args = {
1212
nhsNumber: string;
@@ -29,10 +29,11 @@ const UnexpectedResponseMessage =
2929
async function getLloydGeorgeRecord(args: Args): Promise<LloydGeorgeStitchResult> {
3030
const postResponse = await requestStitchJob(args);
3131
let pendingCount = 0;
32-
while (pendingCount < 3) {
33-
if (postResponse !== JOB_STATUS.COMPLETED) {
32+
while (pendingCount < 10) {
33+
if (postResponse !== JOB_STATUS.COMPLETED || pendingCount > 0) {
3434
await waitForSeconds(DELAY_BETWEEN_POLLING_IN_SECONDS);
3535
}
36+
3637
const pollingResponse = await pollForPresignedUrl(args);
3738

3839
switch (pollingResponse?.jobStatus) {

lambdas/requirements/layers/requirements_core_lambda_layer.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ responses==0.23.1
1717
six==1.16.0
1818
types-PyYAML==6.0.12.11
1919
regex==2023.12.25
20+
pikepdf==8.4.0

lambdas/services/base/s3_service.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import io
12
from datetime import datetime, timedelta, timezone
23
from io import BytesIO
34
from typing import Any, Mapping
@@ -79,14 +80,6 @@ def create_download_presigned_url(self, s3_bucket_name: str, file_key: str):
7980
def download_file(self, s3_bucket_name: str, file_key: str, download_path: str):
    """Download the S3 object at `file_key` to the local `download_path`.

    Thin pass-through to the boto3 client; returns whatever the client returns.
    """
    return self.client.download_file(s3_bucket_name, file_key, download_path)
8182

82-
def get_binary_file(self, s3_bucket_name: str, file_key: str):
83-
response = self.client.get_object(
84-
Bucket=s3_bucket_name,
85-
Key=file_key,
86-
)
87-
file = response["Body"].read()
88-
return file
89-
9083
def upload_file(self, file_name: str, s3_bucket_name: str, file_key: str):
    """Upload the local file `file_name` to S3 as `file_key` in the given bucket.

    Thin pass-through to the boto3 client; returns whatever the client returns.
    """
    return self.client.upload_file(file_name, s3_bucket_name, file_key)
9285

@@ -172,21 +165,40 @@ def get_file_size(self, s3_bucket_name: str, object_key: str) -> int:
172165
response = self.client.head_object(Bucket=s3_bucket_name, Key=object_key)
173166
return response.get("ContentLength", 0)
174167

175-
def save_or_create_file(self, source_bucket: str, file_key: str, body: bytes):
176-
return self.client.put_object(
177-
Bucket=source_bucket, Key=file_key, Body=BytesIO(body)
178-
)
179-
180168
def get_object_stream(self, bucket: str, key: str):
    """Return the streaming `Body` of an S3 object, or None if absent.

    The caller owns the returned stream and is responsible for reading
    and closing it.
    """
    return self.client.get_object(Bucket=bucket, Key=key).get("Body")
183171

184-
def upload_file_obj(self, file_obj, s3_bucket_name: str, file_key: str):
172+
def stream_s3_object_to_memory(self, bucket: str, key: str) -> BytesIO:
    """Download an S3 object into an in-memory buffer.

    The body is read in 64 KiB chunks rather than one monolithic read,
    and the buffer is rewound to the start before being returned so the
    caller can consume it immediately.
    """
    body = self.client.get_object(Bucket=bucket, Key=key)["Body"]
    buffer = BytesIO()
    while True:
        chunk = body.read(64 * 1024)
        if not chunk:
            break
        buffer.write(chunk)
    buffer.seek(0)
    return buffer
179+
180+
def upload_file_obj(
    self,
    file_obj: io.BytesIO,
    s3_bucket_name: str,
    file_key: str,
    extra_args: "Mapping[str, Any] | None" = None,
):
    """Upload an in-memory file object to S3.

    Args:
        file_obj: Binary stream positioned at the data to upload.
        s3_bucket_name: Destination bucket name.
        file_key: Destination object key.
        extra_args: Optional S3 ``ExtraArgs`` (e.g. Tagging, ContentType,
            ContentDisposition); defaults to no extra arguments.

    Raises:
        ClientError: Propagated when the underlying S3 upload fails.
    """
    try:
        self.client.upload_fileobj(
            Fileobj=file_obj,
            Bucket=s3_bucket_name,
            Key=file_key,
            ExtraArgs=extra_args or {},
        )
        logger.info(f"Uploaded file object to s3://{s3_bucket_name}/{file_key}")
    except ClientError as e:
        logger.error(
            f"Failed to upload file object to s3://{s3_bucket_name}/{file_key} - {e}"
        )
        # Bare raise preserves the original traceback; `raise e` would
        # append an extra frame pointing at this handler.
        raise
200+
201+
def save_or_create_file(self, source_bucket: str, file_key: str, body: bytes):
    """Write `body` to S3, creating or overwriting the object at `file_key`."""
    payload = BytesIO(body)
    return self.client.put_object(Bucket=source_bucket, Key=file_key, Body=payload)

lambdas/services/get_fhir_document_reference_service.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,11 @@ def create_document_reference_fhir_response(
104104
)
105105
if file_size < FileSize.MAX_FILE_SIZE:
106106
logger.info("File size is smaller than 8MB. Returning binary file.")
107-
binary_file = self.s3_service.get_binary_file(
108-
s3_bucket_name=bucket_name,
109-
file_key=file_location,
107+
s3_stream = self.s3_service.get_object_stream(
108+
bucket=bucket_name,
109+
key=file_location,
110110
)
111+
binary_file = s3_stream.read()
111112
base64_encoded_file = base64.b64encode(binary_file)
112113
document_details.data = base64_encoded_file
113114

lambdas/services/lloyd_george_generate_stitch_service.py

Lines changed: 96 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,24 @@
11
import os
2-
import shutil
3-
import tempfile
42
import uuid
3+
from concurrent.futures import ThreadPoolExecutor
4+
from io import BytesIO
55
from urllib import parse
66

77
from botocore.exceptions import ClientError
88
from enums.lambda_error import LambdaError
99
from enums.trace_status import TraceStatus
1010
from models.document_reference import DocumentReference
1111
from models.stitch_trace import StitchTrace
12+
from pikepdf import Pdf
1213
from pypdf.errors import PyPdfError
1314
from services.base.s3_service import S3Service
1415
from services.document_service import DocumentService
15-
from services.pdf_stitch_service import stitch_pdf
1616
from utils.audit_logging_setup import LoggingService
1717
from utils.exceptions import NoAvailableDocument
1818
from utils.filename_utils import extract_page_number
1919
from utils.lambda_exceptions import LGStitchServiceException
2020
from utils.lloyd_george_validator import check_for_number_of_files_match_expected
21-
from utils.utilities import create_reference_id, get_file_key_from_s3_url
21+
from utils.utilities import get_file_key_from_s3_url
2222

2323
logger = LoggingService(__name__)
2424

@@ -33,70 +33,107 @@ def __init__(self, stitch_trace: StitchTrace):
3333

3434
self.s3_service = S3Service()
3535
self.document_service = DocumentService()
36-
self.temp_folder = tempfile.mkdtemp()
3736
self.stitch_trace_object = stitch_trace
3837
self.stitch_trace_table = os.environ.get("STITCH_METADATA_DYNAMODB_NAME")
3938
self.stitch_file_name = f"patient-record-{str(uuid.uuid4())}"
40-
self.stitch_file_path = os.path.join(self.temp_folder, self.stitch_file_name)
39+
self.combined_file_folder = "combined_files"
4140

4241
def handle_stitch_request(self):
    """Run the full stitch job: build the stitched record, then mark the
    stitch trace as complete."""
    self.stitch_lloyd_george_record()
    self.update_stitch_job_complete()
4544

4645
def stitch_lloyd_george_record(self):
    """Fetch the patient's Lloyd George documents and produce a single viewable record.

    Single-file records reuse the existing S3 object in place (no copy is
    made); multi-file records are stitched into one PDF in memory and
    uploaded under the combined-files prefix. The stitch trace object is
    updated with the total size and final file location along the way.

    Raises:
        LGStitchServiceException: 404 when the patient has no documents,
            500 when retrieval, stitching, or upload fails.
    """
    # Stitching now uses pikepdf, whose errors do NOT derive from pypdf's
    # PyPdfError — they must be caught explicitly or they escape the
    # 500-mapping handler below.
    from pikepdf import PdfError as PikePdfError

    try:
        documents_for_stitching = self.get_lloyd_george_record_for_patient()
        if not documents_for_stitching:
            raise LGStitchServiceException(404, LambdaError.StitchNotFound)

        if len(documents_for_stitching) == 1:
            # Single file: serve the existing object directly instead of
            # re-uploading a stitched copy.
            document_to_stitch = documents_for_stitching[0]
            file_s3_key = get_file_key_from_s3_url(document_to_stitch.file_location)

            self.prepare_documents_for_stitching(documents_for_stitching)
            self.stitch_trace_object.total_file_size_in_bytes = (
                self.get_total_file_size_in_bytes(document=document_to_stitch)
            )
            self.stitch_trace_object.stitched_file_location = file_s3_key

        else:
            filename_for_stitched_file = f"{self.stitch_file_name}.pdf"
            destination_key = (
                f"{self.combined_file_folder}/{filename_for_stitched_file}"
            )
            ordered_documents = self.prepare_documents_for_stitching(
                documents_for_stitching
            )
            stitched_lg_stream = self.stream_and_stitch_documents(ordered_documents)
            self.stitch_trace_object.total_file_size_in_bytes = (
                stitched_lg_stream.getbuffer().nbytes
            )

            self.upload_stitched_lg_record(
                stitched_lg_stream=stitched_lg_stream,
                filename_on_bucket=destination_key,
            )

            self.stitch_trace_object.stitched_file_location = destination_key

        logger.audit_splunk_info(
            "User has viewed Lloyd George records",
            {"Result": "Successful viewing LG"},
        )

    except (
        ClientError,
        PyPdfError,
        PikePdfError,
        FileNotFoundError,
        NoAvailableDocument,
    ) as e:
        logger.error(
            f"{LambdaError.StitchClient.to_str()}: {str(e)}",
            {"Result": "Lloyd George stitching failed"},
        )
        raise LGStitchServiceException(500, LambdaError.StitchClient)
94+
def fetch_pdf(self, doc: DocumentReference) -> Pdf:
    """Download one document from the Lloyd George bucket and open it as a Pdf.

    The object is streamed into memory first so pikepdf never touches disk.
    """
    s3_key = get_file_key_from_s3_url(doc.file_location)
    in_memory_file = self.s3_service.stream_s3_object_to_memory(
        bucket=self.lloyd_george_bucket_name,
        key=s3_key,
    )
    # Defensive rewind — the streaming helper already seeks to 0.
    in_memory_file.seek(0)
    return Pdf.open(in_memory_file)
102+
103+
def stream_and_stitch_documents(
    self, documents: list[DocumentReference]
) -> BytesIO:
    """Concurrently download each document and merge them into one PDF.

    Downloads fan out across a small thread pool (the work is S3
    I/O-bound); pages are appended in the original `documents` order
    because futures are consumed in submission order, regardless of
    which download finishes first.

    Returns:
        An in-memory PDF stream rewound to the start.
    """
    from contextlib import ExitStack

    output_pdf = Pdf.new()
    output_stream = BytesIO()

    # Keep every source Pdf open until the merged document is saved:
    # when pages are copied between Pdfs, the underlying stream data is
    # pulled from the source at save time, so closing a source before
    # save risks reading from a closed Pdf.
    with ExitStack() as open_sources:
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(self.fetch_pdf, doc) for doc in documents]
            for future in futures:
                source_pdf = open_sources.enter_context(future.result())
                output_pdf.pages.extend(source_pdf.pages)

        output_pdf.save(output_stream)
        output_pdf.close()

    output_stream.seek(0)
    return output_stream
121+
122+
def prepare_documents_for_stitching(
    self, documents: list[DocumentReference]
) -> list[DocumentReference]:
    """Mark the trace as processing and return the documents in stitch order.

    A single document needs no sorting; multiple documents are ordered by
    filename. Records the file count and the most recent created date on
    the stitch trace object as a side effect.
    """
    self.update_trace_status(TraceStatus.PROCESSING)

    sorted_docs = (
        documents
        if len(documents) == 1
        else self.sort_documents_by_filenames(documents)
    )

    self.stitch_trace_object.number_of_files = len(sorted_docs)
    self.stitch_trace_object.file_last_updated = self.get_most_recent_created_date(
        sorted_docs
    )
    return sorted_docs
100137

101138
@staticmethod
102139
def sort_documents_by_filenames(
@@ -111,53 +148,39 @@ def sort_documents_by_filenames(
111148
)
112149
raise LGStitchServiceException(500, LambdaError.StitchValidation)
113150

114-
def download_lloyd_george_files(
115-
self,
116-
ordered_lg_records: list[DocumentReference],
117-
) -> list[str]:
118-
all_lg_parts = []
119-
120-
for lg_part in ordered_lg_records:
121-
file_location_on_s3 = lg_part.file_location
122-
s3_file_path = get_file_key_from_s3_url(file_location_on_s3)
123-
local_file_name = os.path.join(self.temp_folder, create_reference_id())
124-
self.s3_service.download_file(
125-
self.lloyd_george_bucket_name, s3_file_path, local_file_name
126-
)
127-
all_lg_parts.append(local_file_name)
128-
129-
return all_lg_parts
130-
131151
def upload_stitched_lg_record(
    self, stitched_lg_stream: BytesIO, filename_on_bucket: str
):
    """Upload the stitched record to the Lloyd George bucket.

    Tags the object so the bucket's lifecycle policy can expire it, and
    sets content headers so browsers render the PDF inline.

    Raises:
        LGStitchServiceException: 500 when the upload input is invalid.
    """
    try:
        upload_extra_args = {
            "Tagging": parse.urlencode({self.lifecycle_policy_tag: "true"}),
            "ContentDisposition": "inline",
            "ContentType": "application/pdf",
        }
        self.s3_service.upload_file_obj(
            file_obj=stitched_lg_stream,
            s3_bucket_name=self.lloyd_george_bucket_name,
            file_key=filename_on_bucket,
            extra_args=upload_extra_args,
        )
        logger.info(
            f"Uploaded stitched file to {self.lloyd_george_bucket_name} with key {filename_on_bucket}"
        )
    except ValueError as e:
        logger.error(
            f"{LambdaError.StitchCloudFront.to_str()}: {str(e)}",
            {"Result": "Failed to format CloudFront URL."},
        )
        raise LGStitchServiceException(500, LambdaError.StitchCloudFront)
153175

154176
@staticmethod
155177
def get_most_recent_created_date(documents: list[DocumentReference]) -> str:
156178
return max(doc.created for doc in documents)
157179

158-
@staticmethod
159-
def get_total_file_size_in_bytes(filepaths: list[str]) -> int:
160-
return sum(os.path.getsize(filepath) for filepath in filepaths)
180+
def get_total_file_size_in_bytes(self, document: DocumentReference) -> int:
    """Return the size in bytes of the document's backing object in S3."""
    return self.s3_service.get_file_size(
        document.s3_bucket_name, document.s3_file_key
    )
161184

162185
def update_stitch_job_complete(self):
163186
logger.info("Writing stitch trace to db")

0 commit comments

Comments
 (0)