Altering compartment 1 utils to be able to process more than 1 subject

adrianoaru-nhs · adrianoaru-nhs · commit 720f35ca3dc9 · 2025-03-07T11:24:14.000Z
Changing csv_reader to be able to generate fit device ids
diff --git a/requirements.txt b/requirements.txt
@@ -7,3 +7,4 @@ pandas>=2.2.3
 dotenv~=0.9.9
 python-dotenv~=1.0.1
 Flask~=3.0.3
+fastparquet>=2024.11.0
diff --git a/tests/Smokescreen/test_compartment_1.py b/tests/Smokescreen/test_compartment_1.py
@@ -19,7 +19,7 @@ def test_example(page: Page) -> None:
     InvitationsMonitoring(page).go_to_bcss001_invitations_plan_page()
     InvitationsPlans(page).go_to_create_a_plan_page()
     CreateAPlan(page).click_set_all_button()
-    CreateAPlan(page).fill_daily_invitation_rate_field("1")
+    CreateAPlan(page).fill_daily_invitation_rate_field("10")
     CreateAPlan(page).click_update_button()
     CreateAPlan(page).click_confirm_button()
     CreateAPlan(page).click_save_button()
@@ -35,13 +35,15 @@ def test_example(page: Page) -> None:
     GenerateInvitations(page).wait_for_invitation_generation_complete()
 
     # Print the batch of Pre-Invitation Letters
-    s1_nhs_no = batch_processing(page, "S1", "Pre-invitation (FIT)", "S9 - Pre-invitation Sent")
+    s1_nhs_numbers = batch_processing(page, "S1", "Pre-invitation (FIT)", "S9 - Pre-invitation Sent")
     batch_processing(page, "S1", "Pre-invitation (FIT) (digital leaflet)", "S9 - Pre-invitation Sent")
-    OracleDB().exec_bcss_timed_events(s1_nhs_no)
+    for nhs_no in range(len(s1_nhs_numbers)):
+        OracleDB().exec_bcss_timed_events(s1_nhs_numbers[nhs_no-1])
 
     # Print the batch of Invitation & Test Kit Letters
-    s9_nhs_no = batch_processing(page, "S9", "Invitation & Test Kit (FIT)", "S10 - Invitation & Test Kit Sent")
-    OracleDB().exec_bcss_timed_events(s9_nhs_no)
+    s9_nhs_numbers = batch_processing(page, "S9", "Invitation & Test Kit (FIT)", "S10 - Invitation & Test Kit Sent")
+    for nhs_no in range(len(s9_nhs_numbers)):
+        OracleDB().exec_bcss_timed_events(s9_nhs_numbers[nhs_no-1])
 
     # Print a set of reminder letters
     batch_processing(page, "S10", "Test Kit Reminder", "S19 - Reminder of Initial Test Sent")
diff --git a/utils/batch_processing.py b/utils/batch_processing.py
@@ -55,10 +55,12 @@ def batch_processing(page: Page, batch_type: str, batch_description: str, latest
         # Wait for the download process to complete and save the downloaded file in a temp folder
         download_file.save_as(file)
         if file.endswith(".pdf"):
-            nhs_no = extract_nhs_no_from_pdf(file)
+            nhs_numbers = extract_nhs_no_from_pdf(file)
+            first_nhs_no = nhs_numbers[0]
             os.remove(file) # Deletes the file after extracting the necessary data
         elif file.endswith(".csv"):
             csv_df = convert_csv_to_df(file) # Currently no use in compartment 1, will be necessary for future compartments
+            csv_df.to_parquet('subject_kit_number.parquet', engine='fastparquet')
             os.remove(file) # Deletes the file after extracting the necessary data
 
     # This loops through each Confirm printed button and clicks each one
@@ -74,5 +76,5 @@ def batch_processing(page: Page, batch_type: str, batch_description: str, latest
     ArchivedBatchList(page).enter_id_filter(link_text)
     ArchivedBatchList(page).verify_table_data(link_text)
 
-    verify_subject_event_status_by_nhs_no(page, nhs_no, latest_event_status)
-    return nhs_no
+    verify_subject_event_status_by_nhs_no(page, first_nhs_no, latest_event_status)
+    return nhs_numbers
diff --git a/utils/csv_reader.py b/utils/csv_reader.py
@@ -1,5 +1,16 @@
 import pandas as pd
+from datetime import datetime
 
 def convert_csv_to_df(file: str):
     csv_df = pd.read_csv(file)
-    return csv_df
+    csv_df.drop(csv_df.columns[[0,2,3,4,5,6,7,8,9,10,12]], axis = 1, inplace = True) # Removing unnecessary columns
+    df = csv_df.rename(columns={csv_df.columns[0]: "NHS_Number",csv_df.columns[1]: "Kit_ID"}) # Renaming the columns to something more meaningful
+    df.dropna(inplace = True) # Deleting any Null records
+    df["NHS_Number"] = df["NHS_Number"].str.replace(" ", "") # Removing the space from the nhs number
+    df["FIT_Device_ID"] = df["Kit_ID"].apply(convert_kit_id_to_fit_device_id)
+    return df
+
+def convert_kit_id_to_fit_device_id(kit_id):
+        today = datetime.now()
+        year = today.strftime("%y") # Get the year from todays date in YY format
+        return f"{kit_id}12{int(year)+1}12345/KD00001" # Creating the fit device id with an expiry date set to December next year
diff --git a/utils/pdf_reader.py b/utils/pdf_reader.py
@@ -2,7 +2,7 @@
 
 def extract_nhs_no_from_pdf(file: str):
     reader = PdfReader(file)
-
+    nhs_number = []
     # For loop looping through all pages of the file to find the NHS Number
     for pages in reader.pages:
         text = pages.extract_text()
@@ -13,5 +13,6 @@ def extract_nhs_no_from_pdf(file: str):
                 if "NHS No" in split_text:
                     # If a string is found containing "NHS No" only digits are stored into nhs_no
                     nhs_no = "".join([ele for ele in split_text if ele.isdigit()])
+                    nhs_number.append(nhs_no)
                     break
-    return nhs_no
+    return nhs_number