seanpedrick-case
diff --git a/app.py b/app.py
Lines changed: 9 additions & 9 deletions
diff --git a/tools/config.py b/tools/config.py
Lines changed: 3 additions & 3 deletions
diff --git a/tools/custom_image_analyser_engine.py b/tools/custom_image_analyser_engine.py
Lines changed: 18 additions & 18 deletions
@@ -316,7 +316,7 @@
 # Add custom spacy recognisers to the Comprehend list, so that local Spacy model can be used to pick up e.g. titles, streetnames, UK postcodes that are sometimes missed by comprehend
 CHOSEN_COMPREHEND_ENTITIES.extend(custom_entities)
 FULL_COMPREHEND_ENTITY_LIST.extend(custom_entities)
-# CHOSEN_LLM_ENTITIES.extend(custom_entities)
+FULL_LLM_ENTITY_LIST.extend(custom_entities)
 
 
 # 1. Create a custom error class
@@ -573,16 +573,12 @@ def change_tab_to_review_redactions():
 
 walkthrough_custom_llm_instructions_textbox = gr.Textbox(
     label="Custom instructions for LLM-based entity detection",
-    placeholder="e.g., 'don't redact anything related to Mark Wilson' or 'redact all company names with the label COMPANY_NAME'",
+    placeholder="Specify new labels to redact with a description. E.g. 'Redact information related to Mark Wilson with the label MARK_WILSON' or 'redact all company names with the label COMPANY_NAME'.",
     value="",
     lines=3,
     visible=initial_is_llm_method,
 )
 
-# Note: Accordion container removed to avoid block ID mismatches
-# Components are now rendered directly in the walkthrough
-
-
 ## Redaction examples
 in_doc_files = gr.File(
     label="Choose a PDF document or image file (PDF, JPG, PNG)",
@@ -694,7 +690,7 @@ def change_tab_to_review_redactions():
 
 custom_llm_instructions_textbox = gr.Textbox(
     label="Custom instructions for LLM-based entity detection",
-    placeholder="Positive instructions are more likely to be successful than negative instructions. E.g. 'Redact information related to Mark Wilson with the label MARK_WILSON' or 'redact all company names with the label COMPANY_NAME' create labels you can filter by on the review screen, and are both more likely to be successful than 'Don't redact anything related to Mark Wilson' or 'Don't redact any company names.",
+    placeholder="Specify new labels to redact with a description. E.g. 'Redact information related to Mark Wilson with the label MARK_WILSON' or 'redact all company names with the label COMPANY_NAME'.",
     value="",
     lines=3,
     visible=initial_is_llm_method,
@@ -2001,8 +1997,8 @@ def show_info_box_on_click(
                     0,
                     "paddle",
                     CHOSEN_REDACT_ENTITIES,
-                    CHOSEN_LLM_ENTITIES,
-                    "Redact personal information about Lauren with the label LAUREN. Redact any university names with the label UNIVERSITY.",
+                    [],
+                    "Redact Lauren's name, email addresses, and phone numbers with the label LAUREN. Redact university names with the label UNIVERSITY.",
                 ],
             )
             ocr_example_labels.append("Example email LLM PII detection")
@@ -4673,6 +4669,8 @@ def restore_sys_tracebacklimit():
             local_ocr_method_radio,
             chosen_language_drop,
             input_review_files,
+            custom_llm_instructions_textbox,
+            inference_server_vlm_model_textbox,
             efficient_ocr_checkbox,
             efficient_ocr_min_words_number,
         ],
@@ -5054,6 +5052,8 @@ def restore_sys_tracebacklimit():
             local_ocr_method_radio,
             chosen_language_drop,
             input_review_files,
+            custom_llm_instructions_textbox,
+            inference_server_vlm_model_textbox,
             efficient_ocr_checkbox,
             efficient_ocr_min_words_number,
         ],
 
@@ -1463,7 +1463,7 @@ def update_model_choice_config(default_model_source, model_name_map):
 # If set and non-empty, overrides CLOUD_LLM_PII_MODEL_CHOICE whenever custom instructions are passed to the LLM (e.g. allow-list style rules). Leave empty to always use CLOUD_LLM_PII_MODEL_CHOICE.
 CLOUD_LLM_PII_CUSTOM_INSTRUCTIONS_MODEL_CHOICE = get_or_create_env_var(
     "CLOUD_LLM_PII_CUSTOM_INSTRUCTIONS_MODEL_CHOICE",
-    "anthropic.claude-3-7-sonnet-20250219-v1:0",  # Empty = use CLOUD_LLM_PII_MODEL_CHOICE even with custom instructions
+    "amazon.nova-pro-v1:0",  # Empty = use CLOUD_LLM_PII_MODEL_CHOICE even with custom instructions
 )
 
 # Cloud LLM Model Choice for summarisation (AWS Bedrock / cloud)
@@ -1806,13 +1806,13 @@ def update_model_choice_config(default_model_source, model_name_map):
 
 FULL_LLM_ENTITY_LIST = get_or_create_env_var(
     "FULL_LLM_ENTITY_LIST",
-    "['EMAIL_ADDRESS','ADDRESS','NAME','PHONE_NUMBER', 'DATE_TIME', 'URL', 'IP_ADDRESS', 'MAC_ADDRESS', 'AGE', 'BANK_ACCOUNT_NUMBER', 'PASSPORT_NUMBER', 'CA_HEALTH_NUMBER', 'CUSTOM', 'CUSTOM_FUZZY']",
+    "['EMAIL_ADDRESS', 'STREET_ADDRESS','PERSON_NAME','PHONE_NUMBER', 'DATE_TIME', 'URL', 'IP_ADDRESS', 'AGE', 'BANK_ACCOUNT_NUMBER', 'PASSPORT_NUMBER', 'CUSTOM', 'CUSTOM_FUZZY']",
 )
 
 # Entities for LLM-based PII redaction option
 CHOSEN_LLM_ENTITIES = get_or_create_env_var(
     "CHOSEN_LLM_ENTITIES",
-    "['EMAIL_ADDRESS','ADDRESS','NAME','PHONE_NUMBER', 'CUSTOM']",
+    "['EMAIL_ADDRESS','STREET_ADDRESS','PERSON_NAME','PHONE_NUMBER', 'CUSTOM']",
 )
 
 
 
@@ -7682,7 +7682,7 @@ def analyze_text(
                                 bedrock_runtime=bedrock_runtime,
                                 language=aws_language,
                                 allow_list=text_analyzer_kwargs.get("allow_list", []),
-                                chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                                chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                                 all_text_line_results=all_text_line_results,
                                 model_choice=model_choice,
                                 temperature=text_analyzer_kwargs.get(
@@ -7784,7 +7784,7 @@ def analyze_text(
                                 bedrock_runtime=bedrock_runtime,
                                 language=aws_language,
                                 allow_list=text_analyzer_kwargs.get("allow_list", []),
-                                chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                                chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                                 all_text_line_results=all_text_line_results,
                                 model_choice=model_choice,
                                 temperature=text_analyzer_kwargs.get(
@@ -7861,7 +7861,7 @@ def analyze_text(
                         bedrock_runtime=bedrock_runtime,
                         language=aws_language,
                         allow_list=text_analyzer_kwargs.get("allow_list", []),
-                        chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                        chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                         all_text_line_results=all_text_line_results,
                         model_choice=model_choice,
                         temperature=text_analyzer_kwargs.get(
@@ -8043,7 +8043,7 @@ def analyze_text(
                                 bedrock_runtime=bedrock_runtime,
                                 language=aws_language,
                                 allow_list=text_analyzer_kwargs.get("allow_list", []),
-                                chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                                chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                                 all_text_line_results=all_text_line_results,
                                 model_choice=model_choice,
                                 temperature=text_analyzer_kwargs.get(
@@ -8145,7 +8145,7 @@ def analyze_text(
                                 bedrock_runtime=bedrock_runtime,
                                 language=aws_language,
                                 allow_list=text_analyzer_kwargs.get("allow_list", []),
-                                chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                                chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                                 all_text_line_results=all_text_line_results,
                                 model_choice=model_choice,
                                 temperature=text_analyzer_kwargs.get(
@@ -8222,7 +8222,7 @@ def analyze_text(
                         bedrock_runtime=bedrock_runtime,
                         language=aws_language,
                         allow_list=text_analyzer_kwargs.get("allow_list", []),
-                        chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                        chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                         all_text_line_results=all_text_line_results,
                         model_choice=model_choice,
                         temperature=text_analyzer_kwargs.get(
@@ -8433,7 +8433,7 @@ def analyze_text(
                                 bedrock_runtime=bedrock_runtime,
                                 language=aws_language,
                                 allow_list=text_analyzer_kwargs.get("allow_list", []),
-                                chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                                chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                                 all_text_line_results=all_text_line_results,
                                 model_choice=model_choice,
                                 temperature=text_analyzer_kwargs.get(
@@ -8535,7 +8535,7 @@ def analyze_text(
                                 bedrock_runtime=bedrock_runtime,
                                 language=aws_language,
                                 allow_list=text_analyzer_kwargs.get("allow_list", []),
-                                chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                                chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                                 all_text_line_results=all_text_line_results,
                                 model_choice=model_choice,
                                 temperature=text_analyzer_kwargs.get(
@@ -8612,7 +8612,7 @@ def analyze_text(
                         bedrock_runtime=bedrock_runtime,
                         language=aws_language,
                         allow_list=text_analyzer_kwargs.get("allow_list", []),
-                        chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                        chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                         all_text_line_results=all_text_line_results,
                         model_choice=model_choice,
                         temperature=text_analyzer_kwargs.get(
@@ -9633,7 +9633,7 @@ def run_page_text_redaction(
                             allow_list=text_analyzer_kwargs.get(
                                 "allow_list", allow_list or []
                             ),
-                            chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                            chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                             all_text_line_results=all_text_line_results,
                             model_choice=model_choice,
                             temperature=text_analyzer_kwargs.get(
@@ -9727,7 +9727,7 @@ def run_page_text_redaction(
                             allow_list=text_analyzer_kwargs.get(
                                 "allow_list", allow_list or []
                             ),
-                            chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                            chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                             all_text_line_results=all_text_line_results,
                             model_choice=model_choice,
                             temperature=text_analyzer_kwargs.get(
@@ -9793,7 +9793,7 @@ def run_page_text_redaction(
                     bedrock_runtime=bedrock_runtime,
                     language=aws_language,
                     allow_list=text_analyzer_kwargs.get("allow_list", allow_list or []),
-                    chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                    chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                     all_text_line_results=all_text_line_results,
                     model_choice=model_choice,
                     temperature=text_analyzer_kwargs.get(
@@ -9969,7 +9969,7 @@ def run_page_text_redaction(
                             allow_list=text_analyzer_kwargs.get(
                                 "allow_list", allow_list or []
                             ),
-                            chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                            chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                             all_text_line_results=all_text_line_results,
                             model_choice=model_choice,
                             temperature=text_analyzer_kwargs.get(
@@ -10063,7 +10063,7 @@ def run_page_text_redaction(
                             allow_list=text_analyzer_kwargs.get(
                                 "allow_list", allow_list or []
                             ),
-                            chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                            chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                             all_text_line_results=all_text_line_results,
                             model_choice=model_choice,
                             temperature=text_analyzer_kwargs.get(
@@ -10129,7 +10129,7 @@ def run_page_text_redaction(
                     bedrock_runtime=bedrock_runtime,
                     language=aws_language,
                     allow_list=text_analyzer_kwargs.get("allow_list", allow_list or []),
-                    chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                    chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                     all_text_line_results=all_text_line_results,
                     model_choice=model_choice,
                     temperature=text_analyzer_kwargs.get(
@@ -10336,7 +10336,7 @@ def run_page_text_redaction(
                             allow_list=text_analyzer_kwargs.get(
                                 "allow_list", allow_list or []
                             ),
-                            chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                            chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                             all_text_line_results=all_text_line_results,
                             model_choice=model_choice,
                             temperature=text_analyzer_kwargs.get(
@@ -10430,7 +10430,7 @@ def run_page_text_redaction(
                             allow_list=text_analyzer_kwargs.get(
                                 "allow_list", allow_list or []
                             ),
-                            chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                            chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                             all_text_line_results=all_text_line_results,
                             model_choice=model_choice,
                             temperature=text_analyzer_kwargs.get(
@@ -10496,7 +10496,7 @@ def run_page_text_redaction(
                     bedrock_runtime=bedrock_runtime,
                     language=aws_language,
                     allow_list=text_analyzer_kwargs.get("allow_list", allow_list or []),
-                    chosen_redact_comprehend_entities=llm_chosen_redact_comprehend_entities,
+                    chosen_redact_llm_entities=llm_chosen_redact_comprehend_entities,
                     all_text_line_results=all_text_line_results,
                     model_choice=model_choice,
                     temperature=text_analyzer_kwargs.get(
Original file line number	Diff line number	Diff line change
`@@ -1463,7 +1463,7 @@ def update_model_choice_config(default_model_source, model_name_map):`
`1463`	`1463`	`# If set and non-empty, overrides CLOUD_LLM_PII_MODEL_CHOICE whenever custom instructions are passed to the LLM (e.g. allow-list style rules). Leave empty to always use CLOUD_LLM_PII_MODEL_CHOICE.`
`1464`	`1464`	`CLOUD_LLM_PII_CUSTOM_INSTRUCTIONS_MODEL_CHOICE = get_or_create_env_var(`
`1465`	`1465`	`"CLOUD_LLM_PII_CUSTOM_INSTRUCTIONS_MODEL_CHOICE",`
`1466`		`- "anthropic.claude-3-7-sonnet-20250219-v1:0", # Empty = use CLOUD_LLM_PII_MODEL_CHOICE even with custom instructions`
	`1466`	`+ "amazon.nova-pro-v1:0", # Empty = use CLOUD_LLM_PII_MODEL_CHOICE even with custom instructions`
`1467`	`1467`	`)`
`1468`	`1468`
`1469`	`1469`	`# Cloud LLM Model Choice for summarisation (AWS Bedrock / cloud)`
`@@ -1806,13 +1806,13 @@ def update_model_choice_config(default_model_source, model_name_map):`
`1806`	`1806`
`1807`	`1807`	`FULL_LLM_ENTITY_LIST = get_or_create_env_var(`
`1808`	`1808`	`"FULL_LLM_ENTITY_LIST",`
`1809`		`- "['EMAIL_ADDRESS','ADDRESS','NAME','PHONE_NUMBER', 'DATE_TIME', 'URL', 'IP_ADDRESS', 'MAC_ADDRESS', 'AGE', 'BANK_ACCOUNT_NUMBER', 'PASSPORT_NUMBER', 'CA_HEALTH_NUMBER', 'CUSTOM', 'CUSTOM_FUZZY']",`
	`1809`	`+ "['EMAIL_ADDRESS', 'STREET_ADDRESS','PERSON_NAME','PHONE_NUMBER', 'DATE_TIME', 'URL', 'IP_ADDRESS', 'AGE', 'BANK_ACCOUNT_NUMBER', 'PASSPORT_NUMBER', 'CUSTOM', 'CUSTOM_FUZZY']",`
`1810`	`1810`	`)`
`1811`	`1811`
`1812`	`1812`	`# Entities for LLM-based PII redaction option`
`1813`	`1813`	`CHOSEN_LLM_ENTITIES = get_or_create_env_var(`
`1814`	`1814`	`"CHOSEN_LLM_ENTITIES",`
`1815`		`- "['EMAIL_ADDRESS','ADDRESS','NAME','PHONE_NUMBER', 'CUSTOM']",`
	`1815`	`+ "['EMAIL_ADDRESS','STREET_ADDRESS','PERSON_NAME','PHONE_NUMBER', 'CUSTOM']",`
`1816`	`1816`	`)`
`1817`	`1817`
`1818`	`1818`