aws-solutions-library-samples
diff --git a/lib/idp_common_pkg/idp_common/bda/bda_blueprint_creator.py b/lib/idp_common_pkg/idp_common/bda/bda_blueprint_creator.py
Lines changed: 12 additions & 9 deletions
diff --git a/lib/idp_common_pkg/idp_common/bda/schema_converter.py b/lib/idp_common_pkg/idp_common/bda/schema_converter.py
Lines changed: 1 addition & 2 deletions
diff --git a/lib/idp_common_pkg/idp_common/discovery/classes_discovery.py b/lib/idp_common_pkg/idp_common/discovery/classes_discovery.py
Lines changed: 40 additions & 22 deletions
@@ -74,7 +74,9 @@ def update_data_automation_project(self, projectArn: str, blueprint):
             logger.error(f"Failed to update data automation project: {e}")
             return None
 
-    def update_data_automation_project_with_custom_configurations(self, projectArn: str, customConfiguration):
+    def update_data_automation_project_with_custom_configurations(
+        self, projectArn: str, customConfiguration
+    ):
         """
         Update an existing Bedrock Data Automation project with the provided blueprint.
 
@@ -91,7 +93,7 @@ def update_data_automation_project_with_custom_configurations(self, projectArn:
             )
             project = project.get("project", None)
             logger.info(f"Updating project: {project}")
-            
+
             logger.info(f"Updating updated data automation project: {projectArn}")
             response = self.bedrock_client.update_data_automation_project(
                 projectArn=projectArn,
@@ -352,26 +354,27 @@ def get_blueprint(self, blueprint_arn, stage):
             logger.error(f"Error updating blueprint: {e}")
             raise e
 
-
-    def list_blueprints(self, projectArn, projectStage ):
+    def list_blueprints(self, projectArn, projectStage):
         try:
             project = self.bedrock_client.get_data_automation_project(
                 projectArn=projectArn, projectStage="LIVE"
             )
             project = project.get("project", None)
             logger.info(f"Updating project: {project}")
             customOutputConfiguration = project.get("customOutputConfiguration", None)
-            
+
             return customOutputConfiguration
 
         except Exception as e:
             logger.error(f"Error updating blueprint: {e}")
             raise e
-    
-    def delete_blueprint(self, blueprint_arn, blueprint_version ):
+
+    def delete_blueprint(self, blueprint_arn, blueprint_version):
         try:
-            return self.bedrock_client.delete_blueprint(blueprintArn=blueprint_arn, blueprintVersion=blueprint_version )
-            
+            return self.bedrock_client.delete_blueprint(
+                blueprintArn=blueprint_arn, blueprintVersion=blueprint_version
+            )
+
         except Exception as e:
             logger.error(f"Error delete_blueprint: {e}")
             raise e
@@ -74,7 +74,7 @@ def convert(self, extraction_response: Dict[str, Any]) -> Dict[str, Any]:
             if group_type and group_type.lower() == "list":
                 listItemTemplate = group.get("listItemTemplate", {})
                 fields = listItemTemplate.get("itemAttributes", [])
-            
+
             for field in fields:
                 field_name = self._format_field_name(field.get("name", ""))
                 if not field_name:
@@ -88,7 +88,6 @@ def convert(self, extraction_response: Dict[str, Any]) -> Dict[str, Any]:
                     field_name
                 ] = field_schema
 
-            
             if group_type and group_type.lower() == "list":
                 # Create array property for tables
                 blueprint_schema["properties"][section_def_name] = {
 
@@ -27,17 +27,17 @@ def __init__(
         self.input_bucket = input_bucket
         self.input_prefix = input_prefix
         self.region = region or os.environ.get("AWS_REGION", "us-east-1")
-        
+
         # Load configuration
         self.config = config or self._load_default_config()
-        
+
         # Get discovery configuration
         self.discovery_config = self.config.get("discovery", {})
-        
+
         # Get model configuration for both scenarios
         self.without_gt_config = self.discovery_config.get("without_ground_truth", {})
         self.with_gt_config = self.discovery_config.get("with_ground_truth", {})
-        
+
         # Backward compatibility: use bedrock_model_id if provided
         if bedrock_model_id:
             self.without_gt_config["model_id"] = bedrock_model_id
@@ -76,7 +76,7 @@ def _load_default_config(self):
 Group the fields based on the section they are grouped in the form. Group should have attributeType as "group".
 If the group repeats and follows table format, update the attributeType as "list".      
 Do not extract the values.
-Return the extracted data in JSON format."""
+Return the extracted data in JSON format.""",
                 },
                 "with_ground_truth": {
                     "model_id": "anthropic.claude-3-sonnet-20240229-v1:0",
@@ -97,7 +97,7 @@ def _load_default_config(self):
 For document_class generate a short name based on the document content like W4, I-9, Paystub. 
 For document_description generate a description about the document in less than 50 words.
 If the group repeats and follows table format, update the attributeType as "list".      
-Do not extract the values."""
+Do not extract the values.""",
                 },
                 "output_format": {
                     "sample_json": """{
@@ -123,7 +123,7 @@ def _load_default_config(self):
         }
     ]
 }"""
-                }
+                },
             }
         }
 
@@ -353,16 +353,24 @@ def _load_ground_truth(self, bucket: str, key: str):
     def _extract_data_from_document(self, document_content, file_extension):
         try:
             # Get configuration for without ground truth
-            model_id = self.without_gt_config.get("model_id", "anthropic.claude-3-sonnet-20240229-v1:0")
-            system_prompt = self.without_gt_config.get("system_prompt", 
-                "You are an expert in processing forms. Extracting data from images and documents")
+            model_id = self.without_gt_config.get(
+                "model_id", "anthropic.claude-3-sonnet-20240229-v1:0"
+            )
+            system_prompt = self.without_gt_config.get(
+                "system_prompt",
+                "You are an expert in processing forms. Extracting data from images and documents",
+            )
             temperature = self.without_gt_config.get("temperature", 1.0)
             top_p = self.without_gt_config.get("top_p", 0.1)
             max_tokens = self.without_gt_config.get("max_tokens", 10000)
-            
+
             # Create user prompt with sample format
-            user_prompt = self.without_gt_config.get("user_prompt", self._prompt_classes_discovery())
-            sample_format = self.discovery_config.get("output_format", {}).get("sample_json", self._sample_output_format())
+            user_prompt = self.without_gt_config.get(
+                "user_prompt", self._prompt_classes_discovery()
+            )
+            sample_format = self.discovery_config.get("output_format", {}).get(
+                "sample_json", self._sample_output_format()
+            )
             full_prompt = f"{user_prompt}\nFormat the extracted data using the below JSON format:\n{sample_format}"
 
             # Create content for the user message
@@ -422,25 +430,35 @@ def _extract_data_from_document_with_ground_truth(
         """Extract data from document using ground truth as reference."""
         try:
             # Get configuration for with ground truth
-            model_id = self.with_gt_config.get("model_id", "anthropic.claude-3-sonnet-20240229-v1:0")
-            system_prompt = self.with_gt_config.get("system_prompt", 
-                "You are an expert in processing forms. Extracting data from images and documents")
+            model_id = self.with_gt_config.get(
+                "model_id", "anthropic.claude-3-sonnet-20240229-v1:0"
+            )
+            system_prompt = self.with_gt_config.get(
+                "system_prompt",
+                "You are an expert in processing forms. Extracting data from images and documents",
+            )
             temperature = self.with_gt_config.get("temperature", 1.0)
             top_p = self.with_gt_config.get("top_p", 0.1)
             max_tokens = self.with_gt_config.get("max_tokens", 10000)
 
             # Create enhanced prompt with ground truth
-            user_prompt = self.with_gt_config.get("user_prompt", 
-                self._prompt_classes_discovery_with_ground_truth(ground_truth_data))
-            
+            user_prompt = self.with_gt_config.get(
+                "user_prompt",
+                self._prompt_classes_discovery_with_ground_truth(ground_truth_data),
+            )
+
             # If user_prompt contains placeholder, replace it with ground truth
             if "{ground_truth_json}" in user_prompt:
                 ground_truth_json = json.dumps(ground_truth_data, indent=2)
                 prompt = user_prompt.replace("{ground_truth_json}", ground_truth_json)
             else:
-                prompt = self._prompt_classes_discovery_with_ground_truth(ground_truth_data)
-            
-            sample_format = self.discovery_config.get("output_format", {}).get("sample_json", self._sample_output_format())
+                prompt = self._prompt_classes_discovery_with_ground_truth(
+                    ground_truth_data
+                )
+
+            sample_format = self.discovery_config.get("output_format", {}).get(
+                "sample_json", self._sample_output_format()
+            )
             full_prompt = f"{prompt}\nFormat the extracted data using the below JSON format:\n{sample_format}"
 
             # Create content for the user message