
Commit 19f012c

reformatted to meet the quality check
1 parent 3f02d7c commit 19f012c

File tree

9 files changed (+405 -321 lines changed)


lib/idp_common_pkg/idp_common/bda/bda_blueprint_creator.py

Lines changed: 12 additions & 9 deletions
@@ -74,7 +74,9 @@ def update_data_automation_project(self, projectArn: str, blueprint):
             logger.error(f"Failed to update data automation project: {e}")
             return None
 
-    def update_data_automation_project_with_custom_configurations(self, projectArn: str, customConfiguration):
+    def update_data_automation_project_with_custom_configurations(
+        self, projectArn: str, customConfiguration
+    ):
         """
         Update an existing Bedrock Data Automation project with the provided blueprint.
@@ -91,7 +93,7 @@ def update_data_automation_project_with_custom_configurations(self, projectArn:
         )
         project = project.get("project", None)
         logger.info(f"Updating project: {project}")
-
+
         logger.info(f"Updating updated data automation project: {projectArn}")
         response = self.bedrock_client.update_data_automation_project(
             projectArn=projectArn,
@@ -352,26 +354,27 @@ def get_blueprint(self, blueprint_arn, stage):
             logger.error(f"Error updating blueprint: {e}")
             raise e
 
-
-    def list_blueprints(self, projectArn, projectStage ):
+    def list_blueprints(self, projectArn, projectStage):
         try:
             project = self.bedrock_client.get_data_automation_project(
                 projectArn=projectArn, projectStage="LIVE"
             )
             project = project.get("project", None)
             logger.info(f"Updating project: {project}")
             customOutputConfiguration = project.get("customOutputConfiguration", None)
-
+
             return customOutputConfiguration
 
         except Exception as e:
             logger.error(f"Error updating blueprint: {e}")
             raise e
-
-    def delete_blueprint(self, blueprint_arn, blueprint_version ):
+
+    def delete_blueprint(self, blueprint_arn, blueprint_version):
         try:
-            return self.bedrock_client.delete_blueprint(blueprintArn=blueprint_arn, blueprintVersion=blueprint_version )
-
+            return self.bedrock_client.delete_blueprint(
+                blueprintArn=blueprint_arn, blueprintVersion=blueprint_version
+            )
+
         except Exception as e:
             logger.error(f"Error delete_blueprint: {e}")
             raise e
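
The changes in this file are line-wrapping only, so behavior is unchanged. A minimal calling sketch of the two reworked methods; the class name, import path, and no-argument constructor are assumptions (they are not visible in these hunks), while the method names and keyword arguments come from the diff itself:

# Hypothetical usage sketch; class name and constructor are assumed.
from idp_common.bda.bda_blueprint_creator import BDABlueprintCreator  # assumed import path

creator = BDABlueprintCreator()  # assumed no-arg constructor

project_arn = "arn:aws:bedrock:us-east-1:123456789012:data-automation-project/example"  # invented ARN

# list_blueprints returns the project's customOutputConfiguration (per the hunk above).
custom_output = creator.list_blueprints(project_arn, "LIVE")

# delete_blueprint forwards directly to the Bedrock client with blueprintArn/blueprintVersion.
creator.delete_blueprint("arn:aws:bedrock:us-east-1:123456789012:blueprint/example", "1")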

lib/idp_common_pkg/idp_common/bda/schema_converter.py

Lines changed: 1 addition & 2 deletions
@@ -74,7 +74,7 @@ def convert(self, extraction_response: Dict[str, Any]) -> Dict[str, Any]:
         if group_type and group_type.lower() == "list":
             listItemTemplate = group.get("listItemTemplate", {})
             fields = listItemTemplate.get("itemAttributes", [])
-
+
             for field in fields:
                 field_name = self._format_field_name(field.get("name", ""))
                 if not field_name:
@@ -88,7 +88,6 @@ def convert(self, extraction_response: Dict[str, Any]) -> Dict[str, Any]:
                     field_name
                 ] = field_schema
 
-
         if group_type and group_type.lower() == "list":
             # Create array property for tables
             blueprint_schema["properties"][section_def_name] = {

lib/idp_common_pkg/idp_common/discovery/classes_discovery.py

Lines changed: 40 additions & 22 deletions
@@ -27,17 +27,17 @@ def __init__(
         self.input_bucket = input_bucket
         self.input_prefix = input_prefix
         self.region = region or os.environ.get("AWS_REGION", "us-east-1")
-
+
         # Load configuration
         self.config = config or self._load_default_config()
-
+
         # Get discovery configuration
         self.discovery_config = self.config.get("discovery", {})
-
+
         # Get model configuration for both scenarios
         self.without_gt_config = self.discovery_config.get("without_ground_truth", {})
         self.with_gt_config = self.discovery_config.get("with_ground_truth", {})
-
+
         # Backward compatibility: use bedrock_model_id if provided
         if bedrock_model_id:
             self.without_gt_config["model_id"] = bedrock_model_id
@@ -76,7 +76,7 @@ def _load_default_config(self):
                     Group the fields based on the section they are grouped in the form. Group should have attributeType as "group".
                     If the group repeats and follows table format, update the attributeType as "list".
                     Do not extract the values.
-                    Return the extracted data in JSON format."""
+                    Return the extracted data in JSON format.""",
                 },
                 "with_ground_truth": {
                     "model_id": "anthropic.claude-3-sonnet-20240229-v1:0",
@@ -97,7 +97,7 @@ def _load_default_config(self):
                     For document_class generate a short name based on the document content like W4, I-9, Paystub.
                     For document_description generate a description about the document in less than 50 words.
                     If the group repeats and follows table format, update the attributeType as "list".
-                    Do not extract the values."""
+                    Do not extract the values.""",
                 },
                 "output_format": {
                     "sample_json": """{
@@ -123,7 +123,7 @@ def _load_default_config(self):
                         }
                     ]
                 }"""
-                }
+                },
             }
         }
 
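
The trailing commas added in these three hunks are purely stylistic. For orientation, the default configuration they edit has roughly this shape; the prompt text and sample JSON are elided, and the per-mode keys beyond model_id and system_prompt are assumptions inferred from the .get() calls further down in this file:

default_config = {
    "discovery": {
        "without_ground_truth": {
            "model_id": "anthropic.claude-3-sonnet-20240229-v1:0",
            "system_prompt": "...",   # elided prompt text
            "user_prompt": "...",     # assumed key, read via .get() below
            "temperature": 1.0,       # assumed default
            "top_p": 0.1,             # assumed default
            "max_tokens": 10000,      # assumed default
        },
        "with_ground_truth": {
            "model_id": "anthropic.claude-3-sonnet-20240229-v1:0",
            "system_prompt": "...",   # elided prompt text
        },
        "output_format": {
            "sample_json": "{...}",   # elided sample schema
        },
    }
}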

@@ -353,16 +353,24 @@ def _load_ground_truth(self, bucket: str, key: str):
     def _extract_data_from_document(self, document_content, file_extension):
         try:
             # Get configuration for without ground truth
-            model_id = self.without_gt_config.get("model_id", "anthropic.claude-3-sonnet-20240229-v1:0")
-            system_prompt = self.without_gt_config.get("system_prompt",
-                "You are an expert in processing forms. Extracting data from images and documents")
+            model_id = self.without_gt_config.get(
+                "model_id", "anthropic.claude-3-sonnet-20240229-v1:0"
+            )
+            system_prompt = self.without_gt_config.get(
+                "system_prompt",
+                "You are an expert in processing forms. Extracting data from images and documents",
+            )
             temperature = self.without_gt_config.get("temperature", 1.0)
             top_p = self.without_gt_config.get("top_p", 0.1)
             max_tokens = self.without_gt_config.get("max_tokens", 10000)
-
+
             # Create user prompt with sample format
-            user_prompt = self.without_gt_config.get("user_prompt", self._prompt_classes_discovery())
-            sample_format = self.discovery_config.get("output_format", {}).get("sample_json", self._sample_output_format())
+            user_prompt = self.without_gt_config.get(
+                "user_prompt", self._prompt_classes_discovery()
+            )
+            sample_format = self.discovery_config.get("output_format", {}).get(
+                "sample_json", self._sample_output_format()
+            )
             full_prompt = f"{user_prompt}\nFormat the extracted data using the below JSON format:\n{sample_format}"
 
             # Create content for the user message
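
A compact, self-contained illustration of the fallback chain this hunk reformats: every value is read from the config with a default, and the user prompt plus the sample JSON are concatenated into full_prompt. The config values and the two stand-in helpers below are invented for the example:

without_gt_config = {"user_prompt": "List every field you find in this form."}  # invented override
discovery_config = {"output_format": {}}  # no sample_json, so the fallback helper is used

def _prompt_classes_discovery():   # stand-in for the class helper
    return "Extract all field names from the document."

def _sample_output_format():       # stand-in for the class helper
    return '{"classes": []}'

user_prompt = without_gt_config.get("user_prompt", _prompt_classes_discovery())
sample_format = discovery_config.get("output_format", {}).get(
    "sample_json", _sample_output_format()
)
full_prompt = f"{user_prompt}\nFormat the extracted data using the below JSON format:\n{sample_format}"
print(full_prompt)
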
@@ -422,25 +430,35 @@ def _extract_data_from_document_with_ground_truth(
         """Extract data from document using ground truth as reference."""
         try:
             # Get configuration for with ground truth
-            model_id = self.with_gt_config.get("model_id", "anthropic.claude-3-sonnet-20240229-v1:0")
-            system_prompt = self.with_gt_config.get("system_prompt",
-                "You are an expert in processing forms. Extracting data from images and documents")
+            model_id = self.with_gt_config.get(
+                "model_id", "anthropic.claude-3-sonnet-20240229-v1:0"
+            )
+            system_prompt = self.with_gt_config.get(
+                "system_prompt",
+                "You are an expert in processing forms. Extracting data from images and documents",
+            )
             temperature = self.with_gt_config.get("temperature", 1.0)
             top_p = self.with_gt_config.get("top_p", 0.1)
             max_tokens = self.with_gt_config.get("max_tokens", 10000)
 
             # Create enhanced prompt with ground truth
-            user_prompt = self.with_gt_config.get("user_prompt",
-                self._prompt_classes_discovery_with_ground_truth(ground_truth_data))
-
+            user_prompt = self.with_gt_config.get(
+                "user_prompt",
+                self._prompt_classes_discovery_with_ground_truth(ground_truth_data),
+            )
+
             # If user_prompt contains placeholder, replace it with ground truth
             if "{ground_truth_json}" in user_prompt:
                 ground_truth_json = json.dumps(ground_truth_data, indent=2)
                 prompt = user_prompt.replace("{ground_truth_json}", ground_truth_json)
             else:
-                prompt = self._prompt_classes_discovery_with_ground_truth(ground_truth_data)
-
-            sample_format = self.discovery_config.get("output_format", {}).get("sample_json", self._sample_output_format())
+                prompt = self._prompt_classes_discovery_with_ground_truth(
+                    ground_truth_data
+                )
+
+            sample_format = self.discovery_config.get("output_format", {}).get(
+                "sample_json", self._sample_output_format()
+            )
             full_prompt = f"{prompt}\nFormat the extracted data using the below JSON format:\n{sample_format}"
 
             # Create content for the user message
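
The same wrapping-only treatment, but the hunk preserves the placeholder mechanic worth spelling out: if the configured user_prompt contains {ground_truth_json}, the serialized ground truth is spliced in, otherwise the built-in prompt builder is used. A small runnable illustration of the substitution branch; the prompt text and ground-truth content are invented:

import json

ground_truth_data = {"document_class": "W4", "groups": []}  # invented sample
user_prompt = (
    "Use this labeled example as a reference:\n"
    "{ground_truth_json}\n"
    "Now list every field and group you find."
)

if "{ground_truth_json}" in user_prompt:
    prompt = user_prompt.replace(
        "{ground_truth_json}", json.dumps(ground_truth_data, indent=2)
    )
else:
    prompt = "fallback prompt"  # stands in for _prompt_classes_discovery_with_ground_truth()

print(prompt)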
