Skip to content

Commit 4ab18e0

Browse files
author
Bob Strahan
committed
Merge branch 'develop' of ssh.gitlab.aws.dev:genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator into develop
2 parents 5fd32e8 + 568629c commit 4ab18e0

File tree

7 files changed

+63
-12
lines changed

7 files changed

+63
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT-0
66
## [Unreleased]
77

88
### Added
9-
- **Document Compression for Large Documents**
9+
- **Document Compression for Large Documents - all patterns**
1010
- Added automatic compression support to handle large documents and avoid exceeding Step Functions payload limits (256KB)
1111
- **Key Features**:
1212
- Automatic compression (enabled by default, since the default trigger threshold is 0KB)

config_library/pattern-3/default/config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,17 @@
33

44
notes: Default settings
55
ocr:
6+
backend: "textract" # Default to Textract for backward compatibility
7+
model_id: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
8+
system_prompt: "You are an expert OCR system. Extract all text from the provided image accurately, preserving layout where possible."
9+
task_prompt: "Extract all text from this document image. Preserve the layout, including paragraphs, tables, and formatting."
610
features:
711
- name: LAYOUT
812
- name: TABLES
13+
- name: SIGNATURES
14+
image:
15+
target_width: '951'
16+
target_height: '1268'
917
classes:
1018
- name: letter
1119
description: A formal written correspondence with sender/recipient addresses, date, salutation, body, and closing signature

patterns/pattern-1/src/bda_invoke_function/index.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,11 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
129129
try:
130130
logger.info(f"Received event: {json.dumps(event)}")
131131

132-
# Get document from event
133-
document = Document.from_dict(event["document"])
132+
# Get document from event using new utility method
133+
working_bucket = event['working_bucket']
134+
document = Document.load_document(event["document"], working_bucket, logger)
134135
input_bucket = document.input_bucket
135136
object_key = document.input_key
136-
working_bucket = event['working_bucket']
137137
data_project_arn = event['BDAProjectArn']
138138
task_token = event['taskToken']
139139

@@ -159,4 +159,4 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
159159

160160
except Exception as e:
161161
logger.error(f"Error processing request: {str(e)}", exc_info=True)
162-
raise
162+
raise

patterns/pattern-1/src/processresults_function/index.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,9 +1176,15 @@ def handler(event, context):
11761176

11771177
appsync_service.update_document(document)
11781178

1179-
# Prepare response
1179+
# Prepare response using new serialization method
1180+
# Use working bucket for document compression
1181+
working_bucket = os.environ.get('WORKING_BUCKET')
1182+
if not working_bucket:
1183+
logger.warning("WORKING_BUCKET environment variable not set, using output_bucket for compression")
1184+
working_bucket = output_bucket
1185+
11801186
response = {
1181-
"document": document.to_dict(),
1187+
"document": document.serialize_document(working_bucket, "processresults", logger),
11821188
"hitl_triggered": hitl_triggered,
11831189
"bda_response_count": len(bda_responses)
11841190
}

patterns/pattern-1/src/summarization_function/index.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,13 @@ def handler(event, context):
4141
if not document_dict:
4242
raise ValueError("No document data provided")
4343

44-
# Convert dict to Document object
45-
document = Document.from_dict(document_dict)
44+
# Get working bucket and load document using new utility method
45+
working_bucket = os.environ.get('WORKING_BUCKET')
46+
if not working_bucket:
47+
raise ValueError("WORKING_BUCKET environment variable not set")
48+
49+
# Convert dict to Document object using new utility method
50+
document = Document.load_document(document_dict, working_bucket, logger)
4651

4752
# Update document status to SUMMARIZING
4853
document.status = Status.SUMMARIZING
@@ -71,9 +76,9 @@ def handler(event, context):
7176
else:
7277
logger.warning("Document summarization completed but no summary report URI was set")
7378

74-
# Return the processed document
79+
# Return the processed document using new serialization method
7580
return {
76-
'document': processed_document.to_dict(),
81+
'document': processed_document.serialize_document(working_bucket, "summarization", logger),
7782
}
7883

7984
except Exception as e:
@@ -90,4 +95,4 @@ def handler(event, context):
9095
except Exception as status_error:
9196
logger.error(f"Failed to update document status: {str(status_error)}", exc_info=True)
9297

93-
raise e
98+
raise e

patterns/pattern-1/template.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,7 @@ Resources:
594594
ENABLE_HITL: !Ref EnableHITL
595595
DB_NAME: !Ref BDAMetadataTable
596596
BDA_PROJECT_ARN: !Ref BDAProjectArn
597+
WORKING_BUCKET: !Ref WorkingBucket
597598
LoggingConfig:
598599
LogGroup: !Ref ProcessResultsFunctionLogGroup
599600
Policies:
@@ -678,6 +679,7 @@ Resources:
678679
GUARDRAIL_ID_AND_VERSION: !If [HasGuardrailConfig, !Sub "${BedrockGuardrailId}:${BedrockGuardrailVersion}", ""]
679680
LOG_LEVEL: !Ref LogLevel
680681
APPSYNC_API_URL: !Ref AppSyncApiUrl
682+
WORKING_BUCKET: !Ref WorkingBucket
681683
LoggingConfig:
682684
LogGroup: !Ref SummarizationFunctionLogGroup
683685
Policies:
@@ -686,6 +688,8 @@ Resources:
686688
BucketName: !Ref InputBucket
687689
- S3CrudPolicy:
688690
BucketName: !Ref OutputBucket
691+
- S3CrudPolicy:
692+
BucketName: !Ref WorkingBucket
689693
- DynamoDBCrudPolicy:
690694
TableName: !Ref ConfigurationTable
691695
- Statement:
@@ -966,6 +970,19 @@ Resources:
966970

967971
HITLWaitFunction:
968972
Type: AWS::Serverless::Function
973+
Metadata:
974+
cfn_nag:
975+
rules_to_suppress:
976+
- id: W11
977+
reason: "Cloudwatch does not support resource-level permissions"
978+
- id: W89
979+
reason: "This Lambda function does not require VPC access as it only interacts with AWS services via AWS APIs"
980+
- id: W92
981+
reason: "Function does not require concurrent execution limits as it is designed to scale based on demand"
982+
# checkov:skip=CKV_AWS_116: "DLQ not required for this function as StepFunctions will handle retries"
983+
# checkov:skip=CKV_AWS_173: "Environment variables do not contain sensitive data - only configuration values like feature flags and non-sensitive settings"
984+
# checkov:skip=CKV_AWS_117: "Function does not require VPC access as it only interacts with AWS services via APIs"
985+
# checkov:skip=CKV_AWS_115: "Function does not require reserved concurrency as it scales based on demand"
969986
Properties:
970987
CodeUri: src/hitl-wait-function/
971988
Handler: index.lambda_handler

template.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3549,6 +3549,11 @@ Resources:
35493549
A2IHumanTaskUILambdaRole:
35503550
Type: AWS::IAM::Role
35513551
Condition: IsPattern1HITLEnabled
3552+
Metadata:
3553+
cfn_nag:
3554+
rules_to_suppress:
3555+
- id: W11
3556+
reason: "Role requires * resource access for SageMaker A2I operations as resource name is not available"
35523557
Properties:
35533558
AssumeRolePolicyDocument:
35543559
Version: '2012-10-17'
@@ -3671,6 +3676,16 @@ Resources:
36713676
GetWorkforceURLFunction:
36723677
Type: AWS::Serverless::Function
36733678
Condition: IsPattern1HITLEnabled
3679+
Metadata:
3680+
cfn_nag:
3681+
rules_to_suppress:
3682+
- id: W89
3683+
reason: "Function does not require VPC access as it only interacts with AWS services via APIs"
3684+
- id: W92
3685+
reason: "Function does not require reserved concurrency as it scales based on demand"
3686+
# checkov:skip=CKV_AWS_116: "DLQ not required for Cfn Custom Resource function"
3687+
# checkov:skip=CKV_AWS_117: "Function does not require VPC access as it only interacts with AWS services via APIs"
3688+
# checkov:skip=CKV_AWS_115: "Function does not require reserved concurrency as it scales based on demand"
36743689
Properties:
36753690
Handler: index.handler
36763691
Runtime: python3.12

0 commit comments

Comments
 (0)