Merge branch 'feature/chat-with-document' into 'develop'

rstrahan · rstrahan · commit 65a22681f1ea · 2025-09-11T22:02:36.000Z
update for chat with document resolver to use the IDP common bedrock class

See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!295
diff --git a/lib/idp_common_pkg/setup.py b/lib/idp_common_pkg/setup.py
@@ -79,6 +79,13 @@
         "ipykernel>=6.29.5,<7.0.0",
         "jupyter>=1.1.1,<2.0.0",
     ],
+    # Agents module dependencies
+    "agents": [
+        "strands-agents>=1.0.0",
+        "strands-agents-tools>=0.2.2",
+        "bedrock-agentcore>=0.1.1",  # Specifically for the code interpreter tool
+        "regex>=2024.0.0,<2026.0.0",  # Pin regex version to avoid conflicts
+    ],
     # Full package with all dependencies
     "all": [
         "Pillow==11.2.1",
@@ -91,6 +98,10 @@
         "pyarrow==20.0.0",
         "openpyxl==3.1.5",
         "python-docx==1.2.0",
+        "strands-agents>=1.0.0",
+        "strands-agents-tools>=0.2.2",
+        "bedrock-agentcore>=0.1.1",
+        "regex>=2024.0.0,<2026.0.0",
     ],
 }
 
diff --git a/src/lambda/chat_with_document_resolver/index.py b/src/lambda/chat_with_document_resolver/index.py
@@ -11,12 +11,64 @@
 import os
 from urllib.parse import urlparse
 from botocore.exceptions import ClientError
+from idp_common.bedrock.client import BedrockClient
 
 # Set up logging
 logger = logging.getLogger()
 logger.setLevel(os.environ.get("LOG_LEVEL", "INFO"))
 # Get LOG_LEVEL from environment variable with INFO as default
 
+def s3_object_exists(bucket, key):
+    try:
+        s3 = boto3.client('s3')
+        s3.head_object(Bucket=bucket, Key=key)
+        return True
+    except ClientError as e:
+        if e.response['Error']['Code'] == '404':
+            return False
+        else:
+            raise
+
+def get_full_text(bucket, key):
+    try:
+        dynamodb = boto3.resource('dynamodb')
+        tracking_table = dynamodb.Table(os.environ['TRACKING_TABLE_NAME'])
+        
+        doc_pk = f"doc#{key}"
+        response = tracking_table.get_item(
+            Key={'PK': doc_pk, 'SK': 'none'}
+        )
+        
+        if 'Item' not in response:
+            logger.info(f"Document {key} not found")
+            raise Exception(f"Document {key} not found")
+            
+        document = response['Item']
+        pages = document.get('Pages', {})
+        sorted_pages = sorted(pages, key=lambda x: x['Id'])
+
+        s3 = boto3.client('s3')
+        all_text = ""
+        
+        for page in sorted_pages:
+            if 'TextUri' in page:
+                # Extract S3 key from URI
+                text_key = page['TextUri'].replace(f"s3://{bucket}/", "")
+                
+                try:
+                    response = s3.get_object(Bucket=bucket, Key=text_key)
+                    page_text = response['Body'].read().decode('utf-8')
+                    all_text += f"<page-number>{page['Id']}</page-number>\n{page_text}\n\n"
+                except Exception as e:
+                    logger.warning(f"Failed to load page {page['Id']}: {e}")
+                    
+        return all_text
+        
+    except Exception as e:
+        logger.error(f"Error getting document pages: {str(e)}")
+        raise
+
+
 def get_summarization_model():
     """Get the summarization model from configuration table"""
     try:
@@ -51,106 +103,79 @@ def handler(event, context):
         prompt = event['arguments']['prompt']
         history = event['arguments']['history']
 
-        full_prompt = "You are an assistant that's responsible for getting details from document text attached here based on questions from the user.\n\n"
-        full_prompt += "If you don't know the answer, just say that you don't know. Don't try to make up an answer.\n\n"
-        full_prompt += "Additionally, use the user and assistant responses in the following JSON object to see what's been asked and what the resposes were in the past.\n\n"
-        # full_prompt += "Your response MUST be in the following JSON format: {'content': [{'text': 'String'}]}.\n\n"
-        # full_prompt += "You MUST NOT include outside of that JSON format.\n\n"
-        # full_prompt += "Do NOT include the role or anything else in the response."
-        full_prompt += "The history JSON object is: " + json.dumps(history) + ".\n\n"
+        full_prompt = "The history JSON object is: " + json.dumps(history) + ".\n\n"
         full_prompt += "The user's question is: " + prompt + "\n\n"
 
         # this feature is not enabled until the model can be selected on the chat screen
         # selectedModelId = event['arguments']['modelId']
         selectedModelId = get_summarization_model()
 
         logger.info(f"Processing S3 URI: {objectKey}")
+        logger.info(f"Region: {os.environ['AWS_REGION']}")
 
         output_bucket = os.environ['OUTPUT_BUCKET']
 
-        bedrock_runtime = boto3.client('bedrock-runtime', region_name='us-west-2')
+        bedrock_runtime = boto3.client('bedrock-runtime', region_name=os.environ['AWS_REGION'])
 
-        # Call Bedrock Runtime to get Python code based on the prompt
         if (len(objectKey)):
-            # encoded_string = objectKey.encode()
-            # md5_hash = hashlib.md5(encoded_string)
-            # hex_representation = md5_hash.hexdigest()
-
-            # full text key
             fulltext_key = objectKey + '/summary/fulltext.txt'
+            content_str = ""
+            s3 = boto3.client('s3')
+
+            if not s3_object_exists(output_bucket, fulltext_key):
+                logger.info(f"Creating full text file: {fulltext_key}")
+                content_str = get_full_text(output_bucket, objectKey)
+
+                s3.put_object(
+                    Bucket=output_bucket,
+                    Key=fulltext_key,
+                    Body=content_str.encode('utf-8')
+                )
+            else:
+                # read full contents of the object as text
+                response = s3.get_object(Bucket=output_bucket, Key=fulltext_key)
+                content_str = response['Body'].read().decode('utf-8')
 
             logger.info(f"Model: {selectedModelId}")
             logger.info(f"Output Bucket: {output_bucket}")
             logger.info(f"Full Text Key: {fulltext_key}")
 
-            # read full contents of the object as text
-            s3 = boto3.client('s3')
-            response = s3.get_object(Bucket=output_bucket, Key=fulltext_key)
-            content_str = response['Body'].read().decode('utf-8')
-
-            message = [
-                {
-                    "role":"user",
-                    "content": [
-                        {
-                            "text": content_str
-                        },
-                        {
-                           "cachePoint" : {
-                                'type': 'default'
-                            }
-                        }
-                    ]
-                },
+            client = BedrockClient()
+            # Content with cachepoint tags
+            content = [
                 {
-                    "role":"user",
-                    "content": [
-                        {
-                            "text": full_prompt
-                        }
-                    ]
+                    "text": content_str + """
+                    <<CACHEPOINT>>
+                    """ + full_prompt
                 }
             ]
 
-            # print('invoking model converse')
-
-            selectedModelId = 'us.amazon.nova-pro-v1:0'
-            response = bedrock_runtime.converse(
-                modelId=selectedModelId,
-                messages=message
+            model_response = client.invoke_model(
+                model_id="us.amazon.nova-pro-v1:0",
+                system_prompt="You are an assistant that's responsible for getting details from document text attached here based on questions from the user.\n\nIf you don't know the answer, just say that you don't know. Don't try to make up an answer.\n\nAdditionally, use the user and assistant responses in the following JSON object to see what's been asked and what the resposes were in the past.\n\n",
+                content=content,
+                temperature=0.0
             )
 
-            token_usage = response['usage']
-            # print(f"Input tokens:  {token_usage['inputTokens']}")
-            # print(f"Output tokens:  {token_usage['outputTokens']}")
-            # print(f"Total tokens:  {token_usage['totalTokens']}")
-            # print(f"cacheReadInputTokens:  {token_usage['cacheReadInputTokens']}")
-            # print(f"cacheWriteInputTokens:  {token_usage['cacheWriteInputTokens']}")
-            # print(f"Stop reason: {response['stopReason']}")
-
-
-            output_message = response['output']['message']
-            text_content = output_message['content'][0]['text']
-
-            chat_response = {"cr": {"content": [{"text": text_content}]}}
+            text = client.extract_text_from_response(model_response)
+            
+            chat_response = {"cr": {"content": [{"text": text}]}}
             return json.dumps(chat_response)
 
-
-
     except ClientError as e:
         error_code = e.response['Error']['Code']
         error_message = e.response['Error']['Message']
-        logger.error(f"S3 ClientError: {error_code} - {error_message}")
+        logger.error(f"Error: {error_code} - {error_message}")
         
         if error_code == 'NoSuchKey':
-            raise Exception(f"File not found: {fulltext_key}. The chat feature will not work with files that were processed prior to v0.3.11.")
+            raise Exception(f"File not found: {fulltext_key}")
         elif error_code == 'NoSuchBucket':
             raise Exception(f"Bucket not found: {output_bucket}")
         else:
             raise Exception(error_message)
             
     except Exception as e:
         logger.error(f"Unexpected error: {str(e)}")
-        raise Exception(f"Error fetching file: {str(e)}")
+        raise Exception(f"Unexpected error: {str(e)}")
     
     return response_data
diff --git a/src/lambda/chat_with_document_resolver/requirements.txt b/src/lambda/chat_with_document_resolver/requirements.txt
@@ -1 +1,2 @@
-boto3>=1.38.45
+boto3>=1.38.45
+./lib/idp_common_pkg  # common utilities package
diff --git a/template.yaml b/template.yaml
@@ -6076,13 +6076,16 @@ Resources:
           LOG_LEVEL: !Ref LogLevel
           OUTPUT_BUCKET: !Ref OutputBucket
           CONFIGURATION_TABLE_NAME: !Ref ConfigurationTable
+          TRACKING_TABLE_NAME: !Ref TrackingTable
       LoggingConfig:
         LogGroup: !Ref ChatWithDocumentResolverFunctionLogGroup
       Policies:
-        - S3ReadPolicy:
+        - S3CrudPolicy:
             BucketName: !Ref OutputBucket
         - DynamoDBCrudPolicy:
             TableName: !Ref ConfigurationTable
+        - DynamoDBCrudPolicy:
+            TableName: !Ref TrackingTable
         - Statement: 
           - !If
             - ShouldUseDocumentKnowledgeBase
@@ -6098,6 +6101,10 @@ Resources:
             Resource:
               - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
               - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+          - Effect: Allow
+            Action:
+              - cloudwatch:PutMetricData
+            Resource: "*"
           - Effect: Allow
             Action:
               - "bedrock:GetInferenceProfile"

Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`		`-boto3>=1.38.45`
	`1`	`+boto3>=1.38.45`
	`2`	`+./lib/idp_common_pkg # common utilities package`