Skip to content

Commit 65a2268

Browse files
committed
Merge branch 'feature/chat-with-document' into 'develop'
Update the chat-with-document resolver to use the IDP common Bedrock class. See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!295
2 parents bde0fed + c6ef9af commit 65a2268

File tree

4 files changed

+109
-65
lines changed

4 files changed

+109
-65
lines changed

lib/idp_common_pkg/setup.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@
7979
"ipykernel>=6.29.5,<7.0.0",
8080
"jupyter>=1.1.1,<2.0.0",
8181
],
82+
# Agents module dependencies
83+
"agents": [
84+
"strands-agents>=1.0.0",
85+
"strands-agents-tools>=0.2.2",
86+
"bedrock-agentcore>=0.1.1", # Specifically for the code interpreter tool
87+
"regex>=2024.0.0,<2026.0.0", # Pin regex version to avoid conflicts
88+
],
8289
# Full package with all dependencies
8390
"all": [
8491
"Pillow==11.2.1",
@@ -91,6 +98,10 @@
9198
"pyarrow==20.0.0",
9299
"openpyxl==3.1.5",
93100
"python-docx==1.2.0",
101+
"strands-agents>=1.0.0",
102+
"strands-agents-tools>=0.2.2",
103+
"bedrock-agentcore>=0.1.1",
104+
"regex>=2024.0.0,<2026.0.0",
94105
],
95106
}
96107

src/lambda/chat_with_document_resolver/index.py

Lines changed: 88 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,64 @@
1111
import os
1212
from urllib.parse import urlparse
1313
from botocore.exceptions import ClientError
14+
from idp_common.bedrock.client import BedrockClient
1415

1516
# Set up logging
1617
logger = logging.getLogger()
1718
logger.setLevel(os.environ.get("LOG_LEVEL", "INFO"))
1819
# Get LOG_LEVEL from environment variable with INFO as default
1920

21+
def s3_object_exists(bucket, key):
    """Report whether an object is present in S3.

    Issues a ``head_object`` request for ``key`` in ``bucket``.

    Args:
        bucket: Name of the S3 bucket to query.
        key: Object key to look for.

    Returns:
        True when the HEAD request succeeds, False when S3 answers with
        error code ``'404'``.

    Raises:
        ClientError: For any error code other than ``'404'``.
    """
    try:
        client = boto3.client('s3')
        client.head_object(Bucket=bucket, Key=key)
    except ClientError as err:
        # Only a 404 means "missing"; everything else is a real failure.
        if e_code_is_missing := (err.response['Error']['Code'] == '404'):
            return not e_code_is_missing
        raise
    else:
        return True
31+
32+
def get_full_text(bucket, key):
    """Assemble a document's full text from its per-page text objects.

    Reads the document's record from the DynamoDB table named by the
    ``TRACKING_TABLE_NAME`` environment variable (partition key
    ``doc#<key>``, sort key ``none``), then downloads the text object of
    every page that carries a ``TextUri`` and joins the pages in ascending
    ``Id`` order. Each page is preceded by a
    ``<page-number>ID</page-number>`` line.

    Args:
        bucket: S3 bucket the page text objects are read from.
        key: Document object key used to build the tracking-table key.

    Returns:
        The concatenated page text. Empty string when the record lists no
        pages or none of them could be loaded.

    Raises:
        Exception: When no tracking record exists for ``key``. Any other
            error from DynamoDB or from sorting the pages is logged and
            re-raised unchanged.
    """
    try:
        dynamodb = boto3.resource('dynamodb')
        tracking_table = dynamodb.Table(os.environ['TRACKING_TABLE_NAME'])

        doc_pk = f"doc#{key}"
        item_response = tracking_table.get_item(
            Key={'PK': doc_pk, 'SK': 'none'}
        )

        if 'Item' not in item_response:
            logger.info(f"Document {key} not found")
            raise Exception(f"Document {key} not found")

        document = item_response['Item']
        # Pages is iterated as a list of dicts, so default to an empty list
        # (also covers an attribute that is present but null).
        pages = document.get('Pages') or []
        # NOTE(review): assumes every page 'Id' is mutually comparable
        # (e.g. all numeric) -- confirm against the writer of this table.
        sorted_pages = sorted(pages, key=lambda page: page['Id'])

        s3 = boto3.client('s3')
        uri_prefix = f"s3://{bucket}/"
        page_chunks = []

        for page in sorted_pages:
            if 'TextUri' not in page:
                continue

            # Strip the bucket prefix only when it really is a prefix;
            # str.replace would also rewrite matches inside the key.
            text_uri = page['TextUri']
            if text_uri.startswith(uri_prefix):
                text_key = text_uri[len(uri_prefix):]
            else:
                text_key = text_uri

            try:
                page_object = s3.get_object(Bucket=bucket, Key=text_key)
                page_text = page_object['Body'].read().decode('utf-8')
                page_chunks.append(
                    f"<page-number>{page['Id']}</page-number>\n{page_text}\n\n"
                )
            except Exception as e:
                # Best effort: a page that cannot be read is skipped so the
                # remaining pages are still returned.
                logger.warning(f"Failed to load page {page['Id']}: {e}")

        return "".join(page_chunks)

    except Exception as e:
        logger.error(f"Error getting document pages: {str(e)}")
        raise
70+
71+
2072
def get_summarization_model():
2173
"""Get the summarization model from configuration table"""
2274
try:
@@ -51,106 +103,79 @@ def handler(event, context):
51103
prompt = event['arguments']['prompt']
52104
history = event['arguments']['history']
53105

54-
full_prompt = "You are an assistant that's responsible for getting details from document text attached here based on questions from the user.\n\n"
55-
full_prompt += "If you don't know the answer, just say that you don't know. Don't try to make up an answer.\n\n"
56-
full_prompt += "Additionally, use the user and assistant responses in the following JSON object to see what's been asked and what the resposes were in the past.\n\n"
57-
# full_prompt += "Your response MUST be in the following JSON format: {'content': [{'text': 'String'}]}.\n\n"
58-
# full_prompt += "You MUST NOT include outside of that JSON format.\n\n"
59-
# full_prompt += "Do NOT include the role or anything else in the response."
60-
full_prompt += "The history JSON object is: " + json.dumps(history) + ".\n\n"
106+
full_prompt = "The history JSON object is: " + json.dumps(history) + ".\n\n"
61107
full_prompt += "The user's question is: " + prompt + "\n\n"
62108

63109
# this feature is not enabled until the model can be selected on the chat screen
64110
# selectedModelId = event['arguments']['modelId']
65111
selectedModelId = get_summarization_model()
66112

67113
logger.info(f"Processing S3 URI: {objectKey}")
114+
logger.info(f"Region: {os.environ['AWS_REGION']}")
68115

69116
output_bucket = os.environ['OUTPUT_BUCKET']
70117

71-
bedrock_runtime = boto3.client('bedrock-runtime', region_name='us-west-2')
118+
bedrock_runtime = boto3.client('bedrock-runtime', region_name=os.environ['AWS_REGION'])
72119

73-
# Call Bedrock Runtime to get Python code based on the prompt
74120
if (len(objectKey)):
75-
# encoded_string = objectKey.encode()
76-
# md5_hash = hashlib.md5(encoded_string)
77-
# hex_representation = md5_hash.hexdigest()
78-
79-
# full text key
80121
fulltext_key = objectKey + '/summary/fulltext.txt'
122+
content_str = ""
123+
s3 = boto3.client('s3')
124+
125+
if not s3_object_exists(output_bucket, fulltext_key):
126+
logger.info(f"Creating full text file: {fulltext_key}")
127+
content_str = get_full_text(output_bucket, objectKey)
128+
129+
s3.put_object(
130+
Bucket=output_bucket,
131+
Key=fulltext_key,
132+
Body=content_str.encode('utf-8')
133+
)
134+
else:
135+
# read full contents of the object as text
136+
response = s3.get_object(Bucket=output_bucket, Key=fulltext_key)
137+
content_str = response['Body'].read().decode('utf-8')
81138

82139
logger.info(f"Model: {selectedModelId}")
83140
logger.info(f"Output Bucket: {output_bucket}")
84141
logger.info(f"Full Text Key: {fulltext_key}")
85142

86-
# read full contents of the object as text
87-
s3 = boto3.client('s3')
88-
response = s3.get_object(Bucket=output_bucket, Key=fulltext_key)
89-
content_str = response['Body'].read().decode('utf-8')
90-
91-
message = [
92-
{
93-
"role":"user",
94-
"content": [
95-
{
96-
"text": content_str
97-
},
98-
{
99-
"cachePoint" : {
100-
'type': 'default'
101-
}
102-
}
103-
]
104-
},
143+
client = BedrockClient()
144+
# Content with cachepoint tags
145+
content = [
105146
{
106-
"role":"user",
107-
"content": [
108-
{
109-
"text": full_prompt
110-
}
111-
]
147+
"text": content_str + """
148+
<<CACHEPOINT>>
149+
""" + full_prompt
112150
}
113151
]
114152

115-
# print('invoking model converse')
116-
117-
selectedModelId = 'us.amazon.nova-pro-v1:0'
118-
response = bedrock_runtime.converse(
119-
modelId=selectedModelId,
120-
messages=message
153+
model_response = client.invoke_model(
154+
model_id="us.amazon.nova-pro-v1:0",
155+
system_prompt="You are an assistant that's responsible for getting details from document text attached here based on questions from the user.\n\nIf you don't know the answer, just say that you don't know. Don't try to make up an answer.\n\nAdditionally, use the user and assistant responses in the following JSON object to see what's been asked and what the resposes were in the past.\n\n",
156+
content=content,
157+
temperature=0.0
121158
)
122159

123-
token_usage = response['usage']
124-
# print(f"Input tokens: {token_usage['inputTokens']}")
125-
# print(f"Output tokens: {token_usage['outputTokens']}")
126-
# print(f"Total tokens: {token_usage['totalTokens']}")
127-
# print(f"cacheReadInputTokens: {token_usage['cacheReadInputTokens']}")
128-
# print(f"cacheWriteInputTokens: {token_usage['cacheWriteInputTokens']}")
129-
# print(f"Stop reason: {response['stopReason']}")
130-
131-
132-
output_message = response['output']['message']
133-
text_content = output_message['content'][0]['text']
134-
135-
chat_response = {"cr": {"content": [{"text": text_content}]}}
160+
text = client.extract_text_from_response(model_response)
161+
162+
chat_response = {"cr": {"content": [{"text": text}]}}
136163
return json.dumps(chat_response)
137164

138-
139-
140165
except ClientError as e:
141166
error_code = e.response['Error']['Code']
142167
error_message = e.response['Error']['Message']
143-
logger.error(f"S3 ClientError: {error_code} - {error_message}")
168+
logger.error(f"Error: {error_code} - {error_message}")
144169

145170
if error_code == 'NoSuchKey':
146-
raise Exception(f"File not found: {fulltext_key}. The chat feature will not work with files that were processed prior to v0.3.11.")
171+
raise Exception(f"File not found: {fulltext_key}")
147172
elif error_code == 'NoSuchBucket':
148173
raise Exception(f"Bucket not found: {output_bucket}")
149174
else:
150175
raise Exception(error_message)
151176

152177
except Exception as e:
153178
logger.error(f"Unexpected error: {str(e)}")
154-
raise Exception(f"Error fetching file: {str(e)}")
179+
raise Exception(f"Unexpected error: {str(e)}")
155180

156181
return response_data
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
boto3>=1.38.45
1+
boto3>=1.38.45
2+
./lib/idp_common_pkg # common utilities package

template.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6076,13 +6076,16 @@ Resources:
60766076
LOG_LEVEL: !Ref LogLevel
60776077
OUTPUT_BUCKET: !Ref OutputBucket
60786078
CONFIGURATION_TABLE_NAME: !Ref ConfigurationTable
6079+
TRACKING_TABLE_NAME: !Ref TrackingTable
60796080
LoggingConfig:
60806081
LogGroup: !Ref ChatWithDocumentResolverFunctionLogGroup
60816082
Policies:
6082-
- S3ReadPolicy:
6083+
- S3CrudPolicy:
60836084
BucketName: !Ref OutputBucket
60846085
- DynamoDBCrudPolicy:
60856086
TableName: !Ref ConfigurationTable
6087+
- DynamoDBCrudPolicy:
6088+
TableName: !Ref TrackingTable
60866089
- Statement:
60876090
- !If
60886091
- ShouldUseDocumentKnowledgeBase
@@ -6098,6 +6101,10 @@ Resources:
60986101
Resource:
60996102
- !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
61006103
- !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
6104+
- Effect: Allow
6105+
Action:
6106+
- cloudwatch:PutMetricData
6107+
Resource: "*"
61016108
- Effect: Allow
61026109
Action:
61036110
- "bedrock:GetInferenceProfile"

0 commit comments

Comments
 (0)