
Commit 8663517

Notebooks and helper script updated
1. Updated the script to use the latest langchain_community module.
2. Updated the Mistral models to use LangChain.
3. Added Claude 3 models.
4. Updated the sort prompt for the CSV data format.
1 parent 15262f7 commit 8663517

File tree: 4 files changed (+282, -19 lines)


notebooks/helper_functions.py

Lines changed: 253 additions & 0 deletions
@@ -0,0 +1,253 @@
"""
Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: MIT-0
"""
import json
from langchain.prompts.prompt import PromptTemplate
from langchain_community.llms import Bedrock
from langchain_community.chat_models import BedrockChat
import logging
import os
from timeit import default_timer as timer

# Create the logger
DEFAULT_LOG_LEVEL = logging.NOTSET
DEFAULT_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
log_level = os.environ.get('LOG_LEVEL')
match log_level:
    case '10':
        log_level = logging.DEBUG
    case '20':
        log_level = logging.INFO
    case '30':
        log_level = logging.WARNING
    case '40':
        log_level = logging.ERROR
    case '50':
        log_level = logging.CRITICAL
    case _:
        log_level = DEFAULT_LOG_LEVEL
log_format = os.environ.get('LOG_FORMAT')
if log_format is None:
    log_format = DEFAULT_LOG_FORMAT
elif len(log_format) == 0:
    log_format = DEFAULT_LOG_FORMAT
# Set the basic config for the logger
logging.basicConfig(level=log_level, format=log_format)


# Function to get character and word counts from text
def get_counts_from_text(text):
    if text is None:
        text = ''
    char_count = len(text)
    word_count = len(text.split())
    return char_count, word_count


# Function to check if the specified modality exists in both input and output
def does_modality_exists(input_modality_list, output_modality_list, required_modality):
    if required_modality in input_modality_list:
        if required_modality in output_modality_list:
            return True
        else:
            return False
    else:
        return False


# Function to get the max output token length for the specified model
def get_max_output_length(model_id):
    # These limits have been obtained from
    # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html
    # For Anthropic models, the provider recommends a limit of 4000
    # for optimal performance even though they support 4096
    model_id_output_length_dict = {
        'amazon.titan-text-lite-v1': 8000,
        'amazon.titan-text-express-v1': 8000,
        'anthropic.claude-instant-v1': 4000,
        'anthropic.claude-v2': 4000,
        'anthropic.claude-v2:1': 4000,
        'anthropic.claude-3-sonnet-20240229-v1:0': 4000,
        'anthropic.claude-3-haiku-20240307-v1:0': 4000,
        'ai21.j2-mid-v1': 8191,
        'ai21.j2-ultra-v1': 8191,
        'cohere.command-light-text-v14': 4096,
        'cohere.command-text-v14': 4096,
        'meta.llama2-13b-chat-v1': 2048,
        'meta.llama2-70b-chat-v1': 2048,
        'mistral.mistral-7b-instruct-v0:2': 8192,
        'mistral.mixtral-8x7b-instruct-v0:1': 4096
    }
    return model_id_output_length_dict.get(model_id, 0)


# Function to create the model-specific inference parameters
def get_model_kwargs(model_id, temperature, max_response_token_length):
    # Substitute the model's max response token length if it is specified as -1
    if max_response_token_length == -1:
        max_response_token_length = get_max_output_length(model_id)
    # Generate the model-specific inference parameters
    match model_id:
        case 'amazon.titan-text-lite-v1' | 'amazon.titan-text-express-v1':
            model_kwargs = {
                "temperature": temperature,
                "maxTokenCount": max_response_token_length
            }
        case 'anthropic.claude-instant-v1' | 'anthropic.claude-v2' | 'anthropic.claude-v2:1':
            model_kwargs = {
                "temperature": temperature,
                "max_tokens_to_sample": max_response_token_length
            }
        case 'anthropic.claude-3-sonnet-20240229-v1:0' | 'anthropic.claude-3-haiku-20240307-v1:0':
            model_kwargs = {
                "temperature": temperature,
                "max_tokens": max_response_token_length
            }
        case 'ai21.j2-mid-v1' | 'ai21.j2-ultra-v1':
            model_kwargs = {
                "temperature": temperature,
                "maxTokens": max_response_token_length
            }
        case 'cohere.command-light-text-v14' | 'cohere.command-text-v14':
            model_kwargs = {
                "temperature": temperature,
                "max_tokens": max_response_token_length
            }
        case 'meta.llama2-13b-chat-v1' | 'meta.llama2-70b-chat-v1':
            model_kwargs = {
                "temperature": temperature,
                "max_gen_len": max_response_token_length
            }
        case 'mistral.mistral-7b-instruct-v0:2' | 'mistral.mixtral-8x7b-instruct-v0:1':
            model_kwargs = {
                "temperature": temperature,
                "max_tokens": max_response_token_length
            }
        case _:
            model_kwargs = None
    # Return the model kwargs
    return model_kwargs


# Function to read the content of the specified file
def read_file(dir_name, file_name):
    logging.info('Reading content from file "{}"...'.format(file_name))
    with open(os.path.join(dir_name, file_name)) as f:
        content = f.read()
    logging.info('Completed reading content from file.')
    return content


# Function to prepare the prompt
def prepare_prompt(prompt_template_dir, prompt_template_file_name, **kwargs):
    prompt_template_file_path = os.path.join(prompt_template_dir, prompt_template_file_name)
    logging.info('Reading content from prompt template file "{}"...'.format(prompt_template_file_name))
    prompt_template = PromptTemplate.from_file(prompt_template_file_path)
    logging.info('Completed reading content from prompt template file.')
    logging.info('Substituting prompt variables...')
    prompt = prompt_template.format(**kwargs)
    logging.info('Completed substituting prompt variables.')
    return prompt


# Function to invoke a pre-created LLM client using the specified prompt
def invoke_llm(llm, prompt):
    logging.info('Invoking LLM "{}" with specified inference parameters "{}"...'.
                 format(llm.model_id, llm.model_kwargs))
    start = timer()
    prompt_response = llm.invoke(prompt)
    end = timer()
    logging.info(prompt + prompt_response)
    logging.info('Completed invoking LLM.')
    logging.info('Prompt processing duration = {} second(s)'.format(end - start))
    return prompt_response


# Function to invoke the specified LLM through the boto3 Bedrock Runtime client,
# using the specified prompt
def invoke_llm_with_bedrock_rt(model_id, bedrock_rt_client, temperature, max_response_token_length, prompt):
    # Create the request body
    json_body = get_model_kwargs(model_id, temperature, max_response_token_length)
    model_kwargs = json.dumps(json_body)
    json_body.update({"prompt": prompt})
    body = json.dumps(json_body)
    logging.info('Invoking LLM "{}" with specified inference parameters "{}"...'.
                 format(model_id, model_kwargs))
    start = timer()
    invoke_model_response = bedrock_rt_client.invoke_model(
        body=body,
        modelId=model_id
    )
    end = timer()
    logging.info('Completed invoking LLM.')
    # Parse the response body
    response_body = json.loads(invoke_model_response.get('body').read())
    outputs = response_body.get('outputs')
    prompt_response = outputs[0].get('text')
    logging.info(prompt + prompt_response)
    logging.info('Prompt processing duration = {} second(s)'.format(end - start))
    return prompt_response


# Function to invoke the specified LLM through the LangChain LLM client,
# using the specified prompt
def invoke_llm_with_lc_llm(model_id, bedrock_rt_client, temperature, max_response_token_length, prompt):
    # Create the LangChain LLM client
    logging.info('Creating LangChain LLM client for LLM "{}"...'.format(model_id))
    llm = Bedrock(
        model_id=model_id,
        model_kwargs=get_model_kwargs(model_id, temperature, max_response_token_length),
        client=bedrock_rt_client,
        streaming=False
    )
    logging.info('Completed creating LangChain LLM client for LLM.')
    logging.info('Invoking LLM "{}" with specified inference parameters "{}"...'.
                 format(llm.model_id, llm.model_kwargs))
    start = timer()
    prompt_response = llm.invoke(prompt)
    end = timer()
    logging.info(prompt + prompt_response)
    logging.info('Completed invoking LLM.')
    logging.info('Prompt processing duration = {} second(s)'.format(end - start))
    return prompt_response


# Function to invoke the specified LLM through the LangChain ChatModel client,
# using the specified prompt
def invoke_llm_with_lc_cm(model_id, bedrock_rt_client, temperature, max_response_token_length, prompt):
    # Create the LangChain ChatModel client
    logging.info('Creating LangChain ChatModel client for LLM "{}"...'.format(model_id))
    llm = BedrockChat(
        model_id=model_id,
        model_kwargs=get_model_kwargs(model_id, temperature, max_response_token_length),
        client=bedrock_rt_client,
        streaming=False
    )
    logging.info('Completed creating LangChain ChatModel client for LLM.')
    logging.info('Invoking LLM "{}" with specified inference parameters "{}"...'.
                 format(llm.model_id, llm.model_kwargs))
    start = timer()
    prompt_response = llm.invoke(prompt).content
    end = timer()
    logging.info(prompt + prompt_response)
    logging.info('Completed invoking LLM.')
    logging.info('Prompt processing duration = {} second(s)'.format(end - start))
    return prompt_response


# Function to process the steps required for example prompt 1
def process_prompt_1(model_id, bedrock_rt_client, temperature, max_response_token_length,
                     prompt_templates_dir, prompt_template_file, prompt_data, call_to_action):
    # Read the prompt template and perform variable substitution
    prompt = prepare_prompt(prompt_templates_dir, prompt_template_file,
                            DATA=prompt_data, CALL_TO_ACTION=call_to_action)
    # Invoke the LLM and return the response
    match model_id:
        case 'anthropic.claude-3-sonnet-20240229-v1:0' | 'anthropic.claude-3-haiku-20240307-v1:0':
            return invoke_llm_with_lc_cm(model_id, bedrock_rt_client, temperature,
                                         max_response_token_length, prompt)
        case _:
            return invoke_llm_with_lc_llm(model_id, bedrock_rt_client, temperature,
                                          max_response_token_length, prompt)
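
For context, here is a minimal usage sketch of the helper module above, as a notebook cell might call it. Only process_prompt_1 and its parameter names come from the script itself; the region, template directory, template file name and sample CSV rows are illustrative assumptions, not part of this commit. A Claude 3 model ID is routed through the LangChain ChatModel (BedrockChat) path, while the other model IDs go through the LangChain LLM (Bedrock) path.

import boto3
from helper_functions import process_prompt_1  # assumes the script is importable from the notebook's working directory

# Assumed region and client setup
bedrock_rt_client = boto3.client('bedrock-runtime', region_name='us-east-1')

# Illustrative inputs; the directory, file name and CSV rows are hypothetical
single_turn_conversation = process_prompt_1(
    model_id='anthropic.claude-3-sonnet-20240229-v1:0',   # routed to the BedrockChat (ChatModel) path
    bedrock_rt_client=bedrock_rt_client,
    temperature=0.0,
    max_response_token_length=-1,                          # -1 substitutes the model's max output length
    prompt_templates_dir='prompt_templates',               # hypothetical directory
    prompt_template_file='csv_prompt_template.txt',        # hypothetical file containing {DATA} and {CALL_TO_ACTION}
    prompt_data='title,price,publication_date\nBook A,12.99,2021-05-01\nBook B,8.50,2019-11-20',
    call_to_action='consider every line as a record. Then, sort the records in descending order of price.'
)
print(single_turn_conversation)

With a non-Claude-3 model ID such as 'anthropic.claude-v2', the same call would go through invoke_llm_with_lc_llm instead.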

notebooks/llm_csv_data_processing.ipynb

Lines changed: 9 additions & 7 deletions
@@ -18,7 +18,7 @@
 "* Sorting\n",
 "* Transformations\n",
 "\n",
-"We will use [LangChain](https://www.langchain.com/) to simplify the process of constructing the prompts and interacting with the LLMs. However, some new LLMs on Amazon Bedrock may not be supported on LangChain yet. For those, we will use the [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client. In the process of working through this notebook, you will learn how to setup the Amazon Bedrock client environment, configure security permissions and use prompt templates in LangChain."
+"We will use [LangChain](https://www.langchain.com/) to simplify the process of constructing the prompts and interacting with the LLMs. In the process of working through this notebook, you will learn how to setup the Amazon Bedrock client environment, configure security permissions and use prompt templates in LangChain."
 ]
 },
 {
@@ -165,9 +165,9 @@
 },
 "outputs": [],
 "source": [
-"!pip install boto3==1.34.58\n",
-"!pip install langchain==0.1.11\n",
-"!pip install sagemaker==2.210.0\n",
+"!pip install boto3==1.34.62\n",
+"!pip install langchain==0.1.12\n",
+"!pip install sagemaker==2.212.0\n",
 "\n",
 "import IPython\n",
 "\n",
@@ -649,8 +649,8 @@
 "#call_to_action = 'I want to buy all the books. How much will it cost?'\n",
 "\n",
 "#### Sorting (NOTE: You will NOT see accurate results most of the time)\n",
-"#call_to_action = 'sort the records in ascending order of publication date.'\n",
-"#call_to_action = 'sort the records in descending order of price.'\n",
+"#call_to_action = 'consider every line as a record. Then, sort the records in ascending order of publication date.'\n",
+"#call_to_action = 'consider every line as a record. Then, sort the records in descending order of price.'\n",
 "\n",
 "#### Transformations (NOTE: You will see accurate results almost all the time)\n",
 "#call_to_action = 'convert this CSV to a HTML table.'\n",
@@ -740,8 +740,10 @@
 "source": [
 "# Specify the model-id\n",
 "#model_id = \"anthropic.claude-instant-v1\"\n",
-"model_id = \"anthropic.claude-v2\"\n",
+"#model_id = \"anthropic.claude-v2\"\n",
 "#model_id = \"anthropic.claude-v2:1\"\n",
+"model_id = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n",
+"#model_id = \"anthropic.claude-3-haiku-20240307-v1:0\"\n",
 "\n",
 "# Prepare the prompt and invoke the LLM (./scripts/helper_functions.py)\n",
 "single_turn_conversation = process_prompt_1(model_id, bedrock_rt_client, temperature, max_response_token_length,\n",

notebooks/llm_json_data_processing.ipynb

Lines changed: 10 additions & 6 deletions
@@ -18,7 +18,7 @@
 "* Transformations\n",
 "* Sample API request generation from a Swagger document\n",
 "\n",
-"We will use [LangChain](https://www.langchain.com/) to simplify the process of constructing the prompts and interacting with the LLMs. However, some new LLMs on Amazon Bedrock may not be supported on LangChain yet. For those, we will use the [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client. In the process of working through this notebook, you will learn how to setup the Amazon Bedrock client environment, configure security permissions and use prompt templates in LangChain."
+"We will use [LangChain](https://www.langchain.com/) to simplify the process of constructing the prompts and interacting with the LLMs. In the process of working through this notebook, you will learn how to setup the Amazon Bedrock client environment, configure security permissions and use prompt templates in LangChain."
 ]
 },
 {
@@ -179,9 +179,9 @@
 },
 "outputs": [],
 "source": [
-"!pip install boto3==1.34.58\n",
-"!pip install langchain==0.1.11\n",
-"!pip install sagemaker==2.210.0\n",
+"!pip install boto3==1.34.62\n",
+"!pip install langchain==0.1.12\n",
+"!pip install sagemaker==2.212.0\n",
 "\n",
 "import IPython\n",
 "\n",
@@ -750,8 +750,10 @@
 "source": [
 "# Specify the model-id\n",
 "#model_id = \"anthropic.claude-instant-v1\"\n",
-"model_id = \"anthropic.claude-v2\"\n",
+"#model_id = \"anthropic.claude-v2\"\n",
 "#model_id = \"anthropic.claude-v2:1\"\n",
+"model_id = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n",
+"#model_id = \"anthropic.claude-3-haiku-20240307-v1:0\"\n",
 "\n",
 "# Prepare the prompt and invoke the LLM (./scripts/helper_functions.py)\n",
 "single_turn_conversation = process_prompt_1(model_id, bedrock_rt_client, temperature, max_response_token_length,\n",
@@ -1087,8 +1089,10 @@
 "source": [
 "# Specify the model-id\n",
 "#model_id = \"anthropic.claude-instant-v1\"\n",
-"model_id = \"anthropic.claude-v2\"\n",
+"#model_id = \"anthropic.claude-v2\"\n",
 "#model_id = \"anthropic.claude-v2:1\"\n",
+"model_id = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n",
+"#model_id = \"anthropic.claude-3-haiku-20240307-v1:0\"\n",
 "\n",
 "# Prepare the prompt and invoke the LLM (./scripts/helper_functions.py)\n",
 "single_turn_conversation = process_prompt_1(model_id, bedrock_rt_client, temperature, max_response_token_length,\n",

notebooks/llm_xml_data_processing.ipynb

Lines changed: 10 additions & 6 deletions
@@ -18,7 +18,7 @@
 "* Transformations\n",
 "* Sample data generation from schemas\n",
 "\n",
-"We will use [LangChain](https://www.langchain.com/) to simplify the process of constructing the prompts and interacting with the LLMs. However, some new LLMs on Amazon Bedrock may not be supported on LangChain yet. For those, we will use the [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client. In the process of working through this notebook, you will learn how to setup the Amazon Bedrock client environment, configure security permissions and use prompt templates in LangChain."
+"We will use [LangChain](https://www.langchain.com/) to simplify the process of constructing the prompts and interacting with the LLMs. In the process of working through this notebook, you will learn how to setup the Amazon Bedrock client environment, configure security permissions and use prompt templates in LangChain."
 ]
 },
 {
@@ -179,9 +179,9 @@
 },
 "outputs": [],
 "source": [
-"!pip install boto3==1.34.58\n",
-"!pip install langchain==0.1.11\n",
-"!pip install sagemaker==2.210.0\n",
+"!pip install boto3==1.34.62\n",
+"!pip install langchain==0.1.12\n",
+"!pip install sagemaker==2.212.0\n",
 "\n",
 "import IPython\n",
 "\n",
@@ -750,8 +750,10 @@
 "source": [
 "# Specify the model-id\n",
 "#model_id = \"anthropic.claude-instant-v1\"\n",
-"model_id = \"anthropic.claude-v2\"\n",
+"#model_id = \"anthropic.claude-v2\"\n",
 "#model_id = \"anthropic.claude-v2:1\"\n",
+"model_id = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n",
+"#model_id = \"anthropic.claude-3-haiku-20240307-v1:0\"\n",
 "\n",
 "# Prepare the prompt and invoke the LLM (./scripts/helper_functions.py)\n",
 "single_turn_conversation = process_prompt_1(model_id, bedrock_rt_client, temperature, max_response_token_length,\n",
@@ -1097,8 +1099,10 @@
 "source": [
 "# Specify the model-id\n",
 "#model_id = \"anthropic.claude-instant-v1\"\n",
-"model_id = \"anthropic.claude-v2\"\n",
+"#model_id = \"anthropic.claude-v2\"\n",
 "#model_id = \"anthropic.claude-v2:1\"\n",
+"model_id = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n",
+"#model_id = \"anthropic.claude-3-haiku-20240307-v1:0\"\n",
 "\n",
 "# Prepare the prompt and invoke the LLM (./scripts/helper_functions.py)\n",
 "single_turn_conversation = process_prompt_1(model_id, bedrock_rt_client, temperature, max_response_token_length,\n",
