
Commit 764ce60

Author: ajosh0504
Commit message: Updating agents notebook
Parent: b93b0ef

File tree

5 files changed (+1106, -74 lines)


labs/ai-agents-lab.ipynb

Lines changed: 65 additions & 68 deletions
@@ -21,7 +21,15 @@
   "execution_count": null,
   "metadata": {},
   "outputs": [],
-  "source": "import os\nimport sys\nfrom pymongo import MongoClient\n\n# Add parent directory to path to import from utils\nsys.path.append(os.path.join(os.path.dirname(os.getcwd())))\nfrom utils import track_progress"
+  "source": [
+   "import os\n",
+   "import sys\n",
+   "from pymongo import MongoClient\n",
+   "\n",
+   "# Add parent directory to path to import from utils\n",
+   "sys.path.append(os.path.join(os.path.dirname(os.getcwd())))\n",
+   "from utils import track_progress, set_env"
+  ]
  },
  {
   "cell_type": "code",
@@ -48,10 +56,15 @@
   ]
  },
  {
-  "cell_type": "markdown",
+  "cell_type": "code",
+  "execution_count": null,
   "metadata": {},
+  "outputs": [],
   "source": [
-   "### **Pick an LLM provider of your choice below**"
+   "# Set the LLM provider and passkey provided by your workshop instructor\n",
+   "# NOTE: LLM_PROVIDER can be set to one of \"aws\" / \"microsoft\" / \"google\"\n",
+   "LLM_PROVIDER = \"aws\"\n",
+   "PASSKEY = \"replace-with-passkey\""
   ]
  },
  {
@@ -60,9 +73,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "SERVERLESS_URL = os.environ.get(\"SERVERLESS_URL\")\n",
-   "# Can be one of \"aws\", \"google\" or \"microsoft\"\n",
-   "LLM_PROVIDER = \"aws\""
+   "# Obtain API keys from our AI model proxy and set them as environment variables -- DO NOT CHANGE\n",
+   "set_env([LLM_PROVIDER, \"voyageai\"], PASSKEY)"
   ]
  },
  {
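For readers following along without the workshop repo: the `set_env` helper lives in the lab's `utils` module and its body is not part of this diff. A minimal, hypothetical sketch of what such a helper might do; the proxy URL and response shape below are assumptions, not the lab's actual implementation:

    import os
    import requests

    def set_env(providers, passkey):
        """Hypothetical sketch: fetch API keys from a model proxy and export them."""
        PROXY_URL = "https://models.example.com/keys"  # assumed endpoint
        for provider in providers:
            resp = requests.post(PROXY_URL, json={"provider": provider, "passkey": passkey})
            resp.raise_for_status()
            # Assumed response shape: {"SOME_API_KEY": "value", ...}
            for name, value in resp.json().items():
                os.environ[name] = value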
@@ -192,29 +204,22 @@
    "        {\n",
    "            \"type\": \"vector\",\n",
    "            \"path\": \"embedding\",\n",
-   "            \"numDimensions\": 384,\n",
+   "            \"numDimensions\": 1024,\n",
    "            \"similarity\": \"cosine\",\n",
    "        }\n",
    "    ]\n",
    "  },\n",
    "}"
   ]
  },
- {
-  "cell_type": "markdown",
-  "metadata": {},
-  "source": [
-   "📚 Refer to the `utils.py` script under `notebooks/utils`"
-  ]
- },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the `create_index` function from the `utils` module to create a vector search index with the above definition for the `vs_collection` collection\n",
-   "<CODE_BLOCK_1>"
+   "create_index(vs_collection, VS_INDEX_NAME, model)"
   ]
  },
  {
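The `create_index` helper's body is also not shown in this diff. With a recent pymongo (4.6+), creating an Atlas Vector Search index from a definition like the one above is done via `create_search_index`; a sketch under the assumption that `model` carries the index definition from the earlier cell:

    from pymongo.operations import SearchIndexModel

    def create_index(collection, index_name, model):
        # Sketch of the assumed helper shape; create_search_index is pymongo's
        # API for creating Atlas Search / Vector Search indexes
        collection.create_search_index(
            SearchIndexModel(
                definition=model["definition"], name=index_name, type="vectorSearch"
            )
        )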
@@ -252,9 +257,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "# You may see a warning upon running this cell. You can ignore it.\n",
    "from langchain.agents import tool\n",
-   "from sentence_transformers import SentenceTransformer\n",
+   "import voyageai\n",
    "from typing import List"
   ]
  },
@@ -271,15 +275,15 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "# Load the `gte-small` model using the Sentence Transformers library\n",
-   "embedding_model = SentenceTransformer(\"thenlper/gte-small\")"
+   "# Initialize the Voyage AI client\n",
+   "vo = voyageai.Client()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-   "📚 https://huggingface.co/thenlper/gte-small#usage (See \"Use with sentence-transformers\" under Usage)"
+   "📚 https://docs.voyageai.com/docs/contextualized-chunk-embeddings#approach-2-contextualized-chunk-embeddings"
   ]
  },
  {
@@ -288,20 +292,24 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "# Define a function that takes a piece of text (`text`) as input, embeds it using the `embedding_model` instantiated above and returns the embedding as a list\n",
-   "# An array can be converted to a list using the `tolist()` method\n",
-   "def get_embedding(text: str) -> List[float]:\n",
+   "def get_embeddings(query: str) -> List[float]:\n",
    "    \"\"\"\n",
-   "    Generate the embedding for a piece of text.\n",
+   "    Get embeddings for an input query.\n",
    "\n",
    "    Args:\n",
-   "        text (str): Text to embed.\n",
+   "        query (str): Query string\n",
    "\n",
    "    Returns:\n",
-   "        List[float]: Embedding of the text as a list.\n",
+   "        List[float]: Embedding of the query string\n",
    "    \"\"\"\n",
-   "    embedding = <CODE_BLOCK_2>\n",
-   "    return embedding.tolist()"
+   "    # Use the `contextualized_embed` method of the Voyage AI API to embed the user query with the following arguments:\n",
+   "    # inputs: `query` wrapped in a list of lists\n",
+   "    # model: `voyage-context-3`\n",
+   "    # input_type: \"query\"\n",
+   "    embds_obj = <CODE_BLOCK_1>\n",
+   "    # Extract embeddings from the embeddings object\n",
+   "    embeddings = <CODE_BLOCK_2>\n",
+   "    return embeddings"
   ]
  },
  {
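For reference, one plausible completion of the two placeholders, following the Voyage AI contextualized-chunk-embeddings docs linked above (a sketch, not the lab's official solution):

    def get_embeddings(query: str) -> List[float]:
        # The query goes in as a single one-chunk "document" (a list of lists)
        embds_obj = vo.contextualized_embed(
            inputs=[[query]], model="voyage-context-3", input_type="query"
        )
        # One result per input list, one embedding per chunk
        return embds_obj.results[0].embeddings[0]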
@@ -330,8 +338,8 @@
    "        str: The retrieved information formatted as a string.\n",
    "    \"\"\"\n",
    "\n",
-   "    # Generate embedding for the `user_query` using the `get_embedding` function defined above\n",
-   "    query_embedding = <CODE_BLOCK_33>\n",
+   "    # Generate embeddings for the `user_query` using the `get_embeddings` function defined above\n",
+   "    query_embedding = <CODE_BLOCK_3>\n",
    "\n",
    "    # Define an aggregation pipeline consisting of a $vectorSearch stage, followed by a $project stage\n",
    "    # Set the number of candidates to 150 and only return the top 5 documents from the vector search\n",
@@ -481,7 +489,7 @@
   "source": [
    "from langchain_core.load import load\n",
    "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
-   "import requests"
+   "from utils import get_llm"
   ]
  },
  {
@@ -490,11 +498,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "# Obtain the Langchain LLM object from our serverless endpoint\n",
-   "llm_dict = requests.post(\n",
-   "    url=SERVERLESS_URL, json={\"task\": \"get_llm\", \"data\": LLM_PROVIDER}\n",
-   ").json()\n",
-   "llm = load(llm_dict[\"llm\"], secrets_map=llm_dict[\"secrets_map\"])"
+   "# Obtain the Langchain LLM object using the `get_llm` function from the `utils` module.\n",
+   "llm = get_llm(LLM_PROVIDER)"
   ]
  },
  {
@@ -560,7 +565,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "# Chain the `prompt` with the tool-bound llm using the `|` operator\n",
+   "# Chain the `prompt` with the tool-augmented llm using the `|` operator\n",
    "llm_with_tools = <CODE_BLOCK_10>"
   ]
  },
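In LangChain, the chain that `<CODE_BLOCK_10>` asks for is conventionally built by binding the tools to the LLM and piping the prompt into it (the `tools` list name is assumed from the surrounding notebook):

    # Standard LangChain pattern: prompt piped into a tool-bound LLM
    llm_with_tools = prompt | llm.bind_tools(tools)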
@@ -830,11 +835,7 @@
   "outputs": [],
   "source": [
    "# Visualize the graph\n",
-   "try:\n",
-   "    display(Image(app.get_graph().draw_mermaid_png()))\n",
-   "except Exception:\n",
-   "    # This requires some extra dependencies and is optional\n",
-   "    pass"
+   "app"
   ]
  },
  {
@@ -850,24 +851,21 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "# Stream outputs from the graph as they pass through its nodes\n",
    "def execute_graph(user_input: str) -> None:\n",
    "    \"\"\"\n",
    "    Stream outputs from the graph\n",
    "\n",
    "    Args:\n",
    "        user_input (str): User query string\n",
    "    \"\"\"\n",
-   "    # Add user input to the messages attribute of the graph state\n",
-   "    # The role of the message should be \"user\" and content should be `user_input`\n",
-   "    input = {\"messages\": [(\"user\", user_input)]}\n",
-   "    # Pass input to the graph and stream the outputs\n",
-   "    for output in app.stream(input):\n",
-   "        for key, value in output.items():\n",
-   "            print(f\"Node {key}:\")\n",
-   "            print(value)\n",
-   "    print(\"---FINAL ANSWER---\")\n",
-   "    print(value[\"messages\"][-1].content)"
+   "    # Stream outputs from each step in the graph\n",
+   "    for step in app.stream(\n",
+   "        {\"messages\": [{\"role\": \"user\", \"content\": user_input}]},\n",
+   "        # Stream full value of the state after each step\n",
+   "        stream_mode=\"values\",\n",
+   "    ):\n",
+   "        # Print the latest message from the step\n",
+   "        step[\"messages\"][-1].pretty_print()"
   ]
  },
  {
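A quick usage sketch of the rewritten helper, reusing the test query that appears later in the notebook:

    # Streams each step's latest message via pretty_print()
    execute_graph("What are some best practices for data backups in MongoDB?")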
@@ -939,26 +937,25 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "def execute_graph(thread_id: str, user_input: str) -> None:\n",
+   "def execute_graph_with_memory(thread_id: str, user_input: str) -> None:\n",
    "    \"\"\"\n",
    "    Stream outputs from the graph\n",
    "\n",
    "    Args:\n",
    "        thread_id (str): Thread ID for the checkpointer\n",
    "        user_input (str): User query string\n",
    "    \"\"\"\n",
-   "    # Add user input to the messages attribute of the graph state\n",
-   "    # The role of the message should be \"user\" and content should be `user_input`\n",
-   "    input = {\"messages\": [(\"user\", user_input)]}\n",
-   "    # Define a config containing the thread ID\n",
+   "    # Create a runtime config for the thread ID `thread_id`\n",
    "    config = <CODE_BLOCK_19>\n",
-   "    # Pass `input` and `config` to the graph and stream outputs\n",
-   "    for output in app.stream(input, config):\n",
-   "        for key, value in output.items():\n",
-   "            print(f\"Node {key}:\")\n",
-   "            print(value)\n",
-   "    print(\"---FINAL ANSWER---\")\n",
-   "    print(value[\"messages\"][-1].content)"
+   "    # Stream outputs from each step in the graph\n",
+   "    for step in app.stream(\n",
+   "        {\"messages\": [{\"role\": \"user\", \"content\": user_input}]},\n",
+   "        # Pass the config as an additional parameter\n",
+   "        config,\n",
+   "        stream_mode=\"values\",\n",
+   "    ):\n",
+   "        # Print the latest message from the step\n",
+   "        step[\"messages\"][-1].pretty_print()"
   ]
  },
  {
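For reference, LangGraph's documented shape for a per-thread runtime config (what `<CODE_BLOCK_19>` asks for) is a dict with a `configurable` key:

    # The checkpointer keys saved conversation state on this thread ID
    config = {"configurable": {"thread_id": thread_id}}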
@@ -968,7 +965,7 @@
   "outputs": [],
   "source": [
    "# Test graph execution with thread ID\n",
-   "execute_graph(\n",
+   "execute_graph_with_memory(\n",
    "    \"1\",\n",
    "    \"What are some best practices for data backups in MongoDB?\",\n",
    ")"
@@ -981,7 +978,7 @@
   "outputs": [],
   "source": [
    "# Follow-up question to ensure message history works\n",
-   "execute_graph(\n",
+   "execute_graph_with_memory(\n",
    "    \"1\",\n",
    "    \"What did I just ask you?\",\n",
    ")"
@@ -1014,4 +1011,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}

labs/ai-rag-lab.ipynb

Lines changed: 2 additions & 2 deletions
@@ -231,7 +231,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "def get_embeddings(content: List[str], input_type: str) -> List[List[float]]:\n",
+   "def get_embeddings(content: List[str], input_type: str) -> List[float] | List[List[float]]:\n",
    "    \"\"\"\n",
    "    Get contextualized embeddings for each chunk.\n",
    "\n",
@@ -240,7 +240,7 @@
    "        input_type (str): Type of input, either \"document\" or \"query\"\n",
    "\n",
    "    Returns:\n",
-   "        List[List[float]]: Contextualized embeddings\n",
+   "        List[float] | List[List[float]]: Contextualized embeddings\n",
    "    \"\"\"\n",
    "    # Use the `contextualized_embed` method of the Voyage AI API to get contextualized embeddings for each chunk with the following arguments:\n",
    "    # inputs: `content` wrapped in another list\n",
