2121 "execution_count" : null ,
2222 "metadata" : {},
2323 "outputs" : [],
24- "source" : " import os\n import sys\n from pymongo import MongoClient\n\n # Add parent directory to path to import from utils\n sys.path.append(os.path.join(os.path.dirname(os.getcwd())))\n from utils import track_progress"
24+ "source" : [
25+ " import os\n " ,
26+ " import sys\n " ,
27+ " from pymongo import MongoClient\n " ,
28+ " \n " ,
29+ " # Add parent directory to path to import from utils\n " ,
30+ " sys.path.append(os.path.join(os.path.dirname(os.getcwd())))\n " ,
31+ " from utils import track_progress, set_env"
32+ ]
2533 },
2634 {
2735 "cell_type" : " code" ,
4856 ]
4957 },
5058 {
51- "cell_type" : " markdown" ,
59+ "cell_type" : " code" ,
60+ "execution_count" : null ,
5261 "metadata" : {},
62+ "outputs" : [],
5363 "source" : [
54- " ### **Pick an LLM provider of your choice below**"
64+ " # Set the LLM provider and passkey provided by your workshop instructor\n " ,
65+ " # NOTE: LLM_PROVIDER can be set to one of \" aws\" / \" microsoft\" / \" google\"\n " ,
66+ " LLM_PROVIDER = \" aws\"\n " ,
67+ " PASSKEY = \" replace-with-passkey\" "
5568 ]
5669 },
5770 {
6073 "metadata" : {},
6174 "outputs" : [],
6275 "source" : [
63- " SERVERLESS_URL = os.environ.get(\" SERVERLESS_URL\" )\n " ,
64- " # Can be one of \" aws\" , \" google\" or \" microsoft\"\n " ,
65- " LLM_PROVIDER = \" aws\" "
76+ " # Obtain API keys from our AI model proxy and set them as an environment variables-- DO NOT CHANGE\n " ,
77+ " set_env([LLM_PROVIDER,\" voyageai\" ], PASSKEY)"
6678 ]
6779 },
6880 {
192204 " {\n " ,
193205 " \" type\" : \" vector\" ,\n " ,
194206 " \" path\" : \" embedding\" ,\n " ,
195- " \" numDimensions\" : 384 ,\n " ,
207+ " \" numDimensions\" : 1024 ,\n " ,
196208 " \" similarity\" : \" cosine\" ,\n " ,
197209 " }\n " ,
198210 " ]\n " ,
199211 " },\n " ,
200212 " }"
201213 ]
202214 },
203- {
204- "cell_type" : " markdown" ,
205- "metadata" : {},
206- "source" : [
207- " 📚 Refer to the `utils.py` script under `notebooks/utils`"
208- ]
209- },
210215 {
211216 "cell_type" : " code" ,
212217 "execution_count" : null ,
213218 "metadata" : {},
214219 "outputs" : [],
215220 "source" : [
216221 " # Use the `create_index` function from the `utils` module to create a vector search index with the above definition for the `vs_collection` collection\n " ,
217- " <CODE_BLOCK_1> "
222+ " create_index(vs_collection, VS_INDEX_NAME, model) "
218223 ]
219224 },
220225 {
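For reference, the `create_index` helper lives in `notebooks/utils`; a minimal sketch of what it might do with PyMongo's search-index API (assuming PyMongo >= 4.7; the helper's real internals and the shape of `model` may differ):

from pymongo.operations import SearchIndexModel

def create_index(collection, index_name, model):
    # Wrap the index spec assembled above in a SearchIndexModel and
    # create it as an Atlas Vector Search index on the collection.
    # Assumes `model` carries the definition under a "definition" key.
    collection.create_search_index(
        SearchIndexModel(
            definition=model["definition"], name=index_name, type="vectorSearch"
        )
    )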
252257 "metadata" : {},
253258 "outputs" : [],
254259 "source" : [
255- " # You may see a warning upon running this cell. You can ignore it.\n " ,
256260 " from langchain.agents import tool\n " ,
257- " from sentence_transformers import SentenceTransformer \n " ,
261+ " import voyageai \n " ,
258262 " from typing import List"
259263 ]
260264 },
271275 "metadata" : {},
272276 "outputs" : [],
273277 "source" : [
274- " # Load the `gte-small` model using the Sentence Transformers library \n " ,
275- " embedding_model = SentenceTransformer( \" thenlper/gte-small \" )"
278+ " # Initialize the Voyage AI client \n " ,
279+ " vo = voyageai.Client( )"
276280 ]
277281 },
278282 {
279283 "cell_type" : " markdown" ,
280284 "metadata" : {},
281285 "source" : [
282- " 📚 https://huggingface.co/thenlper/gte-small#usage (See \" Use with sentence-transformers \" under Usage) "
286+ " 📚 https://docs.voyageai.com/docs/contextualized-chunk-embeddings#approach-2-contextualized-chunk-embeddings "
283287 ]
284288 },
285289 {
288292 "metadata" : {},
289293 "outputs" : [],
290294 "source" : [
291- " # Define a function that takes a piece of text (`text`) as input, embeds it using the `embedding_model` instantiated above and returns the embedding as a list\n " ,
292- " # An array can be converted to a list using the `tolist()` method\n " ,
293- " def get_embedding(text: str) -> List[float]:\n " ,
295+ " def get_embeddings(query: str) -> List[float]:\n " ,
294296 " \"\"\"\n " ,
295- " Generate the embedding for a piece of text .\n " ,
297+ " Get embeddings for an input query .\n " ,
296298 " \n " ,
297299 " Args:\n " ,
298- " text (str): Text to embed. \n " ,
300+ " query (str): Query string \n " ,
299301 " \n " ,
300302 " Returns:\n " ,
301- " List[float]: Embedding of the text as a list. \n " ,
303+ " List[float]: Embedding of the query string \n " ,
302304 " \"\"\"\n " ,
303- " embedding = <CODE_BLOCK_2>\n " ,
304- " return embedding.tolist()"
305+ " # Use the `contextualized_embed` method of the Voyage AI API to embed the user query with the following arguments:\n " ,
306+ " # inputs: `query` wrapped in a list of lists\n " ,
307+ " # model: `voyage-context-3`\n " ,
308+ " # input_type: \" query\"\n " ,
309+ " embds_obj = <CODE_BLOCK_1>\n " ,
310+ " # Extract embeddings from the embeddings object\n " ,
311+ " embeddings = <CODE_BLOCK_2>\n " ,
312+ " return embeddings"
305313 ]
306314 },
307315 {
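Per the Voyage AI docs linked above, `contextualized_embed` takes a list of chunk lists; a sketch of the documented pattern for a single query (not necessarily the notebook's exact solution):

embds_obj = vo.contextualized_embed(
    inputs=[[query]],  # one inner list of chunks per document; here a single-chunk query
    model="voyage-context-3",
    input_type="query",
)
# Each result holds one embedding per input chunk
embeddings = embds_obj.results[0].embeddings[0]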
330338 " str: The retrieved information formatted as a string.\n " ,
331339 " \"\"\"\n " ,
332340 " \n " ,
333- " # Generate embedding for the `user_query` using the `get_embedding ` function defined above\n " ,
334- " query_embedding = <CODE_BLOCK_33 >\n " ,
341+ " # Generate embeddings for the `user_query` using the `get_embeddings ` function defined above\n " ,
342+ " query_embedding = <CODE_BLOCK_3 >\n " ,
335343 " \n " ,
336344 " # Define an aggregation pipeline consisting of a $vectorSearch stage, followed by a $project stage\n " ,
337345 " # Set the number of candidates to 150 and only return the top 5 documents from the vector search\n " ,
481489 "source" : [
482490 " from langchain_core.load import load\n " ,
483491 " from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n " ,
484- " import requests "
492+ " from utils import get_llm "
485493 ]
486494 },
487495 {
490498 "metadata" : {},
491499 "outputs" : [],
492500 "source" : [
493- " # Obtain the Langchain LLM object from our serverless endpoint\n " ,
494- " llm_dict = requests.post(\n " ,
495- " url=SERVERLESS_URL, json={\" task\" : \" get_llm\" , \" data\" : LLM_PROVIDER}\n " ,
496- " ).json()\n " ,
497- " llm = load(llm_dict[\" llm\" ], secrets_map=llm_dict[\" secrets_map\" ])"
501+ " # Obtain the Langchain LLM object using the `get_llm` function from the `utils`` module.\n " ,
502+ " llm = get_llm(LLM_PROVIDER)"
498503 ]
499504 },
500505 {
560565 "metadata" : {},
561566 "outputs" : [],
562567 "source" : [
563- " # Chain the `prompt` with the tool-bound llm using the `|` operator\n " ,
568+ " # Chain the `prompt` with the tool-augmented llm using the `|` operator\n " ,
564569 " llm_with_tools = <CODE_BLOCK_10>"
565570 ]
566571 },
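In LangChain, tools are conventionally attached to a chat model with `bind_tools`, so the chained runnable would plausibly be (a sketch; `tools` is the tool list defined earlier):

llm_with_tools = prompt | llm.bind_tools(tools)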
830835 "outputs" : [],
831836 "source" : [
832837 " # Visualize the graph\n " ,
833- " try:\n " ,
834- " display(Image(app.get_graph().draw_mermaid_png()))\n " ,
835- " except Exception:\n " ,
836- " # This requires some extra dependencies and is optional\n " ,
837- " pass"
838+ " app"
838839 ]
839840 },
840841 {
850851 "metadata" : {},
851852 "outputs" : [],
852853 "source" : [
853- " # Stream outputs from the graph as they pass through its nodes\n " ,
854854 " def execute_graph(user_input: str) -> None:\n " ,
855855 " \"\"\"\n " ,
856856 " Stream outputs from the graph\n " ,
857857 " \n " ,
858858 " Args:\n " ,
859859 " user_input (str): User query string\n " ,
860860 " \"\"\"\n " ,
861- " # Add user input to the messages attribute of the graph state\n " ,
862- " # The role of the message should be \" user\" and content should be `user_input`\n " ,
863- " input = {\" messages\" : [(\" user\" , user_input)]}\n " ,
864- " # Pass input to the graph and stream the outputs\n " ,
865- " for output in app.stream(input):\n " ,
866- " for key, value in output.items():\n " ,
867- " print(f\" Node {key}:\" )\n " ,
868- " print(value)\n " ,
869- " print(\" ---FINAL ANSWER---\" )\n " ,
870- " print(value[\" messages\" ][-1].content)"
861+ " # Stream outputs from each step in the graph\n " ,
862+ " for step in app.stream(\n " ,
863+ " {\" messages\" : [{\" role\" : \" user\" , \" content\" : user_input}]},\n " ,
864+ " # Stream full value of the state after each step\n " ,
865+ " stream_mode=\" values\" ,\n " ,
866+ " ):\n " ,
867+ " # Print the latest message from the step\n " ,
868+ " step[\" messages\" ][-1].pretty_print()"
871869 ]
872870 },
873871 {
939937 "metadata" : {},
940938 "outputs" : [],
941939 "source" : [
942- " def execute_graph (thread_id: str, user_input: str) -> None:\n " ,
940+ " def execute_graph_with_memory (thread_id: str, user_input: str) -> None:\n " ,
943941 " \"\"\"\n " ,
944942 " Stream outputs from the graph\n " ,
945943 " \n " ,
946944 " Args:\n " ,
947945 " thread_id (str): Thread ID for the checkpointer\n " ,
948946 " user_input (str): User query string\n " ,
949947 " \"\"\"\n " ,
950- " # Add user input to the messages attribute of the graph state\n " ,
951- " # The role of the message should be \" user\" and content should be `user_input`\n " ,
952- " input = {\" messages\" : [(\" user\" , user_input)]}\n " ,
953- " # Define a config containing the thread ID\n " ,
948+ " # Create a runtime config for the thread ID `thread_id`\n " ,
954949 " config = <CODE_BLOCK_19>\n " ,
955- " # Pass `input` and `config` to the graph and stream outputs\n " ,
956- " for output in app.stream(input, config):\n " ,
957- " for key, value in output.items():\n " ,
958- " print(f\" Node {key}:\" )\n " ,
959- " print(value)\n " ,
960- " print(\" ---FINAL ANSWER---\" )\n " ,
961- " print(value[\" messages\" ][-1].content)"
950+ " # Stream outputs from each step in the graph\n " ,
951+ " for step in app.stream(\n " ,
952+ " {\" messages\" : [{\" role\" : \" user\" , \" content\" : user_input}]},\n " ,
953+ " # Pass the config as an additional parameter\n " ,
954+ " config,\n " ,
955+ " stream_mode=\" values\" ,\n " ,
956+ " ):\n " ,
957+ " # Print the latest message from the step\n " ,
958+ " step[\" messages\" ][-1].pretty_print()"
962959 ]
963960 },
964961 {
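LangGraph checkpointers key saved state off a `thread_id` inside the `configurable` mapping, so the runtime config conventionally takes this shape:

config = {"configurable": {"thread_id": thread_id}}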
968965 "outputs" : [],
969966 "source" : [
970967 " # Test graph execution with thread ID\n " ,
971- " execute_graph (\n " ,
968+ " execute_graph_with_memory (\n " ,
972969 " \" 1\" ,\n " ,
973970 " \" What are some best practices for data backups in MongoDB?\" ,\n " ,
974971 " )"
981978 "outputs" : [],
982979 "source" : [
983980 " # Follow-up question to ensure message history works\n " ,
984- " execute_graph (\n " ,
981+ " execute_graph_with_memory (\n " ,
985982 " \" 1\" ,\n " ,
986983 " \" What did I just ask you?\" ,\n " ,
987984 " )"
10141011 },
10151012 "nbformat" : 4 ,
10161013 "nbformat_minor" : 4
1017- }
1014+ }