|
1 | 1 | { |
2 | 2 | "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "id": "74e01527", |
| 6 | + "metadata": {}, |
| 7 | + "source": [ |
| 8 | + "# Introduction\n", |
| 9 | + "\n", |
 | 10 | + "In this tutorial, we'll demonstrate how to use semantic caching with Azure Cosmos DB for MongoDB and LangChain.\n",
| 11 | + "\n", |
| 12 | + "[Learn more here from the LangChain docs.](https://python.langchain.com/docs/integrations/llms/llm_caching#azure-cosmos-db-semantic-cache)" |
| 13 | + ] |
| 14 | + }, |
| 15 | + { |
| 16 | + "cell_type": "code", |
| 17 | + "execution_count": null, |
| 18 | + "id": "c19c036d", |
| 19 | + "metadata": {}, |
| 20 | + "outputs": [], |
| 21 | + "source": [ |
| 22 | + "# ! pip install langchain\n", |
| 23 | + "# ! pip install langchain_openai\n", |
| 24 | + "# ! pip install langchain_community\n", |
| 25 | + "# ! pip install pymongo\n", |
| 26 | + "# ! pip install python-dotenv\n", |
| 27 | + "# ! pip install azure-core\n", |
| 28 | + "# ! pip install azure-cosmos" |
| 29 | + ] |
| 30 | + }, |
3 | 31 | { |
4 | 32 | "cell_type": "code", |
5 | 33 | "execution_count": null, |
|
17 | 45 | " CosmosDBVectorSearchType,\n", |
18 | 46 | ")\n", |
19 | 47 | "\n", |
20 | | - "from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings\n", |
21 | | - "from langchain_openai import OpenAI, AzureOpenAI\n", |
22 | | - "from langchain.globals import set_llm_cache\n", |
| 48 | + "from langchain.chains import ConversationalRetrievalChain, LLMChain\n", |
| 49 | + "from langchain.prompts import PromptTemplate\n", |
23 | 50 | "\n", |
24 | | - "from azure.core.exceptions import AzureError\n", |
25 | | - "from azure.core.credentials import AzureKeyCredential\n", |
| 51 | + "from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings\n", |
| 52 | + "from langchain.globals import set_llm_cache\n", |
26 | 53 | "\n", |
| 54 | + "import pymongo\n", |
27 | 55 | "from dotenv import load_dotenv" |
28 | 56 | ] |
29 | 57 | }, |
|
42 | 70 | "\n", |
43 | 71 | "COSMOS_MONGO_USER = config['cosmos_db_mongo_user']\n", |
44 | 72 | "COSMOS_MONGO_PWD = config['cosmos_db_mongo_pwd']\n", |
45 | | - "COSMOS_MONGO_SERVER = config['cosmos_db_mongo_server'] " |
| 73 | + "COSMOS_MONGO_SERVER = config['cosmos_db_mongo_server']\n", |
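 | | + "# Embedding dimensionality; must match the dimensions used by the embeddings model and the vector index\n",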
| 74 | + "DIMENSIONS = int(config['openai_embeddings_dimensions']) " |
46 | 75 | ] |
47 | 76 | }, |
48 | 77 | { |
|
57 | 86 | "\n", |
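 | | + "# Build the connection string: TLS enabled, SCRAM-SHA-256 auth, retryable writes disabled\n",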
58 | 87 | "CONNECTION_STRING = (\"mongodb+srv://\"+COSMOS_MONGO_USER+\":\"+COSMOS_MONGO_PWD+\"@\"+COSMOS_MONGO_SERVER+\"?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000\")\n", |
59 | 88 | "\n", |
60 | | - "DB_NAME, COLLECTION_NAME = NAMESPACE.split(\".\")" |
| 89 | + "DB_NAME, COLLECTION_NAME = NAMESPACE.split(\".\")\n", |
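 | | + "# Single MongoClient reused below for cache setup, cleanup, and inspection\n",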
| 90 | + "mongo_client = pymongo.MongoClient(CONNECTION_STRING)\n" |
61 | 91 | ] |
62 | 92 | }, |
63 | 93 | { |
|
67 | 97 | "metadata": {}, |
68 | 98 | "outputs": [], |
69 | 99 | "source": [ |
70 | | - "# Requires model version 0301 or more recent\n", |
71 | | - "llm = AzureOpenAI(\n", |
72 | | - " deployment_name=config['openai_completions_deployment'],\n", |
73 | | - " model_name=config['openai_completions_model'],\n", |
74 | | - " openai_api_key=config['openai_api_key'],\n", |
75 | | - " azure_endpoint=config['openai_api_endpoint'],\n", |
76 | | - " openai_api_version=config['openai_api_version'],\n", |
77 | | - " n=2,\n", |
78 | | - " best_of=2,\n", |
79 | | - " cache=True)\n", |
80 | | - "\n", |
81 | | - "\n", |
82 | | - "embeddings = AzureOpenAIEmbeddings(\n", |
83 | | - " azure_deployment=config['openai_embeddings_deployment'],\n", |
84 | | - " model=config['openai_embeddings_model'],\n", |
85 | | - " openai_api_key=config['openai_api_key'],\n", |
86 | | - " azure_endpoint=config['openai_api_endpoint'],\n", |
87 | | - " openai_api_version=config['openai_api_version'])" |
| 100 | + "def init_llm_chain():\n", |
| 101 | + "\n", |
 | 102 | + " # Clear the old cache if it exists; dropping the collection also removes its indexes\n",
 | 103 | + " mongo_client[DB_NAME].drop_collection(COLLECTION_NAME)\n",
| 105 | + "\n", |
| 106 | + " # Define a template for the LLM prompt\n", |
| 107 | + " prompt_template = \"\"\"\n", |
| 108 | + " You are an upbeat AI assistant who is excited to help answer questions. \n", |
| 109 | + "\n", |
| 110 | + " Question: {question}\n", |
| 111 | + " If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", |
| 112 | + " \"\"\"\n", |
| 113 | + " chatbot_prompt = PromptTemplate(\n", |
 | 114 | + " template=prompt_template, input_variables=[\"question\"])\n",
| 115 | + "\n", |
| 116 | + " # Requires model version 0301 or more recent\n", |
| 117 | + " # Point to completions model deployed in Azure OpenAI\n", |
| 118 | + " llm = AzureChatOpenAI(\n", |
| 119 | + " deployment_name=config['openai_completions_deployment'],\n", |
| 120 | + " model_name=config['openai_completions_model'],\n", |
| 121 | + " api_key=config['openai_api_key'],\n", |
| 122 | + " azure_endpoint=config['openai_api_endpoint'],\n", |
| 123 | + " api_version=config['openai_api_version'],\n", |
| 124 | + " cache=True,\n", |
| 125 | + " n=1)\n", |
| 126 | + "\n", |
| 127 | + " # Point to embeddings model deployed in Azure OpenAI\n", |
| 128 | + " embeddings = AzureOpenAIEmbeddings(\n", |
| 129 | + " azure_deployment=config['openai_embeddings_deployment'],\n", |
| 130 | + " model=config['openai_embeddings_model'],\n", |
| 131 | + " api_key=config['openai_api_key'],\n", |
 | 132 | + " azure_endpoint=config['openai_api_endpoint'],\n",
 | | + " api_version=config['openai_api_version'],\n",
 | 133 | + " dimensions=DIMENSIONS)\n",
| 134 | + "\n", |
| 135 | + " # Setup simple LLM chain\n", |
 | 136 | + " llm_chain = LLMChain(llm=llm, prompt=chatbot_prompt)\n",
| 137 | + "\n", |
| 138 | + " # Setup semantic cache for LLM\n", |
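 | | + " # num_lists: number of IVF clusters in the vector index (1 is enough for a small cache)\n",
 | | + " # score_threshold: minimum similarity score for a cached response to count as a hit\n",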
| 139 | + " num_lists = 1\n", |
| 140 | + " similarity_algorithm = CosmosDBSimilarityType.COS\n", |
| 141 | + " kind = CosmosDBVectorSearchType.VECTOR_IVF\n", |
| 142 | + "\n", |
| 143 | + " score_threshold = 0.9\n", |
| 144 | + "\n", |
| 145 | + " sem_cache = AzureCosmosDBSemanticCache(\n", |
| 146 | + " cosmosdb_connection_string=CONNECTION_STRING,\n", |
| 147 | + " cosmosdb_client=None,\n", |
| 148 | + " embedding=embeddings,\n", |
| 149 | + " database_name=DB_NAME,\n", |
| 150 | + " collection_name=COLLECTION_NAME,\n", |
| 151 | + " num_lists=num_lists,\n", |
| 152 | + " similarity=similarity_algorithm,\n", |
| 153 | + " kind=kind,\n", |
| 154 | + " dimensions=DIMENSIONS,\n", |
| 155 | + " score_threshold=score_threshold)\n", |
| 156 | + "\n", |
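 | | + " # Register the semantic cache globally so every LLM call checks it first\n",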
| 157 | + " set_llm_cache(sem_cache)\n", |
| 158 | + "\n", |
| 159 | + " return llm_chain" |
88 | 160 | ] |
89 | 161 | }, |
90 | 162 | { |
|
96 | 168 | }, |
97 | 169 | "outputs": [], |
98 | 170 | "source": [ |
99 | | - "# Default value for these params\n", |
100 | | - "num_lists = 1\n", |
101 | | - "dimensions = 1536\n", |
102 | | - "similarity_algorithm = CosmosDBSimilarityType.COS\n", |
103 | | - "kind = CosmosDBVectorSearchType.VECTOR_IVF\n", |
104 | | - "m = 16\n", |
105 | | - "ef_construction = 64\n", |
106 | | - "ef_search = 40\n", |
107 | | - "score_threshold = 0.9\n", |
108 | | - "\n", |
109 | | - "set_llm_cache(\n", |
110 | | - " AzureCosmosDBSemanticCache(\n", |
111 | | - " cosmosdb_connection_string=CONNECTION_STRING,\n", |
112 | | - " cosmosdb_client=None,\n", |
113 | | - " embedding=embeddings,\n", |
114 | | - " database_name=DB_NAME,\n", |
115 | | - " collection_name=COLLECTION_NAME,\n", |
116 | | - " num_lists=num_lists,\n", |
117 | | - " similarity=similarity_algorithm,\n", |
118 | | - " kind=kind,\n", |
119 | | - " dimensions=dimensions,\n", |
120 | | - " m=m,\n", |
121 | | - " ef_construction=ef_construction,\n", |
122 | | - " ef_search=ef_search,\n", |
123 | | - " score_threshold=score_threshold )\n", |
124 | | - ")" |
 | 171 | + "# Initialize the LLM chain and register the semantic cache\n",
| 172 | + "llm_chain = init_llm_chain()" |
125 | 173 | ] |
126 | 174 | }, |
127 | 175 | { |
|
133 | 181 | "source": [ |
134 | 182 | "%%time\n", |
135 | 183 | "# The first time, the question/response is not yet cached in Cosmos DB, so the call should be slower\n",
136 | | - "llm(\"Tell me about beer making\")" |
| 184 | + "llm_chain.invoke(\"Tell me something interesting about beer making\")" |
137 | 185 | ] |
138 | 186 | }, |
139 | 187 | { |
|
145 | 193 | "source": [ |
146 | 194 | "%%time\n", |
147 | 195 | "# This question/response is not yet cached in Cosmos DB, so the call should be slower\n",
148 | | - "llm(\"Tell me a joke?\")" |
 | 196 | + "llm_chain.invoke(\"Tell me a joke about tomatoes and food.\")"
149 | 197 | ] |
150 | 198 | }, |
151 | 199 | { |
|
157 | 205 | "source": [ |
158 | 206 | "%%time\n", |
159 | 207 | "# The second time, the question/response is cached in Cosmos DB, so retrieval should be faster\n",
160 | | - "llm(\"Tell me about beer making\")" |
 | 208 | + "llm_chain.invoke(\"Tell me something interesting about beer making\")"
161 | 209 | ] |
162 | 210 | }, |
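 | | + {
 | | + "cell_type": "markdown",
 | | + "metadata": {},
 | | + "source": [
 | | + "At this point two question/response pairs should be cached. As a quick sanity check, we can peek at the cache collection directly with `pymongo`. This is a minimal sketch: the document schema is managed by `AzureCosmosDBSemanticCache`, so we only count the entries and list field names rather than relying on them."
 | | + ]
 | | + },
 | | + {
 | | + "cell_type": "code",
 | | + "execution_count": null,
 | | + "metadata": {},
 | | + "outputs": [],
 | | + "source": [
 | | + "# Count the entries stored by the semantic cache so far\n",
 | | + "print(mongo_client[DB_NAME][COLLECTION_NAME].count_documents({}))\n",
 | | + "\n",
 | | + "# Inspect the field names of one cached document (schema is managed by LangChain)\n",
 | | + "sample = mongo_client[DB_NAME][COLLECTION_NAME].find_one()\n",
 | | + "print(list(sample.keys()) if sample else 'cache is empty')"
 | | + ]
 | | + },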
163 | 211 | { |
|
169 | 217 | "source": [ |
170 | 218 | "%%time\n", |
171 | 219 | "# This question is semantically similar to the previous one (similarity above score_threshold), so it should be answered from the cache and be faster\n",
172 | | - "llm(\"How do I make beer?\")" |
 | 220 | + "llm_chain.invoke(\"How do I make beer?\")"
173 | 221 | ] |
174 | 222 | } |
175 | 223 | ], |
|
0 commit comments