@@ -18,375 +18,22 @@ In this quickstart, we demonstrate how to build a [RAG Pattern](../gen-ai/rag.md
18
18
19
19
At the end, we create a simple UX using Gradio to allow users to type in questions and display responses generated by Azure OpenAI or served from the cache. The responses also display an elapsed time so you can see the impact caching has on performance versus generating a response.
20
20
21
- > [ !TIP]
21
+ > [ !TIP]
22
+ > You can find the Python notebook for this quickstart [ here] ( https://aka.ms/CosmosPythonRAGQuickstart ) .
22
23
> For more RAG samples, visit: [ AzureDataRetrievalAugmentedGenerationSamples] ( https://github.com/microsoft/AzureDataRetrievalAugmentedGenerationSamples )
23
24
24
25
** Important Note** : This sample requires you to set up accounts for Azure Cosmos DB for NoSQL, and Azure OpenAI. To get started, visit:
25
26
- [ Azure Cosmos DB for NoSQL Python Quickstart] ( ../nosql/quickstart-python.md )
26
27
- [ Azure Cosmos DB for NoSQL Vector Search] ( ../nosql/vector-search.md )
27
28
- [ Azure OpenAI] ( ../../ai-services/openai/toc.yml )
28
29
29
- ### 1. Install Required Packages
30
- Install the necessary Python packages to interact with Azure Cosmos DB and other services.
30
+ ## Python Notebook
31
31
32
- ``` bash
33
- pip install python-dotenv aiohttp openai gradio ijson nest_asyncio tenacity azure-cosmos
34
- ```
35
-
36
- ### 2. Initialize Your Client Connection
37
- Populate ` sample_env_file.env ` file with the appropriate credentials for Azure Cosmos DB and Azure OpenAI.
38
-
39
- ``` python
40
- # Import the required libraries
41
- import time
42
- import json
43
- import uuid
44
- import urllib
45
- import ijson
46
- import zipfile
47
- from dotenv import dotenv_values
48
- from openai import AzureOpenAI
49
- from azure.core.exceptions import AzureError
50
- from azure.cosmos import PartitionKey, exceptions
51
- import gradio as gr
52
-
53
- # Cosmos DB imports
54
- from azure.cosmos.aio import CosmosClient
55
-
56
- # Load configuration
57
- env_name = " sample_env_file.env"
58
- config = dotenv_values(env_name)
59
-
60
- cosmos_conn = config[' cosmos_uri' ]
61
- cosmos_key = config[' cosmos_key' ]
62
- cosmos_database = config[' cosmos_database_name' ]
63
- cosmos_collection = config[' cosmos_collection_name' ]
64
- cosmos_vector_property = config[' cosmos_vector_property_name' ]
65
- cosmos_cache_db = config[' cosmos_cache_database_name' ]
66
- cosmos_cache = config[' cosmos_cache_collection_name' ]
67
-
68
- # Create the Azure Cosmos DB for NoSQL async client for faster data loading
69
- cosmos_async_client = CosmosClient(url = cosmos_conn, credential = cosmos_key)
70
-
71
- openai_endpoint = config[' openai_endpoint' ]
72
- openai_key = config[' openai_key' ]
73
- openai_api_version = config[' openai_api_version' ]
74
- openai_embeddings_deployment = config[' openai_embeddings_deployment' ]
75
- openai_embeddings_dimensions = int (config[' openai_embeddings_dimensions' ])
76
- openai_completions_deployment = config[' openai_completions_deployment' ]
77
-
78
- # Create the OpenAI client
79
- openai_client = AzureOpenAI(azure_endpoint = openai_endpoint, api_key = openai_key, api_version = openai_api_version)
80
- ```
81
-
82
- ### 3. Create a Database and Containers with Vector Policies
83
- This function takes a database object, a collection name, the name of the document property that stores vectors, and the number of vector dimensions used for the embeddings.
84
-
85
- ``` python
86
- db = await cosmos_async_client.create_database_if_not_exists(cosmos_database)
87
-
88
- # Create the vector embedding policy to specify vector details
89
- vector_embedding_policy = {
90
- " vectorEmbeddings" : [
91
- {
92
- " path" : " /" + cosmos_vector_property,
93
- " dataType" : " float32" ,
94
- " distanceFunction" : " dotproduct" ,
95
- " dimensions" : openai_embeddings_dimensions
96
- }
97
- ]
98
- }
99
-
100
- # Create the vector index policy to specify vector details
101
- indexing_policy = {
102
- " vectorIndexes" : [
103
- {
104
- " path" : " /" + cosmos_vector_property,
105
- " type" : " quantizedFlat"
106
- }
107
- ]
108
- }
109
-
110
- # Create the data collection with vector index (note: this creates a container with 10000 RUs to allow fast data load)
111
- try :
112
- container = await db.create_container_if_not_exists(id = cosmos_collection,
113
- partition_key = PartitionKey(path = ' /id' ),
114
- vector_embedding_policy = vector_embedding_policy,
115
- offer_throughput = 10000 )
116
- print (' Container with id \' {0} \' created' .format(cosmos_collection))
117
-
118
- except exceptions.CosmosHttpResponseError:
119
- raise
120
-
121
- # Create the cache collection with vector index
122
- try :
123
- cache_container = await db.create_container_if_not_exists(id = cosmos_cache,
124
- partition_key = PartitionKey(path = ' /id' ),
125
- indexing_policy = indexing_policy,
126
- vector_embedding_policy = vector_embedding_policy,
127
- offer_throughput = 1000 )
128
- print (' Container with id \' {0} \' created' .format(cosmos_cache))
129
-
130
- except exceptions.CosmosHttpResponseError:
131
- raise
132
- ```
133
-
134
- ### 4. Generate Embeddings from Azure OpenAI
135
-
136
- This function vectorizes the user input for vector search. Ensure the dimensionality and model used match the sample data provided, or else regenerate vectors with your desired model.
137
-
138
- ``` python
139
- from tenacity import retry, stop_after_attempt, wait_random_exponential
140
-
141
- @retry (wait = wait_random_exponential(min = 2 , max = 300 ), stop = stop_after_attempt(20 ))
142
- def generate_embeddings (text ):
143
- response = openai_client.embeddings.create(
144
- input = text,
145
- model = openai_embeddings_deployment,
146
- dimensions = openai_embeddings_dimensions
147
- )
148
- embeddings = response.model_dump()
149
- return embeddings[' data' ][0 ][' embedding' ]
150
- ```
151
-
152
- ### 5. Load Data from the JSON File
153
-
154
- Extract the MovieLens dataset from the zip file.
155
-
156
- ``` python
157
- # Unzip the data file
158
- with zipfile.ZipFile(" ../../DataSet/Movies/MovieLens-4489-256D.zip" , ' r' ) as zip_ref:
159
- zip_ref.extractall(" /Data" )
160
- zip_ref.close()
161
-
162
- # Load the data file
163
- data = []
164
- with open (' /Data/MovieLens-4489-256D.json' , ' r' ) as d:
165
- data = json.load(d)
166
-
167
- # View the number of documents in the data (4489)
168
- len (data)
169
- ```
170
-
171
- ### 6. Store Data in Azure Cosmos DB
172
-
173
- Upsert data into Azure Cosmos DB for NoSQL. Records are written asynchronously.
174
-
175
- ``` python
176
- import asyncio
177
- import nest_asyncio
178
- import time # Import the time module to measure execution time
179
-
180
- nest_asyncio.apply()
181
-
182
- def generate_vectors (items , vector_property ):
183
- for item in items:
184
- vectorArray = generate_embeddings(item[' overview' ])
185
- time.sleep(0.1 )
186
- item[vector_property] = vectorArray
187
- return items
188
-
189
- async def insert_data ():
190
- start_time = time.time() # Record the start time
191
-
192
- counter = 0
193
- tasks = []
194
- max_concurrency = 20 # Adjust this value to control the level of concurrency
195
- semaphore = asyncio.Semaphore(max_concurrency)
196
-
197
- await cosmos_async_client.__aenter__ ()
198
- print (" Starting doc load, please wait..." )
199
-
200
- async def upsert_object (obj ):
201
- nonlocal counter
202
- async with semaphore:
203
- await container.upsert_item(body = obj)
204
- # Progress reporting
205
- counter += 1
206
- if counter % 100 == 0 :
207
- print (f " Sent { counter} documents for insertion into collection. " )
208
-
209
- for obj in data:
210
- tasks.append(asyncio.create_task(upsert_object(obj)))
211
-
212
- # Run all upsert tasks concurrently within the limits set by the semaphore
213
- await asyncio.gather(* tasks)
214
-
215
- end_time = time.time() # Record the end time
216
- duration = end_time - start_time # Calculate the duration
217
- print (f " All { counter} documents inserted! " )
218
- print (f " Time taken: { duration:.2f } seconds " )
219
-
220
- # Run the async function
221
- await insert_data()
222
- ```
223
-
224
- ### 7. Set Up Containers for Chat Bot
225
-
226
- ``` python
227
- from azure.cosmos import CosmosClient
228
-
229
- cosmos_sync_client = CosmosClient(url = cosmos_conn, credential = cosmos_key)
230
- db = cosmos_sync_client.get_database_client(cosmos_database)
231
- movies_container = db.get_container_client(cosmos_collection)
232
- cache_container = db.get_container_client(cosmos_cache)
233
- ```
234
-
235
- ### 8. Perform Vector Search
236
-
237
- This function defines a vector search over the movies data and chat cache collections.
238
-
239
- ``` python
240
- def vector_search (container , vectors , similarity_score = 0.02 , num_results = 5 ):
241
- results = container.query_items(
242
- query = '''
243
- SELECT TOP @num_results c.overview, VectorDistance(c.vector, @embedding) as SimilarityScore
244
- FROM c
245
- WHERE VectorDistance(c.vector,@embedding) > @similarity_score
246
- ORDER BY VectorDistance(c.vector,@embedding)
247
- ''' ,
248
- parameters = [
249
- {" name" : " @embedding" , " value" : vectors},
250
- {" name" : " @num_results" , " value" : num_results},
251
- {" name" : " @similarity_score" , " value" : similarity_score}
252
- ],
253
- enable_cross_partition_query = True , populate_query_metrics = True
254
- )
255
- results = list (results)
256
- formatted_results = [{' SimilarityScore' : result.pop(' SimilarityScore' ), ' document' : result} for result in results]
257
-
258
- return formatted_results
259
- ```
260
-
261
- ### 9. Get Recent Chat History
262
-
263
- This function provides conversational context to the LLM, allowing it to better have a conversation with the user.
264
-
265
- ``` python
266
- def get_chat_history (container , completions = 3 ):
267
- results = container.query_items(
268
- query = '''
269
- SELECT TOP @completions *
270
- FROM c
271
- ORDER BY c._ts DESC
272
- ''' ,
273
- parameters = [
274
- {" name" : " @completions" , " value" : completions},
275
- ], enable_cross_partition_query = True )
276
- results = list (results)
277
- return results
278
- ```
279
-
280
- ### 10. Chat Completion Functions
281
-
282
- Define the functions to handle the chat completion process, including caching responses.
283
-
284
- ``` python
285
- def generate_completion (user_prompt , vector_search_results , chat_history ):
286
- system_prompt = '''
287
- You are an intelligent assistant for movies. You are designed to provide helpful answers to user questions about movies in your database.
288
- You are friendly, helpful, and informative and can be lighthearted. Be concise in your responses, but still friendly.
289
- - Only answer questions related to the information provided below. Provide at least 3 candidate movie answers in a list.
290
- - Write two lines of whitespace between each answer in the list.
291
- '''
292
-
293
- messages = [{' role' : ' system' , ' content' : system_prompt}]
294
- for chat in chat_history:
295
- messages.append({' role' : ' user' , ' content' : chat[' prompt' ] + " " + chat[' completion' ]})
296
- messages.append({' role' : ' user' , ' content' : user_prompt})
297
- for result in vector_search_results:
298
- messages.append({' role' : ' system' , ' content' : json.dumps(result[' document' ])})
299
-
300
- response = openai_client.chat.completions.create(
301
- model = openai_completions_deployment,
302
- messages = messages,
303
- temperature = 0.1
304
- )
305
- return response.model_dump()
306
-
307
- def chat_completion (cache_container , movies_container , user_input ):
308
- print (" starting completion" )
309
- # Generate embeddings from the user input
310
- user_embeddings = generate_embeddings(user_input)
311
- # Query the chat history cache first to see if this question has been asked before
312
- cache_results = get_cache(container = cache_container, vectors = user_embeddings, similarity_score = 0.99 , num_results = 1 )
313
- if len (cache_results) > 0 :
314
- print (" Cached Result\n " )
315
- return cache_results[0 ][' completion' ], True
316
-
317
- else :
318
- # perform vector search on the movie collection
319
- print (" New result\n " )
320
- search_results = vector_search(movies_container, user_embeddings)
321
-
322
- print (" Getting Chat History\n " )
323
- # chat history
324
- chat_history = get_chat_history(cache_container, 3 )
325
- # generate the completion
326
- print (" Generating completions \n " )
327
- completions_results = generate_completion(user_input, search_results, chat_history)
328
-
329
- print (" Caching response \n " )
330
- # cache the response
331
- cache_response(cache_container, user_input, user_embeddings, completions_results)
332
-
333
- print (" \n " )
334
- # Return the generated LLM completion
335
- return completions_results[' choices' ][0 ][' message' ][' content' ], False
336
- ```
337
-
338
- ### 11. Cache Generated Responses
339
-
340
- Save the user prompts and generated completions to the cache for faster future responses.
341
-
342
- ``` python
343
- def cache_response (container , user_prompt , prompt_vectors , response ):
344
- chat_document = {
345
- ' id' : str (uuid.uuid4()),
346
- ' prompt' : user_prompt,
347
- ' completion' : response[' choices' ][0 ][' message' ][' content' ],
348
- ' completionTokens' : str (response[' usage' ][' completion_tokens' ]),
349
- ' promptTokens' : str (response[' usage' ][' prompt_tokens' ]),
350
- ' totalTokens' : str (response[' usage' ][' total_tokens' ]),
351
- ' model' : response[' model' ],
352
- ' vector' : prompt_vectors
353
- }
354
- container.create_item(body = chat_document)
355
- ```
356
-
357
- ### 12. Create a Simple UX in Gradio
358
-
359
- Build a user interface using Gradio for interacting with the AI application.
360
-
361
- ``` python
362
- chat_history = []
363
-
364
- with gr.Blocks() as demo:
365
- chatbot = gr.Chatbot(label = " Cosmic Movie Assistant" )
366
- msg = gr.Textbox(label = " Ask me about movies in the Cosmic Movie Database!" )
367
- clear = gr.Button(" Clear" )
368
-
369
- def user (user_message , chat_history ):
370
- start_time = time.time()
371
- response_payload, cached = chat_completion(cache_container, movies_container, user_message)
372
- end_time = time.time()
373
- elapsed_time = round ((end_time - start_time) * 1000 , 2 )
374
- details = f " \n (Time: { elapsed_time} ms) "
375
- if cached:
376
- details += " (Cached)"
377
- chat_history.append([user_message, response_payload + details])
378
-
379
- return gr.update(value = " " ), chat_history
380
-
381
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue = False )
382
- clear.click(lambda : None , None , chatbot, queue = False )
383
-
384
- # Launch the Gradio interface
385
- demo.launch(debug = True )
386
-
387
- # Be sure to run this cell to close or restart the Gradio demo
388
- demo.close()
389
- ```
32
+ <iframe
33
+ src="https://aka.ms/PythonRAGQuickstartRaw"
34
+ width="100%"
35
+ frameborder="0">
36
+ </iframe>
390
37
391
38
### Next steps
392
39
0 commit comments