Skip to content

Commit 7c0b13a

Browse files
committed
updates after review
1 parent 1109e9c commit 7c0b13a

File tree

1 file changed

+22
-50
lines changed

1 file changed

+22
-50
lines changed

Python/CosmosDB-NoSQL_VectorSearch/CosmosDB-NoSQL-Quickstart-RAG-Chatbot.ipynb

Lines changed: 22 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
"import gradio as gr\n",
7575
"\n",
7676
"# Cosmos DB imports\n",
77-
"from azure.cosmos.aio import CosmosClient\n",
77+
"from azure.cosmos import CosmosClient\n",
7878
"\n",
7979
"# Load configuration\n",
8080
"env_name = \"sample_env_file.env\"\n",
@@ -89,7 +89,7 @@
8989
"cosmos_cache = config['cosmos_cache_collection_name']\n",
9090
"\n",
9191
"# Create the Azure Cosmos DB for NoSQL async client for faster data loading\n",
92-
"cosmos_async_client = CosmosClient(url=cosmos_conn, credential=cosmos_key)\n",
92+
"cosmos_client = CosmosClient(url=cosmos_conn, credential=cosmos_key)\n",
9393
"\n",
9494
"openai_endpoint = config['openai_endpoint']\n",
9595
"openai_key = config['openai_key']\n",
@@ -122,7 +122,7 @@
122122
"metadata": {},
123123
"outputs": [],
124124
"source": [
125-
"db = await cosmos_async_client.create_database_if_not_exists(cosmos_database)\n",
125+
"db = cosmos_client.create_database_if_not_exists(cosmos_database)\n",
126126
"\n",
127127
"# Create the vector embedding policy to specify vector details\n",
128128
"vector_embedding_policy = {\n",
@@ -146,29 +146,28 @@
146146
" ]\n",
147147
"} \n",
148148
"\n",
149-
"\n",
150149
"# Create the data collection with vector index (note: this creates a container with 10000 RUs to allow fast data load)\n",
151150
"try:\n",
152-
" container = await db.create_container_if_not_exists( id=cosmos_collection, \n",
151+
" movies_container = db.create_container_if_not_exists(id=cosmos_collection, \n",
153152
" partition_key=PartitionKey(path='/id'), \n",
154153
" vector_embedding_policy=vector_embedding_policy,\n",
155154
" offer_throughput=10000) \n",
156-
" print('Container with id \\'{0}\\' created'.format(id)) \n",
155+
" print('Container with id \\'{0}\\' created'.format(movies_container.id)) \n",
157156
"\n",
158157
"except exceptions.CosmosHttpResponseError: \n",
159-
" raise \n",
158+
" raise \n",
160159
"\n",
161160
"# Create the cache collection with vector index\n",
162161
"try:\n",
163-
" cache_container = await db.create_container_if_not_exists( id=cosmos_cache, \n",
162+
" cache_container = db.create_container_if_not_exists(id=cosmos_cache, \n",
164163
" partition_key=PartitionKey(path='/id'), \n",
165164
" indexing_policy=indexing_policy,\n",
166165
" vector_embedding_policy=vector_embedding_policy,\n",
167166
" offer_throughput=1000) \n",
168-
" print('Container with id \\'{0}\\' created'.format(id)) \n",
167+
" print('Container with id \\'{0}\\' created'.format(cache_container.id)) \n",
169168
"\n",
170169
"except exceptions.CosmosHttpResponseError: \n",
171-
" raise "
170+
" raise"
172171
]
173172
},
174173
{
@@ -222,7 +221,7 @@
222221
"outputs": [],
223222
"source": [
224223
"# Unzip the data file\n",
225-
"with zipfile.ZipFile(\"../../DataSet/Movies/MovieLens-4489-256D.zip\", 'r') as zip_ref:\n",
224+
"with zipfile.ZipFile(\"../../DataSet/Movies/MovieLens-4489-256D.zip\", 'r') as zip_ref: \n",
226225
" zip_ref.extractall(\"/Data\")\n",
227226
"zip_ref.close()\n",
228227
"# Load the data file\n",
@@ -262,16 +261,12 @@
262261
"outputs": [],
263262
"source": [
264263
"import asyncio\n",
265-
"import nest_asyncio\n",
266-
"import time # Import the time module to measure execution time\n",
267-
"\n",
268-
"nest_asyncio.apply()\n",
264+
"import time\n",
265+
"from concurrent.futures import ThreadPoolExecutor\n",
269266
"\n",
270-
"def generate_vectors(items, vector_property):\n",
267+
"async def generate_vectors(items, vector_property):\n",
271268
" for item in items:\n",
272-
" #print(f\"generating embedding for item {item}...\")\n",
273-
" vectorArray = generate_embeddings(item['overview'])\n",
274-
" time.sleep(0.1)\n",
269+
" vectorArray = await generate_embeddings(item['overview'])\n",
275270
" item[vector_property] = vectorArray\n",
276271
" return items\n",
277272
"\n",
@@ -282,22 +277,20 @@
282277
" tasks = []\n",
283278
" max_concurrency = 20 # Adjust this value to control the level of concurrency\n",
284279
" semaphore = asyncio.Semaphore(max_concurrency)\n",
285-
" \n",
286-
" await cosmos_async_client.__aenter__()\n",
287280
" print(\"Starting doc load, please wait...\")\n",
288281
" \n",
282+
" def upsert_item_sync(obj):\n",
283+
" movies_container.upsert_item(body=obj)\n",
284+
" \n",
289285
" async def upsert_object(obj):\n",
290286
" nonlocal counter\n",
291287
" async with semaphore:\n",
292-
" #The following code to create vector embeddings for the data is commented out as the sample data is already vectorized.\n",
293-
" #vectorArray = generate_embeddings(obj['overview'])\n",
294-
" #obj[cosmos_vector_property] = vectorArray\n",
295-
" await container.upsert_item(body=obj)\n",
288+
" await asyncio.get_event_loop().run_in_executor(None, upsert_item_sync, obj)\n",
296289
" # Progress reporting\n",
297290
" counter += 1\n",
298291
" if counter % 100 == 0:\n",
299-
" print(f\"Sent {counter} documents for insertion into collection.\") \n",
300-
" \n",
292+
" print(f\"Sent {counter} documents for insertion into collection.\")\n",
293+
" \n",
301294
" for obj in data:\n",
302295
" tasks.append(asyncio.create_task(upsert_object(obj)))\n",
303296
" \n",
@@ -310,29 +303,8 @@
310303
" print(f\"Time taken: {duration:.2f} seconds ({duration:.3f} milliseconds)\")\n",
311304
"\n",
312305
"# Run the async function\n",
313-
"await insert_data()"
314-
]
315-
},
316-
{
317-
"cell_type": "markdown",
318-
"id": "590df42f-5416-4744-9968-5406730ed036",
319-
"metadata": {},
320-
"source": [
321-
"# Set up containers for chat bot"
322-
]
323-
},
324-
{
325-
"cell_type": "code",
326-
"execution_count": null,
327-
"id": "70b0cb9e-14d6-48ef-bca4-76b626572bc3",
328-
"metadata": {},
329-
"outputs": [],
330-
"source": [
331-
"from azure.cosmos import CosmosClient\n",
332-
"cosmos_sync_client = CosmosClient(url=cosmos_conn, credential=cosmos_key)\n",
333-
"db = cosmos_sync_client.get_database_client(cosmos_database)\n",
334-
"movies_container = db.get_container_client(cosmos_collection)\n",
335-
"cache_container = db.get_container_client(cosmos_cache)"
306+
"await insert_data()\n",
307+
" "
336308
]
337309
},
338310
{

0 commit comments

Comments
 (0)