|
74 | 74 | "import gradio as gr\n", |
75 | 75 | "\n", |
76 | 76 | "# Cosmos DB imports\n", |
77 | | - "from azure.cosmos.aio import CosmosClient\n", |
| 77 | + "from azure.cosmos import CosmosClient\n", |
78 | 78 | "\n", |
79 | 79 | "# Load configuration\n", |
80 | 80 | "env_name = \"sample_env_file.env\"\n", |
|
89 | 89 | "cosmos_cache = config['cosmos_cache_collection_name']\n", |
90 | 90 | "\n", |
91 | 91 | "# Create the Azure Cosmos DB for NoSQL async client for faster data loading\n", |
92 | | - "cosmos_async_client = CosmosClient(url=cosmos_conn, credential=cosmos_key)\n", |
| 92 | + "cosmos_client = CosmosClient(url=cosmos_conn, credential=cosmos_key)\n", |
93 | 93 | "\n", |
94 | 94 | "openai_endpoint = config['openai_endpoint']\n", |
95 | 95 | "openai_key = config['openai_key']\n", |
|
122 | 122 | "metadata": {}, |
123 | 123 | "outputs": [], |
124 | 124 | "source": [ |
125 | | - "db = await cosmos_async_client.create_database_if_not_exists(cosmos_database)\n", |
| 125 | + "db = cosmos_client.create_database_if_not_exists(cosmos_database)\n", |
126 | 126 | "\n", |
127 | 127 | "# Create the vector embedding policy to specify vector details\n", |
128 | 128 | "vector_embedding_policy = {\n", |
|
146 | 146 | " ]\n", |
147 | 147 | "} \n", |
148 | 148 | "\n", |
149 | | - "\n", |
150 | 149 | "# Create the data collection with vector index (note: this creates a container with 10000 RUs to allow fast data load)\n", |
151 | 150 | "try:\n", |
152 | | - " container = await db.create_container_if_not_exists( id=cosmos_collection, \n", |
| 151 | + " movies_container = db.create_container_if_not_exists(id=cosmos_collection, \n", |
153 | 152 | " partition_key=PartitionKey(path='/id'), \n", |
154 | 153 | " vector_embedding_policy=vector_embedding_policy,\n", |
155 | 154 | " offer_throughput=10000) \n", |
156 | | - " print('Container with id \\'{0}\\' created'.format(id)) \n", |
| 155 | + " print('Container with id \\'{0}\\' created'.format(movies_container.id)) \n", |
157 | 156 | "\n", |
158 | 157 | "except exceptions.CosmosHttpResponseError: \n", |
159 | | - " raise \n", |
| 158 | + " raise \n", |
160 | 159 | "\n", |
161 | 160 | "# Create the cache collection with vector index\n", |
162 | 161 | "try:\n", |
163 | | - " cache_container = await db.create_container_if_not_exists( id=cosmos_cache, \n", |
| 162 | + " cache_container = db.create_container_if_not_exists(id=cosmos_cache, \n", |
164 | 163 | " partition_key=PartitionKey(path='/id'), \n", |
165 | 164 | " indexing_policy=indexing_policy,\n", |
166 | 165 | " vector_embedding_policy=vector_embedding_policy,\n", |
167 | 166 | " offer_throughput=1000) \n", |
168 | | - " print('Container with id \\'{0}\\' created'.format(id)) \n", |
| 167 | + " print('Container with id \\'{0}\\' created'.format(cache_container.id)) \n", |
169 | 168 | "\n", |
170 | 169 | "except exceptions.CosmosHttpResponseError: \n", |
171 | | - " raise " |
| 170 | + " raise" |
172 | 171 | ] |
173 | 172 | }, |
174 | 173 | { |
|
222 | 221 | "outputs": [], |
223 | 222 | "source": [ |
224 | 223 | "# Unzip the data file\n", |
225 | | - "with zipfile.ZipFile(\"../../DataSet/Movies/MovieLens-4489-256D.zip\", 'r') as zip_ref:\n", |
| 224 | + "with zipfile.ZipFile(\"../../DataSet/Movies/MovieLens-4489-256D.zip\", 'r') as zip_ref: \n", |
226 | 225 | " zip_ref.extractall(\"/Data\")\n", |
227 | 226 | "zip_ref.close()\n", |
228 | 227 | "# Load the data file\n", |
|
262 | 261 | "outputs": [], |
263 | 262 | "source": [ |
264 | 263 | "import asyncio\n", |
265 | | - "import nest_asyncio\n", |
266 | | - "import time # Import the time module to measure execution time\n", |
267 | | - "\n", |
268 | | - "nest_asyncio.apply()\n", |
| 264 | + "import time\n", |
| 265 | + "from concurrent.futures import ThreadPoolExecutor\n", |
269 | 266 | "\n", |
270 | | - "def generate_vectors(items, vector_property):\n", |
| 267 | + "async def generate_vectors(items, vector_property):\n", |
271 | 268 | " for item in items:\n", |
272 | | - " #print(f\"generating embedding for item {item}...\")\n", |
273 | | - " vectorArray = generate_embeddings(item['overview'])\n", |
274 | | - " time.sleep(0.1)\n", |
| 269 | + " vectorArray = await generate_embeddings(item['overview'])\n", |
275 | 270 | " item[vector_property] = vectorArray\n", |
276 | 271 | " return items\n", |
277 | 272 | "\n", |
|
282 | 277 | " tasks = []\n", |
283 | 278 | " max_concurrency = 20 # Adjust this value to control the level of concurrency\n", |
284 | 279 | " semaphore = asyncio.Semaphore(max_concurrency)\n", |
285 | | - " \n", |
286 | | - " await cosmos_async_client.__aenter__()\n", |
287 | 280 | " print(\"Starting doc load, please wait...\")\n", |
288 | 281 | " \n", |
| 282 | + " def upsert_item_sync(obj):\n", |
| 283 | + " movies_container.upsert_item(body=obj)\n", |
| 284 | + " \n", |
289 | 285 | " async def upsert_object(obj):\n", |
290 | 286 | " nonlocal counter\n", |
291 | 287 | " async with semaphore:\n", |
292 | | - " #The following code to create vector embeddings for the data is commented out as the sample data is already vectorized.\n", |
293 | | - " #vectorArray = generate_embeddings(obj['overview'])\n", |
294 | | - " #obj[cosmos_vector_property] = vectorArray\n", |
295 | | - " await container.upsert_item(body=obj)\n", |
| 288 | + " await asyncio.get_event_loop().run_in_executor(None, upsert_item_sync, obj)\n", |
296 | 289 | " # Progress reporting\n", |
297 | 290 | " counter += 1\n", |
298 | 291 | " if counter % 100 == 0:\n", |
299 | | - " print(f\"Sent {counter} documents for insertion into collection.\") \n", |
300 | | - " \n", |
| 292 | + " print(f\"Sent {counter} documents for insertion into collection.\")\n", |
| 293 | + " \n", |
301 | 294 | " for obj in data:\n", |
302 | 295 | " tasks.append(asyncio.create_task(upsert_object(obj)))\n", |
303 | 296 | " \n", |
|
310 | 303 | " print(f\"Time taken: {duration:.2f} seconds ({duration:.3f} milliseconds)\")\n", |
311 | 304 | "\n", |
312 | 305 | "# Run the async function\n", |
313 | | - "await insert_data()" |
314 | | - ] |
315 | | - }, |
316 | | - { |
317 | | - "cell_type": "markdown", |
318 | | - "id": "590df42f-5416-4744-9968-5406730ed036", |
319 | | - "metadata": {}, |
320 | | - "source": [ |
321 | | - "# Set up containers for chat bot" |
322 | | - ] |
323 | | - }, |
324 | | - { |
325 | | - "cell_type": "code", |
326 | | - "execution_count": null, |
327 | | - "id": "70b0cb9e-14d6-48ef-bca4-76b626572bc3", |
328 | | - "metadata": {}, |
329 | | - "outputs": [], |
330 | | - "source": [ |
331 | | - "from azure.cosmos import CosmosClient\n", |
332 | | - "cosmos_sync_client = CosmosClient(url=cosmos_conn, credential=cosmos_key)\n", |
333 | | - "db = cosmos_sync_client.get_database_client(cosmos_database)\n", |
334 | | - "movies_container = db.get_container_client(cosmos_collection)\n", |
335 | | - "cache_container = db.get_container_client(cosmos_cache)" |
| 306 | + "await insert_data()\n", |
| 307 | + " " |
336 | 308 | ] |
337 | 309 | }, |
338 | 310 | { |
|
0 commit comments