|
54 | 54 | }, |
55 | 55 | { |
56 | 56 | "cell_type": "code", |
57 | | - "execution_count": 2, |
| 57 | + "execution_count": null, |
58 | 58 | "id": "1224b921-17e3-49fd-8abb-63459eeb2c28", |
59 | 59 | "metadata": {}, |
60 | 60 | "outputs": [], |
|
117 | 117 | }, |
118 | 118 | { |
119 | 119 | "cell_type": "code", |
120 | | - "execution_count": 3, |
| 120 | + "execution_count": null, |
121 | 121 | "id": "305807f8-0205-481a-9d71-e8b88b504019", |
122 | 122 | "metadata": {}, |
123 | | - "outputs": [ |
124 | | - { |
125 | | - "name": "stdout", |
126 | | - "output_type": "stream", |
127 | | - "text": [ |
128 | | - "Container with id 'vectorstore' created\n", |
129 | | - "Container with id 'vectorcache' created\n" |
130 | | - ] |
131 | | - } |
132 | | - ], |
| 123 | + "outputs": [], |
133 | 124 | "source": [ |
134 | 125 | "db = cosmos_client.create_database_if_not_exists(cosmos_database)\n", |
135 | 126 | "\n", |
|
206 | 197 | }, |
207 | 198 | { |
208 | 199 | "cell_type": "code", |
209 | | - "execution_count": 4, |
| 200 | + "execution_count": null, |
210 | 201 | "id": "7ccf697d-f12c-4205-8a93-114ff3c3c86e", |
211 | 202 | "metadata": {}, |
212 | 203 | "outputs": [], |
213 | 204 | "source": [ |
214 | | - "from tenacity import retry, stop_after_attempt, wait_random_exponential\n", |
| 205 | + "from tenacity import retry, stop_after_attempt, wait_random_exponential \n", |
| 206 | + "import logging\n", |
215 | 207 | "@retry(wait=wait_random_exponential(min=2, max=300), stop=stop_after_attempt(20))\n", |
216 | 208 | "def generate_embeddings(text):\n", |
217 | | - " \n", |
218 | | - " response = openai_client.embeddings.create(\n", |
219 | | - " input=text,\n", |
220 | | - " model=openai_embeddings_deployment,\n", |
221 | | - " dimensions=openai_embeddings_dimensions\n", |
222 | | - " )\n", |
223 | | - " \n", |
224 | | - " embeddings = response.model_dump()\n", |
225 | | - " return embeddings['data'][0]['embedding']" |
| 209 | + " try: \n", |
| 210 | + " response = openai_client.embeddings.create(\n", |
| 211 | + " input=text,\n", |
| 212 | + " model=openai_embeddings_deployment,\n", |
| 213 | + " dimensions=openai_embeddings_dimensions\n", |
| 214 | + " )\n", |
| 215 | + " embeddings = response.model_dump()\n", |
| 216 | + " return embeddings['data'][0]['embedding']\n", |
| 217 | + " except Exception as e:\n", |
| 218 | + " # Log the exception with traceback for easier debugging\n", |
| 219 | + " logging.error(\"An error occurred while generating embeddings.\", exc_info=True)\n", |
| 220 | + " raise" |
226 | 221 | ] |
227 | 222 | }, |
228 | 223 | { |
|
236 | 231 | }, |
237 | 232 | { |
238 | 233 | "cell_type": "code", |
239 | | - "execution_count": 7, |
| 234 | + "execution_count": null, |
240 | 235 | "id": "efc296c5-82e3-4fc1-bff5-ea62893341f8", |
241 | 236 | "metadata": {}, |
242 | | - "outputs": [ |
243 | | - { |
244 | | - "data": { |
245 | | - "text/plain": [ |
246 | | - "4489" |
247 | | - ] |
248 | | - }, |
249 | | - "execution_count": 7, |
250 | | - "metadata": {}, |
251 | | - "output_type": "execute_result" |
252 | | - } |
253 | | - ], |
| 237 | + "outputs": [], |
254 | 238 | "source": [ |
255 | 239 | "# Unzip the data file\n", |
256 | 240 | "with zipfile.ZipFile(\"../../DataSet/Movies/MovieLens-4489-256D.zip\", 'r') as zip_ref: \n", |
|
275 | 259 | }, |
276 | 260 | { |
277 | 261 | "cell_type": "code", |
278 | | - "execution_count": 8, |
| 262 | + "execution_count": null, |
279 | 263 | "id": "f4555af4-cf1e-483f-a6e0-1d27fc139c8b", |
280 | 264 | "metadata": {}, |
281 | 265 | "outputs": [], |
|
287 | 271 | }, |
288 | 272 | { |
289 | 273 | "cell_type": "code", |
290 | | - "execution_count": 9, |
| 274 | + "execution_count": null, |
291 | 275 | "id": "becc3ad5-851c-4d44-8f6d-52a368d87b83", |
292 | 276 | "metadata": {}, |
293 | | - "outputs": [ |
294 | | - { |
295 | | - "name": "stdout", |
296 | | - "output_type": "stream", |
297 | | - "text": [ |
298 | | - "Starting doc load, please wait...\n", |
299 | | - "Sent 100 documents for insertion into collection.\n", |
300 | | - "Sent 200 documents for insertion into collection.\n", |
301 | | - "Sent 300 documents for insertion into collection.\n", |
302 | | - "Sent 400 documents for insertion into collection.\n", |
303 | | - "Sent 500 documents for insertion into collection.\n", |
304 | | - "Sent 600 documents for insertion into collection.\n", |
305 | | - "Sent 700 documents for insertion into collection.\n", |
306 | | - "Sent 800 documents for insertion into collection.\n", |
307 | | - "Sent 900 documents for insertion into collection.\n", |
308 | | - "Sent 1000 documents for insertion into collection.\n", |
309 | | - "Sent 1100 documents for insertion into collection.\n", |
310 | | - "Sent 1200 documents for insertion into collection.\n", |
311 | | - "Sent 1300 documents for insertion into collection.\n", |
312 | | - "Sent 1400 documents for insertion into collection.\n", |
313 | | - "Sent 1500 documents for insertion into collection.\n", |
314 | | - "Sent 1600 documents for insertion into collection.\n", |
315 | | - "Sent 1700 documents for insertion into collection.\n", |
316 | | - "Sent 1800 documents for insertion into collection.\n", |
317 | | - "Sent 1900 documents for insertion into collection.\n", |
318 | | - "Sent 2000 documents for insertion into collection.\n", |
319 | | - "Sent 2100 documents for insertion into collection.\n", |
320 | | - "Sent 2200 documents for insertion into collection.\n", |
321 | | - "Sent 2300 documents for insertion into collection.\n", |
322 | | - "Sent 2400 documents for insertion into collection.\n", |
323 | | - "Sent 2500 documents for insertion into collection.\n", |
324 | | - "Sent 2600 documents for insertion into collection.\n", |
325 | | - "Sent 2700 documents for insertion into collection.\n", |
326 | | - "Sent 2800 documents for insertion into collection.\n", |
327 | | - "Sent 2900 documents for insertion into collection.\n", |
328 | | - "Sent 3000 documents for insertion into collection.\n", |
329 | | - "Sent 3100 documents for insertion into collection.\n", |
330 | | - "Sent 3200 documents for insertion into collection.\n", |
331 | | - "Sent 3300 documents for insertion into collection.\n", |
332 | | - "Sent 3400 documents for insertion into collection.\n", |
333 | | - "Sent 3500 documents for insertion into collection.\n", |
334 | | - "Sent 3600 documents for insertion into collection.\n", |
335 | | - "Sent 3700 documents for insertion into collection.\n", |
336 | | - "Sent 3800 documents for insertion into collection.\n", |
337 | | - "Sent 3900 documents for insertion into collection.\n", |
338 | | - "Sent 4000 documents for insertion into collection.\n", |
339 | | - "Sent 4100 documents for insertion into collection.\n", |
340 | | - "Sent 4200 documents for insertion into collection.\n", |
341 | | - "Sent 4300 documents for insertion into collection.\n", |
342 | | - "Sent 4400 documents for insertion into collection.\n", |
343 | | - "All 4489 documents inserted!\n", |
344 | | - "Time taken: 92.83 seconds (92.834 milliseconds)\n" |
345 | | - ] |
346 | | - } |
347 | | - ], |
| 277 | + "outputs": [], |
348 | 278 | "source": [ |
349 | 279 | "import asyncio\n", |
350 | 280 | "import time\n", |
|
697 | 627 | "name": "python", |
698 | 628 | "nbconvert_exporter": "python", |
699 | 629 | "pygments_lexer": "ipython3", |
700 | | - "version": "3.11.0" |
| 630 | + "version": "3.12.6" |
701 | 631 | } |
702 | 632 | }, |
703 | 633 | "nbformat": 4, |
|
0 commit comments