Merge pull request #63 from TheovanKraay/add-debug-rag-python-sample

TheovanKraay · web-flow · commit ecc47028ec4c · 2024-09-20T16:39:31.000+01:00
Add error logging (with traceback) to generate_embeddings and remove notebook cell outputs
diff --git a/Python/CosmosDB-NoSQL_VectorSearch/CosmosDB-NoSQL-Quickstart-RAG-Chatbot.ipynb b/Python/CosmosDB-NoSQL_VectorSearch/CosmosDB-NoSQL-Quickstart-RAG-Chatbot.ipynb
@@ -54,7 +54,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "1224b921-17e3-49fd-8abb-63459eeb2c28",
    "metadata": {},
    "outputs": [],
@@ -117,19 +117,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "305807f8-0205-481a-9d71-e8b88b504019",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Container with id 'vectorstore' created\n",
-      "Container with id 'vectorcache' created\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "db = cosmos_client.create_database_if_not_exists(cosmos_database)\n",
     "\n",
@@ -206,23 +197,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "7ccf697d-f12c-4205-8a93-114ff3c3c86e",
    "metadata": {},
    "outputs": [],
    "source": [
-    "from tenacity import retry, stop_after_attempt, wait_random_exponential\n",
+    "from tenacity import retry, stop_after_attempt, wait_random_exponential \n",
+    "import logging\n",
     "@retry(wait=wait_random_exponential(min=2, max=300), stop=stop_after_attempt(20))\n",
     "def generate_embeddings(text):\n",
-    "    \n",
-    "    response = openai_client.embeddings.create(\n",
-    "        input=text,\n",
-    "        model=openai_embeddings_deployment,\n",
-    "        dimensions=openai_embeddings_dimensions\n",
-    "    )\n",
-    "    \n",
-    "    embeddings = response.model_dump()\n",
-    "    return embeddings['data'][0]['embedding']"
+    "    try:        \n",
+    "        response = openai_client.embeddings.create(\n",
+    "            input=text,\n",
+    "            model=openai_embeddings_deployment,\n",
+    "            dimensions=openai_embeddings_dimensions\n",
+    "        )\n",
+    "        embeddings = response.model_dump()\n",
+    "        return embeddings['data'][0]['embedding']\n",
+    "    except Exception as e:\n",
+    "        # Log the exception with traceback for easier debugging\n",
+    "        logging.error(\"An error occurred while generating embeddings.\", exc_info=True)\n",
+    "        raise"
    ]
   },
   {
@@ -236,21 +231,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "efc296c5-82e3-4fc1-bff5-ea62893341f8",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "4489"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Unzip the data file\n",
     "with zipfile.ZipFile(\"../../DataSet/Movies/MovieLens-4489-256D.zip\", 'r') as zip_ref: \n",
@@ -275,7 +259,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "f4555af4-cf1e-483f-a6e0-1d27fc139c8b",
    "metadata": {},
    "outputs": [],
@@ -287,64 +271,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "becc3ad5-851c-4d44-8f6d-52a368d87b83",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Starting doc load, please wait...\n",
-      "Sent 100 documents for insertion into collection.\n",
-      "Sent 200 documents for insertion into collection.\n",
-      "Sent 300 documents for insertion into collection.\n",
-      "Sent 400 documents for insertion into collection.\n",
-      "Sent 500 documents for insertion into collection.\n",
-      "Sent 600 documents for insertion into collection.\n",
-      "Sent 700 documents for insertion into collection.\n",
-      "Sent 800 documents for insertion into collection.\n",
-      "Sent 900 documents for insertion into collection.\n",
-      "Sent 1000 documents for insertion into collection.\n",
-      "Sent 1100 documents for insertion into collection.\n",
-      "Sent 1200 documents for insertion into collection.\n",
-      "Sent 1300 documents for insertion into collection.\n",
-      "Sent 1400 documents for insertion into collection.\n",
-      "Sent 1500 documents for insertion into collection.\n",
-      "Sent 1600 documents for insertion into collection.\n",
-      "Sent 1700 documents for insertion into collection.\n",
-      "Sent 1800 documents for insertion into collection.\n",
-      "Sent 1900 documents for insertion into collection.\n",
-      "Sent 2000 documents for insertion into collection.\n",
-      "Sent 2100 documents for insertion into collection.\n",
-      "Sent 2200 documents for insertion into collection.\n",
-      "Sent 2300 documents for insertion into collection.\n",
-      "Sent 2400 documents for insertion into collection.\n",
-      "Sent 2500 documents for insertion into collection.\n",
-      "Sent 2600 documents for insertion into collection.\n",
-      "Sent 2700 documents for insertion into collection.\n",
-      "Sent 2800 documents for insertion into collection.\n",
-      "Sent 2900 documents for insertion into collection.\n",
-      "Sent 3000 documents for insertion into collection.\n",
-      "Sent 3100 documents for insertion into collection.\n",
-      "Sent 3200 documents for insertion into collection.\n",
-      "Sent 3300 documents for insertion into collection.\n",
-      "Sent 3400 documents for insertion into collection.\n",
-      "Sent 3500 documents for insertion into collection.\n",
-      "Sent 3600 documents for insertion into collection.\n",
-      "Sent 3700 documents for insertion into collection.\n",
-      "Sent 3800 documents for insertion into collection.\n",
-      "Sent 3900 documents for insertion into collection.\n",
-      "Sent 4000 documents for insertion into collection.\n",
-      "Sent 4100 documents for insertion into collection.\n",
-      "Sent 4200 documents for insertion into collection.\n",
-      "Sent 4300 documents for insertion into collection.\n",
-      "Sent 4400 documents for insertion into collection.\n",
-      "All 4489 documents inserted!\n",
-      "Time taken: 92.83 seconds (92.834 milliseconds)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import asyncio\n",
     "import time\n",
@@ -697,7 +627,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.0"
+   "version": "3.12.6"
   }
  },
  "nbformat": 4,