Add "Essential Imports" overview cell and merge dotenv loading into the imports cell

redis-phil · redis-phil · commit e2569c6db2d4 · 2025-09-10T12:19:16.000-04:00
diff --git a/python-recipes/semantic-cache/03_context_enabled_semantic_caching.ipynb b/python-recipes/semantic-cache/03_context_enabled_semantic_caching.ipynb
@@ -80,7 +80,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 1,
    "metadata": {
     "id": "v6g7eVRZAcFA"
    },
@@ -103,20 +103,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "id": "m04KxSuhBiOx"
    },
-   "outputs": [
-    {
-     "ename": "SyntaxError",
-     "evalue": "invalid syntax (2741142086.py, line 3)",
-     "output_type": "error",
-     "traceback": [
-      "  \u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[31m    \u001b[39m\u001b[31mcurl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\u001b[39m\n               ^\n\u001b[31mSyntaxError\u001b[39m\u001b[31m:\u001b[39m invalid syntax\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
     "%%sh\n",
@@ -154,7 +145,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 2,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -169,7 +160,7 @@
        "True"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -197,40 +188,58 @@
     "redis_client.ping()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Essential Imports\n",
+    "\n",
+    "This cell imports the key libraries needed for Context-Enabled Semantic Caching, then quiets noisy loggers and loads environment variables from your `.env` file. The libraries are:\n",
+    "\n",
+    "**Core AI & ML:**\n",
+    "- `sentence_transformers` - For generating text embeddings using the all-MiniLM-L6-v2 model\n",
+    "- `openai` - Client libraries for both OpenAI and Azure OpenAI APIs\n",
+    "- `tiktoken` - Accurate token counting for cost calculation\n",
+    "\n",
+    "**Redis & Vector Search:**\n",
+    "- `redis` - Direct Redis client for database operations\n",
+    "- `redisvl` - Redis Vector Library for semantic search capabilities\n",
+    "- `SearchIndex` - RedisVL class for vector search index management\n",
+    "- `HFTextVectorizer` - RedisVL class for Hugging Face text vectorization\n",
+    "\n",
+    "**Data & Utilities:**\n",
+    "- `pandas` - Data analysis and telemetry reporting\n",
+    "- `numpy` - Numerical operations for vector handling\n",
+    "- `typing` - Type hints for better code clarity\n",
+    "- `dotenv` - Environment variable management for API keys"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<project-root>\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
        "True"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "import os\n",
-    "\n",
-    "from dotenv import load_dotenv\n",
-    "\n",
-    "# Load environment variables from .env file\n",
-    "# Make sure you have a .env file in the root of this project\n",
-    "load_dotenv()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {
-    "id": "XtfiyQ4TEQmN"
-   },
-   "outputs": [],
-   "source": [
     "import time\n",
     "import uuid\n",
     "import numpy as np\n",
@@ -246,9 +255,16 @@
     "import logging\n",
     "import sys\n",
     "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "# Environment variables are loaded from the .env file by load_dotenv() at the end of this cell\n",
+    "# Make sure you have a .env file in the root of this project\n",
+    "\n",
+    "\n",
     "# Suppress noisy loggers\n",
     "logging.getLogger(\"sentence_transformers\").setLevel(logging.WARNING)\n",
-    "logging.getLogger(\"httpx\").setLevel(logging.WARNING)"
+    "logging.getLogger(\"httpx\").setLevel(logging.WARNING)\n",
+    "load_dotenv()"
    ]
   },
   {
@@ -261,14 +277,12 @@
     "\n",
     "- **Priority 1**: OpenAI (if `OPENAI_API_KEY` is present)\n",
     "- **Priority 2**: Azure OpenAI (if `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_ENDPOINT` are present)  \n",
-    "- **Fallback**: Exit with clear instructions if no credentials found\n",
-    "\n",
-    "This approach ensures the notebook works in both development and CI/CD environments without interactive prompts."
+    "- **Fallback**: Exit with clear instructions if no credentials found"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -346,14 +360,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "12:03:18 redisvl.index.index INFO   Index already exists, overwriting.\n"
+      "12:16:59 redisvl.index.index INFO   Index already exists, overwriting.\n"
      ]
     }
    ],
@@ -417,7 +431,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -581,7 +595,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {
     "id": "i3LSCGr3E1t8"
    },
@@ -700,7 +714,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {
     "id": "6APF2GQaE3fm"
    },
@@ -856,7 +870,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -873,17 +887,17 @@
       "============================================================\n",
       "🧊 Scenario 1: Plain LLM – cache miss\n",
       "============================================================\n",
-      "First, verify the user's access permissions to ensure they have the appropriate role or rights to view the dashboard. Then, check for any connectivity issues, such as VPN or network problems, and confirm the dashboard service is up and running. If the issue persists, review potential account-specific restrictions or errors.\n",
+      "First, ensure the user has the correct permissions or roles assigned to access the dashboard. Next, verify if there are connectivity issues, incorrect login credentials, or if the dashboard tool is experiencing outages. If everything seems fine, check if their account is active and not locked or expired.\n",
       "\n",
       "============================================================\n",
       "📦 Scenario 2: Semantic Cache Hit – generic, extremely fast, no user memory\n",
       "============================================================\n",
-      "First, verify the user's access permissions to ensure they have the appropriate role or rights to view the dashboard. Then, check for any connectivity issues, such as VPN or network problems, and confirm the dashboard service is up and running. If the issue persists, review potential account-specific restrictions or errors.\n",
+      "First, ensure the user has the correct permissions or roles assigned to access the dashboard. Next, verify if there are connectivity issues, incorrect login credentials, or if the dashboard tool is experiencing outages. If everything seems fine, check if their account is active and not locked or expired.\n",
       "\n",
       "============================================================\n",
       "🧠 Scenario 3: Context-Enabled Semantic Cache Hit – personalized with user memory\n",
       "============================================================\n",
-      "First, check if the user has the 'finance_dashboard_viewer' role correctly assigned, as you've tackled similar issues before. Next, ensure they’re using the latest version of Chrome on macOS and confirm there are no VPN or network disruptions. If problems continue, investigate any SSO-related account restrictions that might be affecting access.\n",
+      "First, check if the user’s 'finance_dashboard_viewer' role is correctly configured to grant access to the dashboard. Since you know that SSO setups can sometimes be tricky, ensure there are no login issues and that the necessary permissions are intact. Lastly, verify that their account is active and not locked, especially after recent troubleshooting efforts.\n",
       "\n"
      ]
     }
@@ -969,7 +983,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 11,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -1025,17 +1039,17 @@
        "      <th>0</th>\n",
        "      <td>user_cold</td>\n",
        "      <td>miss</td>\n",
-       "      <td>1024.90</td>\n",
+       "      <td>1413.52</td>\n",
        "      <td>gpt-4o</td>\n",
        "      <td>25</td>\n",
-       "      <td>59</td>\n",
-       "      <td>84</td>\n",
+       "      <td>56</td>\n",
+       "      <td>81</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>user_nocontext</td>\n",
        "      <td>hit_raw</td>\n",
-       "      <td>15.95</td>\n",
+       "      <td>14.46</td>\n",
        "      <td>cache</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
@@ -1045,26 +1059,26 @@
        "      <th>2</th>\n",
        "      <td>user_withcontext</td>\n",
        "      <td>hit_personalized</td>\n",
-       "      <td>3121.80</td>\n",
+       "      <td>2727.46</td>\n",
        "      <td>gpt-4o-mini</td>\n",
-       "      <td>233</td>\n",
-       "      <td>67</td>\n",
-       "      <td>300</td>\n",
+       "      <td>230</td>\n",
+       "      <td>69</td>\n",
+       "      <td>299</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "            user_id      cache_status  latency_ms response_source  \\\n",
-       "0         user_cold              miss     1024.90          gpt-4o   \n",
-       "1    user_nocontext           hit_raw       15.95           cache   \n",
-       "2  user_withcontext  hit_personalized     3121.80     gpt-4o-mini   \n",
+       "0         user_cold              miss     1413.52          gpt-4o   \n",
+       "1    user_nocontext           hit_raw       14.46           cache   \n",
+       "2  user_withcontext  hit_personalized     2727.46     gpt-4o-mini   \n",
        "\n",
        "   input_tokens  output_tokens  total_tokens  \n",
-       "0            25             59            84  \n",
+       "0            25             56            81  \n",
        "1             0              0             0  \n",
-       "2           233             67           300  "
+       "2           230             69           299  "
       ]
      },
      "metadata": {},
@@ -1075,7 +1089,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "⏱️ Personalized response (user_withcontext) was 2096 ms slower than the plain LLM — a 67.2% slowdown.\n",
+      "⏱️ Personalized response (user_withcontext) was 1313 ms slower than the plain LLM — a 48.2% slowdown.\n",
       "📌 However, it returned a tailored response based on user memory, offering higher relevance.\n",
       "\n",
       "============================================================\n",
@@ -1123,11 +1137,11 @@
        "      <td>miss</td>\n",
        "      <td>gpt-4o</td>\n",
        "      <td>25</td>\n",
-       "      <td>59</td>\n",
-       "      <td>1024.90</td>\n",
-       "      <td>0.00101</td>\n",
-       "      <td>0.00101</td>\n",
-       "      <td>0.00000</td>\n",
+       "      <td>56</td>\n",
+       "      <td>1413.52</td>\n",
+       "      <td>0.000965</td>\n",
+       "      <td>0.000965</td>\n",
+       "      <td>0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1136,22 +1150,22 @@
        "      <td>cache</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
-       "      <td>15.95</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.00000</td>\n",
+       "      <td>14.46</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>user_withcontext</td>\n",
        "      <td>hit_personalized</td>\n",
        "      <td>gpt-4o-mini</td>\n",
-       "      <td>233</td>\n",
-       "      <td>67</td>\n",
-       "      <td>3121.80</td>\n",
-       "      <td>0.00055</td>\n",
-       "      <td>0.00217</td>\n",
-       "      <td>0.00162</td>\n",
+       "      <td>230</td>\n",
+       "      <td>69</td>\n",
+       "      <td>2727.46</td>\n",
+       "      <td>0.000552</td>\n",
+       "      <td>0.002185</td>\n",
+       "      <td>0.001633</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1161,12 +1175,12 @@
        "            user_id      cache_status response_source  input_tokens  \\\n",
        "0         user_cold              miss          gpt-4o            25   \n",
        "1    user_nocontext           hit_raw           cache             0   \n",
-       "2  user_withcontext  hit_personalized     gpt-4o-mini           233   \n",
+       "2  user_withcontext  hit_personalized     gpt-4o-mini           230   \n",
        "\n",
        "   output_tokens  latency_ms  cost_usd  baseline_cost_usd  savings_usd  \n",
-       "0             59     1024.90   0.00101            0.00101      0.00000  \n",
-       "1              0       15.95   0.00000            0.00000      0.00000  \n",
-       "2             67     3121.80   0.00055            0.00217      0.00162  "
+       "0             56     1413.52  0.000965           0.000965     0.000000  \n",
+       "1              0       14.46  0.000000           0.000000     0.000000  \n",
+       "2             69     2727.46  0.000552           0.002185     0.001633  "
       ]
      },
      "metadata": {},
@@ -1180,7 +1194,7 @@
       "🧾 Total Cost of Plain LLM Response: $0.0010\n",
       "🧾 Total Cost of Personalized Response: $0.0006\n",
       "\n",
-      "💡 Personalized response (user_withcontext) was cheaper than plain LLM by $0.0005 — a 45.5% cost improvement.\n"
+      "💡 Personalized response (user_withcontext) was cheaper than plain LLM by $0.0004 — a 42.8% cost improvement.\n"
      ]
     }
    ],