Skip to content

Commit e2569c6

Browse files
committed
more detail added
1 parent 51a90f4 commit e2569c6

File tree

1 file changed

+95
-81
lines changed

1 file changed

+95
-81
lines changed

python-recipes/semantic-cache/03_context_enabled_semantic_caching.ipynb

Lines changed: 95 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
},
8181
{
8282
"cell_type": "code",
83-
"execution_count": 12,
83+
"execution_count": 1,
8484
"metadata": {
8585
"id": "v6g7eVRZAcFA"
8686
},
@@ -103,20 +103,11 @@
103103
},
104104
{
105105
"cell_type": "code",
106-
"execution_count": 2,
106+
"execution_count": null,
107107
"metadata": {
108108
"id": "m04KxSuhBiOx"
109109
},
110-
"outputs": [
111-
{
112-
"ename": "SyntaxError",
113-
"evalue": "invalid syntax (2741142086.py, line 3)",
114-
"output_type": "error",
115-
"traceback": [
116-
" \u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[31m \u001b[39m\u001b[31mcurl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\u001b[39m\n ^\n\u001b[31mSyntaxError\u001b[39m\u001b[31m:\u001b[39m invalid syntax\n"
117-
]
118-
}
119-
],
110+
"outputs": [],
120111
"source": [
121112
"# NBVAL_SKIP\n",
122113
"%%sh\n",
@@ -154,7 +145,7 @@
154145
},
155146
{
156147
"cell_type": "code",
157-
"execution_count": 13,
148+
"execution_count": 2,
158149
"metadata": {
159150
"colab": {
160151
"base_uri": "https://localhost:8080/"
@@ -169,7 +160,7 @@
169160
"True"
170161
]
171162
},
172-
"execution_count": 13,
163+
"execution_count": 2,
173164
"metadata": {},
174165
"output_type": "execute_result"
175166
}
@@ -197,40 +188,58 @@
197188
"redis_client.ping()"
198189
]
199190
},
191+
{
192+
"cell_type": "markdown",
193+
"metadata": {},
194+
"source": [
195+
"## Essential Imports\n",
196+
"\n",
197+
"This cell imports all the key libraries needed for Context-Enabled Semantic Caching:\n",
198+
"\n",
199+
"**Core AI & ML:**\n",
200+
"- `sentence_transformers` - For generating text embeddings using the all-MiniLM-L6-v2 model\n",
201+
"- `openai` - Client libraries for both OpenAI and Azure OpenAI APIs\n",
202+
"- `tiktoken` - Accurate token counting for cost calculation\n",
203+
"\n",
204+
"**Redis & Vector Search:**\n",
205+
"- `redis` - Direct Redis client for database operations\n",
206+
"- `redisvl` - Redis Vector Library for semantic search capabilities\n",
207+
"- `SearchIndex` - Vector search index management (class provided by `redisvl`)\n",
208+
"- `HFTextVectorizer` - Hugging Face text vectorizer (class provided by `redisvl`)\n",
209+
"\n",
210+
"**Data & Utilities:**\n",
211+
"- `pandas` - Data analysis and telemetry reporting\n",
212+
"- `numpy` - Numerical operations for vector handling\n",
213+
"- `typing` - Type hints for better code clarity\n",
214+
"- `dotenv` - Environment variable management for API keys"
215+
]
216+
},
200217
{
201218
"cell_type": "code",
202-
"execution_count": 20,
219+
"execution_count": 3,
203220
"metadata": {},
204221
"outputs": [
222+
{
223+
"name": "stderr",
224+
"output_type": "stream",
225+
"text": [
226+
"c:\\Users\\PhilipLaussermair\\Desktop\\Code\\Internal\\sc recipe\\redis-ai-resources\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
227+
" from .autonotebook import tqdm as notebook_tqdm\n"
228+
]
229+
},
205230
{
206231
"data": {
207232
"text/plain": [
208233
"True"
209234
]
210235
},
211-
"execution_count": 20,
236+
"execution_count": 3,
212237
"metadata": {},
213238
"output_type": "execute_result"
214239
}
215240
],
216241
"source": [
217242
"import os\n",
218-
"\n",
219-
"from dotenv import load_dotenv\n",
220-
"\n",
221-
"# Load environment variables from .env file\n",
222-
"# Make sure you have a .env file in the root of this project\n",
223-
"load_dotenv()"
224-
]
225-
},
226-
{
227-
"cell_type": "code",
228-
"execution_count": 21,
229-
"metadata": {
230-
"id": "XtfiyQ4TEQmN"
231-
},
232-
"outputs": [],
233-
"source": [
234243
"import time\n",
235244
"import uuid\n",
236245
"import numpy as np\n",
@@ -246,9 +255,16 @@
246255
"import logging\n",
247256
"import sys\n",
248257
"\n",
258+
"from dotenv import load_dotenv\n",
259+
"\n",
260+
"# Load environment variables from .env file\n",
261+
"# Make sure you have a .env file in the root of this project\n",
262+
"\n",
263+
"\n",
249264
"# Suppress noisy loggers\n",
250265
"logging.getLogger(\"sentence_transformers\").setLevel(logging.WARNING)\n",
251-
"logging.getLogger(\"httpx\").setLevel(logging.WARNING)"
266+
"logging.getLogger(\"httpx\").setLevel(logging.WARNING)\n",
267+
"load_dotenv()"
252268
]
253269
},
254270
{
@@ -261,14 +277,12 @@
261277
"\n",
262278
"- **Priority 1**: OpenAI (if `OPENAI_API_KEY` is present)\n",
263279
"- **Priority 2**: Azure OpenAI (if `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_ENDPOINT` are present) \n",
264-
"- **Fallback**: Exit with clear instructions if no credentials found\n",
265-
"\n",
266-
"This approach ensures the notebook works in both development and CI/CD environments without interactive prompts."
280+
"- **Fallback**: Exit with clear instructions if no credentials found"
267281
]
268282
},
269283
{
270284
"cell_type": "code",
271-
"execution_count": 22,
285+
"execution_count": 4,
272286
"metadata": {},
273287
"outputs": [
274288
{
@@ -346,14 +360,14 @@
346360
},
347361
{
348362
"cell_type": "code",
349-
"execution_count": 23,
363+
"execution_count": 5,
350364
"metadata": {},
351365
"outputs": [
352366
{
353367
"name": "stdout",
354368
"output_type": "stream",
355369
"text": [
356-
"12:03:18 redisvl.index.index INFO Index already exists, overwriting.\n"
370+
"12:16:59 redisvl.index.index INFO Index already exists, overwriting.\n"
357371
]
358372
}
359373
],
@@ -417,7 +431,7 @@
417431
},
418432
{
419433
"cell_type": "code",
420-
"execution_count": 24,
434+
"execution_count": 7,
421435
"metadata": {},
422436
"outputs": [],
423437
"source": [
@@ -581,7 +595,7 @@
581595
},
582596
{
583597
"cell_type": "code",
584-
"execution_count": null,
598+
"execution_count": 8,
585599
"metadata": {
586600
"id": "i3LSCGr3E1t8"
587601
},
@@ -700,7 +714,7 @@
700714
},
701715
{
702716
"cell_type": "code",
703-
"execution_count": null,
717+
"execution_count": 9,
704718
"metadata": {
705719
"id": "6APF2GQaE3fm"
706720
},
@@ -856,7 +870,7 @@
856870
},
857871
{
858872
"cell_type": "code",
859-
"execution_count": null,
873+
"execution_count": 10,
860874
"metadata": {
861875
"colab": {
862876
"base_uri": "https://localhost:8080/"
@@ -873,17 +887,17 @@
873887
"============================================================\n",
874888
"🧊 Scenario 1: Plain LLM – cache miss\n",
875889
"============================================================\n",
876-
"First, verify the user's access permissions to ensure they have the appropriate role or rights to view the dashboard. Then, check for any connectivity issues, such as VPN or network problems, and confirm the dashboard service is up and running. If the issue persists, review potential account-specific restrictions or errors.\n",
890+
"First, ensure the user has the correct permissions or roles assigned to access the dashboard. Next, verify if there are connectivity issues, incorrect login credentials, or if the dashboard tool is experiencing outages. If everything seems fine, check if their account is active and not locked or expired.\n",
877891
"\n",
878892
"============================================================\n",
879893
"📦 Scenario 2: Semantic Cache Hit – generic, extremely fast, no user memory\n",
880894
"============================================================\n",
881-
"First, verify the user's access permissions to ensure they have the appropriate role or rights to view the dashboard. Then, check for any connectivity issues, such as VPN or network problems, and confirm the dashboard service is up and running. If the issue persists, review potential account-specific restrictions or errors.\n",
895+
"First, ensure the user has the correct permissions or roles assigned to access the dashboard. Next, verify if there are connectivity issues, incorrect login credentials, or if the dashboard tool is experiencing outages. If everything seems fine, check if their account is active and not locked or expired.\n",
882896
"\n",
883897
"============================================================\n",
884898
"🧠 Scenario 3: Context-Enabled Semantic Cache Hit – personalized with user memory\n",
885899
"============================================================\n",
886-
"First, check if the user has the 'finance_dashboard_viewer' role correctly assigned, as you've tackled similar issues before. Next, ensure they’re using the latest version of Chrome on macOS and confirm there are no VPN or network disruptions. If problems continue, investigate any SSO-related account restrictions that might be affecting access.\n",
900+
"First, check if the user’s 'finance_dashboard_viewer' role is correctly configured to grant access to the dashboard. Since you know that SSO setups can sometimes be tricky, ensure there are no login issues and that the necessary permissions are intact. Lastly, verify that their account is active and not locked, especially after recent troubleshooting efforts.\n",
887901
"\n"
888902
]
889903
}
@@ -969,7 +983,7 @@
969983
},
970984
{
971985
"cell_type": "code",
972-
"execution_count": 28,
986+
"execution_count": 11,
973987
"metadata": {
974988
"colab": {
975989
"base_uri": "https://localhost:8080/",
@@ -1025,17 +1039,17 @@
10251039
" <th>0</th>\n",
10261040
" <td>user_cold</td>\n",
10271041
" <td>miss</td>\n",
1028-
" <td>1024.90</td>\n",
1042+
" <td>1413.52</td>\n",
10291043
" <td>gpt-4o</td>\n",
10301044
" <td>25</td>\n",
1031-
" <td>59</td>\n",
1032-
" <td>84</td>\n",
1045+
" <td>56</td>\n",
1046+
" <td>81</td>\n",
10331047
" </tr>\n",
10341048
" <tr>\n",
10351049
" <th>1</th>\n",
10361050
" <td>user_nocontext</td>\n",
10371051
" <td>hit_raw</td>\n",
1038-
" <td>15.95</td>\n",
1052+
" <td>14.46</td>\n",
10391053
" <td>cache</td>\n",
10401054
" <td>0</td>\n",
10411055
" <td>0</td>\n",
@@ -1045,26 +1059,26 @@
10451059
" <th>2</th>\n",
10461060
" <td>user_withcontext</td>\n",
10471061
" <td>hit_personalized</td>\n",
1048-
" <td>3121.80</td>\n",
1062+
" <td>2727.46</td>\n",
10491063
" <td>gpt-4o-mini</td>\n",
1050-
" <td>233</td>\n",
1051-
" <td>67</td>\n",
1052-
" <td>300</td>\n",
1064+
" <td>230</td>\n",
1065+
" <td>69</td>\n",
1066+
" <td>299</td>\n",
10531067
" </tr>\n",
10541068
" </tbody>\n",
10551069
"</table>\n",
10561070
"</div>"
10571071
],
10581072
"text/plain": [
10591073
" user_id cache_status latency_ms response_source \\\n",
1060-
"0 user_cold miss 1024.90 gpt-4o \n",
1061-
"1 user_nocontext hit_raw 15.95 cache \n",
1062-
"2 user_withcontext hit_personalized 3121.80 gpt-4o-mini \n",
1074+
"0 user_cold miss 1413.52 gpt-4o \n",
1075+
"1 user_nocontext hit_raw 14.46 cache \n",
1076+
"2 user_withcontext hit_personalized 2727.46 gpt-4o-mini \n",
10631077
"\n",
10641078
" input_tokens output_tokens total_tokens \n",
1065-
"0 25 59 84 \n",
1079+
"0 25 56 81 \n",
10661080
"1 0 0 0 \n",
1067-
"2 233 67 300 "
1081+
"2 230 69 299 "
10681082
]
10691083
},
10701084
"metadata": {},
@@ -1075,7 +1089,7 @@
10751089
"output_type": "stream",
10761090
"text": [
10771091
"\n",
1078-
"⏱️ Personalized response (user_withcontext) was 2096 ms slower than the plain LLM — a 67.2% slowdown.\n",
1092+
"⏱️ Personalized response (user_withcontext) was 1313 ms slower than the plain LLM — a 48.2% slowdown.\n",
10791093
"📌 However, it returned a tailored response based on user memory, offering higher relevance.\n",
10801094
"\n",
10811095
"============================================================\n",
@@ -1123,11 +1137,11 @@
11231137
" <td>miss</td>\n",
11241138
" <td>gpt-4o</td>\n",
11251139
" <td>25</td>\n",
1126-
" <td>59</td>\n",
1127-
" <td>1024.90</td>\n",
1128-
" <td>0.00101</td>\n",
1129-
" <td>0.00101</td>\n",
1130-
" <td>0.00000</td>\n",
1140+
" <td>56</td>\n",
1141+
" <td>1413.52</td>\n",
1142+
" <td>0.000965</td>\n",
1143+
" <td>0.000965</td>\n",
1144+
" <td>0.000000</td>\n",
11311145
" </tr>\n",
11321146
" <tr>\n",
11331147
" <th>1</th>\n",
@@ -1136,22 +1150,22 @@
11361150
" <td>cache</td>\n",
11371151
" <td>0</td>\n",
11381152
" <td>0</td>\n",
1139-
" <td>15.95</td>\n",
1140-
" <td>0.00000</td>\n",
1141-
" <td>0.00000</td>\n",
1142-
" <td>0.00000</td>\n",
1153+
" <td>14.46</td>\n",
1154+
" <td>0.000000</td>\n",
1155+
" <td>0.000000</td>\n",
1156+
" <td>0.000000</td>\n",
11431157
" </tr>\n",
11441158
" <tr>\n",
11451159
" <th>2</th>\n",
11461160
" <td>user_withcontext</td>\n",
11471161
" <td>hit_personalized</td>\n",
11481162
" <td>gpt-4o-mini</td>\n",
1149-
" <td>233</td>\n",
1150-
" <td>67</td>\n",
1151-
" <td>3121.80</td>\n",
1152-
" <td>0.00055</td>\n",
1153-
" <td>0.00217</td>\n",
1154-
" <td>0.00162</td>\n",
1163+
" <td>230</td>\n",
1164+
" <td>69</td>\n",
1165+
" <td>2727.46</td>\n",
1166+
" <td>0.000552</td>\n",
1167+
" <td>0.002185</td>\n",
1168+
" <td>0.001633</td>\n",
11551169
" </tr>\n",
11561170
" </tbody>\n",
11571171
"</table>\n",
@@ -1161,12 +1175,12 @@
11611175
" user_id cache_status response_source input_tokens \\\n",
11621176
"0 user_cold miss gpt-4o 25 \n",
11631177
"1 user_nocontext hit_raw cache 0 \n",
1164-
"2 user_withcontext hit_personalized gpt-4o-mini 233 \n",
1178+
"2 user_withcontext hit_personalized gpt-4o-mini 230 \n",
11651179
"\n",
11661180
" output_tokens latency_ms cost_usd baseline_cost_usd savings_usd \n",
1167-
"0 59 1024.90 0.00101 0.00101 0.00000 \n",
1168-
"1 0 15.95 0.00000 0.00000 0.00000 \n",
1169-
"2 67 3121.80 0.00055 0.00217 0.00162 "
1181+
"0 56 1413.52 0.000965 0.000965 0.000000 \n",
1182+
"1 0 14.46 0.000000 0.000000 0.000000 \n",
1183+
"2 69 2727.46 0.000552 0.002185 0.001633 "
11701184
]
11711185
},
11721186
"metadata": {},
@@ -1180,7 +1194,7 @@
11801194
"🧾 Total Cost of Plain LLM Response: $0.0010\n",
11811195
"🧾 Total Cost of Personalized Response: $0.0006\n",
11821196
"\n",
1183-
"💡 Personalized response (user_withcontext) was cheaper than plain LLM by $0.0005 — a 45.5% cost improvement.\n"
1197+
"💡 Personalized response (user_withcontext) was cheaper than plain LLM by $0.0004 — a 42.8% cost improvement.\n"
11841198
]
11851199
}
11861200
],

0 commit comments

Comments
 (0)