updates for notebook

rbs333 · rbs333 · commit 18ff1008c5a4 · 2025-03-04T09:34:22.000-05:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -105,7 +105,11 @@ jobs:
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
         run: |
           docker run -d --name redis -p 6379:6379 redis/redis-stack-server:latest
-          make test-notebooks 
+          if [[ "$(printf '%s\n' "3.10" "${{ matrix.python-version }}" | sort -V | head -n 1)" == "3.10" ]]; then
+            make test-notebooks
+          else
+            poetry run test-notebooks --ignore ./docs/user_guide/09_threshold_optimization.ipynb
+          fi
 
   docs:
     runs-on: ubuntu-latest
diff --git a/README.md b/README.md
@@ -225,6 +225,8 @@ embeddings = co.embed_many(
 ### Threshold Optimization
 [Optimize distance thresholds for cache and router](https://docs.redisvl.com/en/stable/user_guide/09_threshold_optimization.html) with the utility `ThresholdOptimizer` classes.
 
+**Note:** Threshold optimization is only available for `python > 3.9` (i.e. Python 3.10 and newer).
+
 
 
 ## 💫 Extensions
diff --git a/docs/user_guide/09_threshold_optimization.ipynb b/docs/user_guide/09_threshold_optimization.ipynb
@@ -6,7 +6,9 @@
    "source": [
     "# Threshold Optimization\n",
     "\n",
-    "After setting up `SemanticRouter` or `SemanticCache` it best to tune the `distance_threshold` to get the best performance out of your system. RedisVL provides helper classes to make this light weight optimization easy.\n",
+    "After setting up `SemanticRouter` or `SemanticCache` it's best to tune the `distance_threshold` to get the most performance out of your system. RedisVL provides helper classes to make this lightweight optimization easy.\n",
+    "\n",
+    "> **Note:** Threshold optimization relies on `python > 3.9`.\n",
     "\n",
     "# CacheThresholdOptimizer\n",
     "\n",
@@ -53,8 +55,8 @@
        "  'prompt': 'what is the capital of france?',\n",
        "  'response': 'paris',\n",
        "  'vector_distance': 0.421104669571,\n",
-       "  'inserted_at': 1741033054.9,\n",
-       "  'updated_at': 1741033054.9,\n",
+       "  'inserted_at': 1741039231.99,\n",
+       "  'updated_at': 1741039231.99,\n",
        "  'key': 'sem_cache:c990cc06e5e77570e5f03360426d2b7f947cbb5a67daa8af8164bfe0b3e24fe3'}]"
       ]
      },
@@ -112,7 +114,7 @@
     }
    ],
    "source": [
-    "from redisvl.utils.threshold_optimizer.cache import CacheThresholdOptimizer\n",
+    "from redisvl.utils.optimize import CacheThresholdOptimizer\n",
     "\n",
     "test_data = [\n",
     "    {\n",
@@ -181,8 +183,8 @@
        "  'prompt': 'what is the capital of france?',\n",
        "  'response': 'paris',\n",
        "  'vector_distance': 0.0835866332054,\n",
-       "  'inserted_at': 1741033054.9,\n",
-       "  'updated_at': 1741033054.9,\n",
+       "  'inserted_at': 1741039231.99,\n",
+       "  'updated_at': 1741039231.99,\n",
        "  'key': 'sem_cache:c990cc06e5e77570e5f03360426d2b7f947cbb5a67daa8af8164bfe0b3e24fe3'}]"
       ]
      },
@@ -331,13 +333,13 @@
      "text": [
       "Route thresholds before: {'greeting': 0.5, 'farewell': 0.5} \n",
       "\n",
-      "Eval metric F1: start 0.438, end 0.562 \n",
-      "Ending thresholds: {'greeting': 0.09239303792843903, 'farewell': 0.6535353535353534}\n"
+      "Eval metric F1: start 0.438, end 0.719 \n",
+      "Ending thresholds: {'greeting': 1.0858585858585856, 'farewell': 0.5545454545454545}\n"
      ]
     }
    ],
    "source": [
-    "from redisvl.utils.threshold_optimizer.router import RouterThresholdOptimizer\n",
+    "from redisvl.utils.optimize import RouterThresholdOptimizer\n",
     "\n",
     "print(f\"Route thresholds before: {router.route_thresholds} \\n\")\n",
     "optimizer = RouterThresholdOptimizer(router, test_data)\n",
diff --git a/docs/user_guide/router.yaml b/docs/user_guide/router.yaml
@@ -7,7 +7,7 @@ routes:
   - what's trending in tech?
   metadata:
     category: tech
-    priority: '1'
+    priority: 1
   distance_threshold: 1.0
 - name: sports
   references:
@@ -18,7 +18,7 @@ routes:
   - basketball and football
   metadata:
     category: sports
-    priority: '2'
+    priority: 2
   distance_threshold: 0.5
 - name: entertainment
   references:
@@ -27,12 +27,11 @@ routes:
   - what's new in the entertainment industry?
   metadata:
     category: entertainment
-    priority: '3'
+    priority: 3
   distance_threshold: 0.7
 vectorizer:
   type: hf
   model: sentence-transformers/all-mpnet-base-v2
 routing_config:
-  distance_threshold: 0.5
   max_k: 3
   aggregation_method: min
diff --git a/redisvl/utils/optimize/__init__.py b/redisvl/utils/optimize/__init__.py
@@ -0,0 +1,12 @@
+from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
+from redisvl.utils.optimize.cache import CacheThresholdOptimizer
+from redisvl.utils.optimize.router import RouterThresholdOptimizer
+from redisvl.utils.optimize.schema import TestData
+
+__all__ = [
+    "CacheThresholdOptimizer",
+    "RouterThresholdOptimizer",
+    "EvalMetric",
+    "BaseThresholdOptimizer",
+    "TestData",
+]
diff --git a/redisvl/utils/optimize/base.py b/redisvl/utils/optimize/base.py
@@ -2,29 +2,16 @@
 from enum import Enum
 from typing import Any, Callable, Dict, List, TypeVar
 
-from redisvl.utils.threshold_optimizer.utils import _validate_test_dict
+from redisvl.utils.optimize.utils import _validate_test_dict
 
 
-class EvalMetric(Enum):
+class EvalMetric(str, Enum):
     """Evaluation metrics for threshold optimization."""
 
     F1 = "f1"
     PRECISION = "precision"
     RECALL = "recall"
 
-    def __str__(self) -> str:
-        return self.value
-
-    @classmethod
-    def from_string(cls, metric: str) -> "EvalMetric":
-        """Convert string to EvalMetric enum."""
-        try:
-            return cls(metric.lower())
-        except ValueError:
-            raise ValueError(
-                f"Invalid metric: {metric}. Valid options are: {', '.join(m.value for m in cls)}"
-            )
-
 
 T = TypeVar("T")  # Type variable for the optimizable object (Cache or Router)
 
diff --git a/redisvl/utils/optimize/cache.py b/redisvl/utils/optimize/cache.py
@@ -5,26 +5,26 @@
 
 from redisvl.extensions.llmcache.semantic import SemanticCache
 from redisvl.query import RangeQuery
-from redisvl.utils.threshold_optimizer.base import BaseThresholdOptimizer, EvalMetric
-from redisvl.utils.threshold_optimizer.schema import TestData
-from redisvl.utils.threshold_optimizer.utils import NULL_RESPONSE_KEY, _format_qrels
+from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
+from redisvl.utils.optimize.schema import TestData
+from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 
 
 def _generate_run_cache(test_data: List[TestData], threshold: float) -> Run:
     """Format observed data for evaluation with ranx"""
     run_dict: Dict[str, Dict[str, int]] = {}
 
     for td in test_data:
-        run_dict[td.q_id] = {}
+        run_dict[td.id] = {}
         for res in td.response:
             if float(res["vector_distance"]) < threshold:
                 # value of 1 is irrelevant checks only on match for f1
-                run_dict[td.q_id][res["id"]] = 1
+                run_dict[td.id][res["id"]] = 1
 
-        if not run_dict[td.q_id]:
+        if not run_dict[td.id]:
             # ranx is a little odd in that if there are no matches it errors
             # if however there are no keys that match you get the correct score
-            run_dict[td.q_id][NULL_RESPONSE_KEY] = 1
+            run_dict[td.id][NULL_RESPONSE_KEY] = 1
 
     return Run(run_dict)
 
diff --git a/redisvl/utils/optimize/router.py b/redisvl/utils/optimize/router.py
@@ -5,22 +5,22 @@
 from ranx import Qrels, Run, evaluate
 
 from redisvl.extensions.router.semantic import SemanticRouter
-from redisvl.utils.threshold_optimizer.base import BaseThresholdOptimizer, EvalMetric
-from redisvl.utils.threshold_optimizer.schema import TestData
-from redisvl.utils.threshold_optimizer.utils import NULL_RESPONSE_KEY, _format_qrels
+from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
+from redisvl.utils.optimize.schema import TestData
+from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 
 
 def _generate_run_router(test_data: List[TestData], router: SemanticRouter) -> Run:
     """Format router results into format for ranx Run"""
     run_dict: Dict[Any, Any] = {}
 
     for td in test_data:
-        run_dict[td.q_id] = {}
+        run_dict[td.id] = {}
         route_match = router(td.query)
         if route_match and route_match.name == td.query_match:
-            run_dict[td.q_id][td.query_match] = 1
+            run_dict[td.id][td.query_match] = 1
         else:
-            run_dict[td.q_id][NULL_RESPONSE_KEY] = 1
+            run_dict[td.id][NULL_RESPONSE_KEY] = 1
 
     return Run(run_dict)
 
diff --git a/redisvl/utils/optimize/schema.py b/redisvl/utils/optimize/schema.py
@@ -5,7 +5,7 @@
 
 
 class TestData(BaseModel):
-    q_id: str = Field(default_factory=lambda: str(ULID()))
+    id: str = Field(default_factory=lambda: str(ULID()))
     query: str
     query_match: Optional[str]
     response: List[dict] = []
diff --git a/redisvl/utils/optimize/utils.py b/redisvl/utils/optimize/utils.py
@@ -2,7 +2,7 @@
 
 from ranx import Qrels
 
-from redisvl.utils.threshold_optimizer.schema import TestData
+from redisvl.utils.optimize.schema import TestData
 
 NULL_RESPONSE_KEY = "no_match"
 
@@ -13,10 +13,10 @@ def _format_qrels(test_data: List[TestData]) -> Qrels:
 
     for td in test_data:
         if td.query_match:
-            qrels_dict[td.q_id] = {td.query_match: 1}
+            qrels_dict[td.id] = {td.query_match: 1}
         else:
             # This is for capturing true negatives from test set
-            qrels_dict[td.q_id] = {NULL_RESPONSE_KEY: 1}
+            qrels_dict[td.id] = {NULL_RESPONSE_KEY: 1}
 
     return Qrels(qrels_dict)
 
diff --git a/redisvl/utils/threshold_optimizer/__init__.py b/redisvl/utils/threshold_optimizer/__init__.py
diff --git a/scripts.py b/scripts.py
@@ -39,7 +39,16 @@ def test():
 
 
 def test_verbose():
-    test_cmd = ["python", "-m", "pytest", "-n", "auto", "-vv", "-s", "--log-level=CRITICAL"]
+    test_cmd = [
+        "python",
+        "-m",
+        "pytest",
+        "-n",
+        "auto",
+        "-vv",
+        "-s",
+        "--log-level=CRITICAL",
+    ]
     # Get any extra arguments passed to the script
     extra_args = sys.argv[1:]
     if extra_args:
@@ -48,7 +57,22 @@ def test_verbose():
 
 
 def test_notebooks():
-    subprocess.run("cd docs/ && python -m pytest --nbval-lax ./user_guide -vv", shell=True, check=True)
+    test_cmd = [
+        "python",
+        "-m",
+        "pytest",
+        "--nbval-lax",
+        "./docs/user_guide",
+        "-vvv",
+    ]
+    extra_args = sys.argv[1:]
+    if extra_args:
+        test_cmd.extend(extra_args)
+
+    subprocess.run(
+        test_cmd,
+        check=True,
+    )
 
 
 def build_docs():
diff --git a/tests/integration/test_threshold_optimizer.py b/tests/integration/test_threshold_optimizer.py
@@ -9,9 +9,11 @@
 from redisvl.extensions.router import Route, SemanticRouter
 from redisvl.extensions.router.schema import RoutingConfig
 from redisvl.redis.connection import compare_versions
-from redisvl.utils.threshold_optimizer.base import EvalMetric
-from redisvl.utils.threshold_optimizer.cache import CacheThresholdOptimizer
-from redisvl.utils.threshold_optimizer.router import RouterThresholdOptimizer
+from redisvl.utils.optimize import (
+    CacheThresholdOptimizer,
+    EvalMetric,
+    RouterThresholdOptimizer,
+)
 
 
 @pytest.fixture
@@ -273,19 +275,15 @@ def test_optimize_threshold_cache_recall(client, redis_url):
 
 def test_eval_metric_from_string():
     """Test that EvalMetric.from_string works for valid metrics."""
-    assert EvalMetric.from_string("f1") == EvalMetric.F1
-    assert EvalMetric.from_string("precision") == EvalMetric.PRECISION
-    assert EvalMetric.from_string("recall") == EvalMetric.RECALL
-
-    # Test case insensitivity
-    assert EvalMetric.from_string("F1") == EvalMetric.F1
-    assert EvalMetric.from_string("PRECISION") == EvalMetric.PRECISION
+    assert EvalMetric("f1") == EvalMetric.F1
+    assert EvalMetric("precision") == EvalMetric.PRECISION
+    assert EvalMetric("recall") == EvalMetric.RECALL
 
 
 def test_eval_metric_invalid():
     """Test that EvalMetric.from_string raises ValueError for invalid metrics."""
     with pytest.raises(ValueError):
-        EvalMetric.from_string("invalid_metric")
+        EvalMetric("invalid_metric")
 
 
 def test_optimizer_with_invalid_metric(redis_url):
diff --git a/tests/unit/test_threshold_optimizer_utility.py b/tests/unit/test_threshold_optimizer_utility.py
@@ -7,9 +7,9 @@
 
 from ranx import evaluate
 
-from redisvl.utils.threshold_optimizer.cache import _generate_run_cache
-from redisvl.utils.threshold_optimizer.schema import TestData
-from redisvl.utils.threshold_optimizer.utils import _format_qrels
+from redisvl.utils.optimize import TestData
+from redisvl.utils.optimize.cache import _generate_run_cache
+from redisvl.utils.optimize.utils import _format_qrels
 
 # Note: these tests are not intended to test ranx but to test that our data formatting for the package is correct