Merge branch '0.5.0' into feat/RAAE-599/distance-normalization

rbs333 · web-flow · commit 15cfeba7fedd · 2025-03-27T10:10:32.000-04:00
diff --git a/redisvl/query/query.py b/redisvl/query/query.py
diff --git a/redisvl/utils/log.py b/redisvl/utils/log.py
@@ -3,7 +3,6 @@
 
 import coloredlogs
 
-# constants for logging
 coloredlogs.DEFAULT_DATE_FORMAT = "%H:%M:%S"
 coloredlogs.DEFAULT_LOG_FORMAT = "%(asctime)s %(name)s %(levelname)s   %(message)s"
 
@@ -15,5 +14,16 @@ def get_logger(name, log_level="info", fmt=None):
     name = "RedisVL" if log_level == "debug" else name
 
     logger = logging.getLogger(name)
-    coloredlogs.install(level=log_level, logger=logger, fmt=fmt, stream=sys.stdout)
+
+    # Only configure this specific logger, not the root logger
+    # Check if the logger already has handlers to respect existing configuration
+    if not logger.handlers:
+        coloredlogs.install(
+            level=log_level,
+            logger=logger,  # Pass the specific logger
+            fmt=fmt,
+            stream=sys.stdout,
+            isatty=True,  # Force colored output; True overrides terminal auto-detection
+            reconfigure=False,  # Don't reconfigure existing loggers
+        )
     return logger
diff --git a/redisvl/utils/optimize/__init__.py b/redisvl/utils/optimize/__init__.py
@@ -1,12 +1,12 @@
 from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
 from redisvl.utils.optimize.cache import CacheThresholdOptimizer
 from redisvl.utils.optimize.router import RouterThresholdOptimizer
-from redisvl.utils.optimize.schema import TestData
+from redisvl.utils.optimize.schema import LabeledData
 
 __all__ = [
     "CacheThresholdOptimizer",
     "RouterThresholdOptimizer",
     "EvalMetric",
     "BaseThresholdOptimizer",
-    "TestData",
+    "LabeledData",
 ]
diff --git a/redisvl/utils/optimize/cache.py b/redisvl/utils/optimize/cache.py
@@ -6,11 +6,11 @@
 from redisvl.extensions.llmcache.semantic import SemanticCache
 from redisvl.query import RangeQuery
 from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
-from redisvl.utils.optimize.schema import TestData
+from redisvl.utils.optimize.schema import LabeledData
 from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 
 
-def _generate_run_cache(test_data: List[TestData], threshold: float) -> Run:
+def _generate_run_cache(test_data: List[LabeledData], threshold: float) -> Run:
     """Format observed data for evaluation with ranx"""
     run_dict: Dict[str, Dict[str, int]] = {}
 
@@ -30,7 +30,7 @@ def _generate_run_cache(test_data: List[TestData], threshold: float) -> Run:
 
 
 def _eval_cache(
-    test_data: List[TestData], threshold: float, qrels: Qrels, metric: str
+    test_data: List[LabeledData], threshold: float, qrels: Qrels, metric: str
 ) -> float:
     """Formats run data and evaluates supported metric"""
     run = _generate_run_cache(test_data, threshold)
@@ -46,7 +46,7 @@ def _get_best_threshold(metrics: dict) -> float:
 
 
 def _grid_search_opt_cache(
-    cache: SemanticCache, test_data: List[TestData], eval_metric: EvalMetric
+    cache: SemanticCache, test_data: List[LabeledData], eval_metric: EvalMetric
 ):
     """Evaluates all thresholds in linspace for cache to determine optimal"""
     thresholds = np.linspace(0.01, 0.8, 60)
diff --git a/redisvl/utils/optimize/router.py b/redisvl/utils/optimize/router.py
@@ -6,11 +6,11 @@
 
 from redisvl.extensions.router.semantic import SemanticRouter
 from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
-from redisvl.utils.optimize.schema import TestData
+from redisvl.utils.optimize.schema import LabeledData
 from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 
 
-def _generate_run_router(test_data: List[TestData], router: SemanticRouter) -> Run:
+def _generate_run_router(test_data: List[LabeledData], router: SemanticRouter) -> Run:
     """Format router results into format for ranx Run"""
     run_dict: Dict[Any, Any] = {}
 
@@ -26,7 +26,7 @@ def _generate_run_router(test_data: List[TestData], router: SemanticRouter) -> R
 
 
 def _eval_router(
-    router: SemanticRouter, test_data: List[TestData], qrels: Qrels, eval_metric: str
+    router: SemanticRouter, test_data: List[LabeledData], qrels: Qrels, eval_metric: str
 ) -> float:
     """Evaluate acceptable metric given run and qrels data"""
     run = _generate_run_router(test_data, router)
@@ -55,7 +55,7 @@ def _router_random_search(
 
 def _random_search_opt_router(
     router: SemanticRouter,
-    test_data: List[TestData],
+    test_data: List[LabeledData],
     qrels: Qrels,
     eval_metric: EvalMetric,
     **kwargs: Any,
@@ -67,12 +67,15 @@ def _random_search_opt_router(
     best_thresholds = router.route_thresholds
 
     max_iterations = kwargs.get("max_iterations", 20)
+    search_step = kwargs.get("search_step", 0.10)
 
     for _ in range(max_iterations):
         route_names = router.route_names
         route_thresholds = router.route_thresholds
         thresholds = _router_random_search(
-            route_names=route_names, route_thresholds=route_thresholds
+            route_names=route_names,
+            route_thresholds=route_thresholds,
+            search_step=search_step,
         )
         router.update_route_thresholds(thresholds)
         score = _eval_router(router, test_data, qrels, eval_metric.value)
diff --git a/redisvl/utils/optimize/schema.py b/redisvl/utils/optimize/schema.py
@@ -4,7 +4,7 @@
 from ulid import ULID
 
 
-class TestData(BaseModel):
+class LabeledData(BaseModel):
     id: str = Field(default_factory=lambda: str(ULID()))
     query: str
     query_match: Optional[str]
diff --git a/redisvl/utils/optimize/utils.py b/redisvl/utils/optimize/utils.py
@@ -2,12 +2,12 @@
 
 from ranx import Qrels
 
-from redisvl.utils.optimize.schema import TestData
+from redisvl.utils.optimize.schema import LabeledData
 
 NULL_RESPONSE_KEY = "no_match"
 
 
-def _format_qrels(test_data: List[TestData]) -> Qrels:
+def _format_qrels(test_data: List[LabeledData]) -> Qrels:
     """Utility function for creating qrels for evaluation with ranx"""
     qrels_dict = {}
 
@@ -21,6 +21,6 @@ def _format_qrels(test_data: List[TestData]) -> Qrels:
     return Qrels(qrels_dict)
 
 
-def _validate_test_dict(test_dict: List[dict]) -> List[TestData]:
+def _validate_test_dict(test_dict: List[dict]) -> List[LabeledData]:
     """Convert/validate test_dict for use in optimizer"""
-    return [TestData(**d) for d in test_dict]
+    return [LabeledData(**d) for d in test_dict]
diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py
@@ -14,6 +14,7 @@
     Text,
     Timestamp,
 )
+from redisvl.query.query import VectorRangeQuery
 from redisvl.redis.utils import array_to_buffer
 
 # TODO expand to multiple schema types and sync + async
@@ -662,3 +663,129 @@ def test_range_query_normalize_bad_input(index):
             return_fields=["user", "credit_score", "age", "job", "location"],
             distance_threshold=1.2,
         )
+        
+def test_hybrid_policy_batches_mode(index, vector_query):
+    """Test vector query with BATCHES hybrid policy."""
+    # Create a filter
+    t = Tag("credit_score") == "high"
+
+    # Set hybrid policy to BATCHES
+    vector_query.set_hybrid_policy("BATCHES")
+    vector_query.set_batch_size(2)
+
+    # Set the filter
+    vector_query.set_filter(t)
+
+    # Check query string
+    assert "HYBRID_POLICY BATCHES BATCH_SIZE 2" in str(vector_query)
+
+    # Execute query
+    results = index.query(vector_query)
+
+    # Check results - should have filtered to "high" credit scores
+    assert len(results) > 0
+    for result in results:
+        assert result["credit_score"] == "high"
+
+
+def test_hybrid_policy_adhoc_bf_mode(index, vector_query):
+    """Test vector query with ADHOC_BF hybrid policy."""
+    # Create a filter
+    t = Tag("credit_score") == "high"
+
+    # Set hybrid policy to ADHOC_BF
+    vector_query.set_hybrid_policy("ADHOC_BF")
+
+    # Set the filter
+    vector_query.set_filter(t)
+
+    # Check query string
+    assert "HYBRID_POLICY ADHOC_BF" in str(vector_query)
+
+    # Execute query
+    results = index.query(vector_query)
+
+    # Check results - should have filtered to "high" credit scores
+    assert len(results) > 0
+    for result in results:
+        assert result["credit_score"] == "high"
+
+
+def test_range_query_with_epsilon(index):
+    """Integration test: Check epsilon query construction, then run a basic range query against Redis."""
+    # Create a range query with epsilon
+    epsilon_query = VectorRangeQuery(
+        vector=[0.1, 0.1, 0.5],
+        vector_field_name="user_embedding",
+        return_fields=["user", "credit_score", "age", "job"],
+        distance_threshold=0.3,
+        epsilon=0.5,  # Larger than default to get potentially more results
+    )
+
+    # Verify query string contains epsilon attribute
+    query_string = str(epsilon_query)
+    assert "$EPSILON: 0.5" in query_string
+
+    # Verify epsilon property is set
+    assert epsilon_query.epsilon == 0.5
+
+    # Test setting epsilon
+    epsilon_query.set_epsilon(0.1)
+    assert epsilon_query.epsilon == 0.1
+    assert "$EPSILON: 0.1" in str(epsilon_query)
+
+    # Execute basic query without epsilon to ensure functionality
+    basic_query = VectorRangeQuery(
+        vector=[0.1, 0.1, 0.5],
+        vector_field_name="user_embedding",
+        return_fields=["user", "credit_score", "age", "job"],
+        distance_threshold=0.2,
+    )
+
+    results = index.query(basic_query)
+
+    # Check results
+    for result in results:
+        assert float(result["vector_distance"]) <= 0.2
+
+
+def test_range_query_with_filter_and_hybrid_policy(index):
+    """Integration test: Test construction of a range query with filter and hybrid policy."""
+    # Create a filter for high credit score
+    credit_filter = Tag("credit_score") == "high"
+
+    # Create a range query with filter and hybrid policy
+    query = VectorRangeQuery(
+        vector=[0.1, 0.1, 0.5],
+        vector_field_name="user_embedding",
+        return_fields=["user", "credit_score", "age", "job"],
+        filter_expression=credit_filter,
+        distance_threshold=0.5,
+        hybrid_policy="BATCHES",
+        batch_size=2,
+    )
+
+    # Check query string and parameters
+    query_string = str(query)
+    assert "@credit_score:{high}" in query_string
+    assert "HYBRID_POLICY" not in query_string
+    assert query.hybrid_policy == "BATCHES"
+    assert query.batch_size == 2
+    assert query.params["HYBRID_POLICY"] == "BATCHES"
+    assert query.params["BATCH_SIZE"] == 2
+
+    # Execute basic query with filter but without hybrid policy
+    basic_filter_query = VectorRangeQuery(
+        vector=[0.1, 0.1, 0.5],
+        vector_field_name="user_embedding",
+        return_fields=["user", "credit_score", "age", "job"],
+        filter_expression=credit_filter,
+        distance_threshold=0.5,
+    )
+
+    results = index.query(basic_filter_query)
+
+    # Check results
+    for result in results:
+        assert result["credit_score"] == "high"
+        assert float(result["vector_distance"]) <= 0.5
diff --git a/tests/integration/test_threshold_optimizer.py b/tests/integration/test_threshold_optimizer.py
@@ -111,7 +111,7 @@ def test_routes_different_distance_thresholds_optimizer_default(
 
     # now run optimizer
     router_optimizer = RouterThresholdOptimizer(router, test_data_optimization)
-    router_optimizer.optimize(max_iterations=10)
+    router_optimizer.optimize(max_iterations=10, search_step=0.5)
 
     # test that it updated thresholds beyond the null case
     for route in routes:
diff --git a/tests/unit/logger_interference_checker.py b/tests/unit/logger_interference_checker.py
@@ -0,0 +1,25 @@
+import logging
+import sys
+
+# Set up custom logging
+handler = logging.StreamHandler(sys.stdout)
+handler.setFormatter(
+    logging.Formatter(
+        "%(asctime)s %(levelname)s [%(name)s] [%(filename)s:%(lineno)s] %(message)s"
+    )
+)
+
+# Configure root logger
+root_logger = logging.getLogger()
+root_logger.handlers = [handler]
+root_logger.setLevel(logging.INFO)
+
+# Log before import
+app_logger = logging.getLogger("app")
+app_logger.info("PRE_IMPORT_FORMAT")
+
+# Import RedisVL
+from redisvl.query.filter import Text  # noqa: E402, F401
+
+# Log after import
+app_logger.info("POST_IMPORT_FORMAT")
diff --git a/tests/unit/test_query_types.py b/tests/unit/test_query_types.py
diff --git a/tests/unit/test_threshold_optimizer_utility.py b/tests/unit/test_threshold_optimizer_utility.py
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py