|
| 1 | +import pytest |
| 2 | +from ranx import evaluate |
| 3 | + |
| 4 | +from redisvl.extensions.threshold_optimizer.cache import _generate_run_cache |
| 5 | +from redisvl.extensions.threshold_optimizer.schema import TestData |
| 6 | +from redisvl.extensions.threshold_optimizer.utils import ( |
| 7 | + NULL_RESPONSE_KEY, |
| 8 | + _format_qrels, |
| 9 | +) |
| 10 | + |
| 11 | +# Note: these tests are not intended to test ranx but to test that our data formatting for the package is correct |
| 12 | + |
| 13 | + |
def test_known_precision_case():
    """
    Check the qrels/run formatting against a hand-computed precision.

    Setup:
    - 2 queries, evaluated with a distance threshold of 0.4
    - Query 1 expects doc1, gets doc1 and doc2 (precision 0.5)
    - Query 2 expects doc3, gets doc3 (precision 1.0); doc4 is also in the
      raw response at distance 0.8 and is expected to be dropped by the
      threshold (NOTE(review): confirm against _generate_run_cache)
    Expected overall precision: 0.75
    """
    # Setup test data
    test_data = [
        TestData(
            query="test query 1",
            query_match="doc1",
            response=[
                {"id": "doc1", "vector_distance": 0.2},
                {"id": "doc2", "vector_distance": 0.3},
            ],
        ),
        TestData(
            query="test query 2",
            query_match="doc3",
            response=[
                {"id": "doc3", "vector_distance": 0.2},
                {"id": "doc4", "vector_distance": 0.8},
            ],
        ),
    ]

    # Create qrels (ground truth)
    qrels = _format_qrels(test_data)

    # The threshold sits above the 0.2/0.3 distances and below the 0.8 one.
    threshold = 0.4
    run = _generate_run_cache(test_data, threshold)

    # Calculate precision using ranx
    precision = evaluate(qrels, run, "precision")
    # Compare with a tolerance rather than exact float equality.
    assert precision == pytest.approx(0.75)  # (0.5 + 1.0) / 2
| 53 | + |
| 54 | + |
def test_known_precision_with_no_matches():
    """
    Test a single query that expects no match and receives no responses.

    The query has an empty ``query_match`` and an empty ``response`` list, and
    the formatted qrels/run are expected to score a precision of 1.0.
    NOTE(review): presumably both helpers map the "no match" case to the same
    placeholder document key; confirm against _format_qrels and
    _generate_run_cache.
    """
    test_data = [
        TestData(
            query="test query 2",
            query_match="",  # Expecting no match
            response=[],
        ),
    ]

    # Create qrels
    qrels = _format_qrels(test_data)

    # The response list is empty, so there are no docs for the threshold to
    # admit or exclude; the value only needs to be a valid float.
    threshold = 0.3
    run = _generate_run_cache(test_data, threshold)

    # Calculate precision
    precision = evaluate(qrels, run, "precision")
    # Only one query is evaluated, so the mean is that query's score.
    # Compare with a tolerance rather than exact float equality.
    assert precision == pytest.approx(1.0)
0 commit comments