|
14 | 14 | /** |
15 | 15 | * HybridQuery combines text and vector search in Redis using aggregation. |
16 | 16 | * |
17 | | - * <p>Ported from Python: redisvl/query/aggregate.py:23-230 (HybridQuery class) |
| 17 | + * <p>Ported from Python: redisvl/query/aggregate.py:57-329 (AggregateHybridQuery class) |
18 | 18 | * |
19 | 19 | * <p>It allows you to perform a hybrid search using both text and vector similarity. It scores |
20 | | - * documents based on a weighted combination of text and vector similarity. |
| 20 | + * documents based on a weighted combination of text and vector similarity using the formula: |
| 21 | + * |
| 22 | + * <pre> |
| 23 | + * hybrid_score = (1 - alpha) * text_score + alpha * vector_similarity |
| 24 | + * </pre> |
| 25 | + * |
| 26 | + * <p>Where {@code text_score} is the BM25 score from the text search and {@code vector_similarity} |
| 27 | + * is the normalized cosine similarity from the vector search. |
| 28 | + * |
| 29 | + * <p><strong>Redis Version Requirements:</strong> This query uses the ADDSCORES option in |
| 30 | + * FT.AGGREGATE to expose the internal text search score (@__score). This feature requires |
| 31 | + * <strong>Redis 7.4.0 or later</strong>. On older Redis versions, the query will fail. |
21 | 32 | * |
22 | 33 | * <p><strong>Note on Runtime Parameters:</strong> HybridQuery uses Redis FT.AGGREGATE for |
23 | 34 | * aggregation-based hybrid search. As of Redis Stack 7.2+, runtime parameters (efRuntime, epsilon, |
@@ -598,30 +609,30 @@ public AggregationBuilder buildRedisAggregation() { |
598 | 609 | // Set dialect |
599 | 610 | aggregation.dialect(dialect); |
600 | 611 |
|
601 | | - // Set text scorer (Python: self.scorer(text_scorer)) |
602 | | - // Note: In Jedis, we need to use WITHSCORE to get the text score |
603 | | - // For now, we'll use vector similarity only and calculate text score differently |
| 612 | + // Enable ADDSCORES to expose @__score field containing the text search score |
| 613 | + // (Python: self.add_scores() - line 169) |
| 614 | + // Note: Requires Redis 7.4.0+. No SCORER is set here; server default applies (confirm BM25). |
| 615 | + aggregation.addScores(); |
604 | 616 |
|
605 | | - // Apply vector similarity calculation (Python: line 122-123) |
| 617 | + // Apply vector similarity calculation (Python: line 170-172) |
606 | 618 | // vector_similarity = (2 - @vector_distance) / 2 |
| 619 | + // Normalizes cosine distance [0,2] to similarity [0,1] |
607 | 620 | aggregation.apply("(2 - @" + DISTANCE_ID + ")/2", "vector_similarity"); |
608 | 621 |
|
609 | | - // Apply text score - for hybrid queries, the text matching score is implicit |
610 | | - // Since we can't easily access __score in aggregations, we'll use a constant of 1.0 |
611 | | - // This means the hybrid score will be based primarily on vector similarity |
612 | | - // TODO: Investigate using WITHSCORE or custom scoring |
613 | | - aggregation.apply("1.0", "text_score"); |
| 622 | + // Apply text score from @__score (the BM25/text search score exposed by ADDSCORES) |
| 623 | + // (Python: text_score="@__score" - line 171) |
| 624 | + aggregation.apply("@__score", "text_score"); |
614 | 625 |
|
615 | | - // Apply hybrid score calculation (Python: line 125) |
| 626 | + // Apply hybrid score calculation (Python: line 173) |
616 | 627 | // hybrid_score = (1-alpha) * text_score + alpha * vector_similarity |
617 | 628 | String hybridScoreFormula = |
618 | 629 | String.format("%f*@text_score + %f*@vector_similarity", (1 - alpha), alpha); |
619 | 630 | aggregation.apply(hybridScoreFormula, "hybrid_score"); |
620 | 631 |
|
621 | | - // Sort by hybrid score descending (Python: line 126) |
| 632 | + // Sort by hybrid score descending (Python: line 174) |
622 | 633 | aggregation.sortBy(numResults, SortedField.desc("@hybrid_score")); |
623 | 634 |
|
624 | | - // Load return fields (Python: line 129) |
| 635 | + // Load return fields (Python: line 176-177) |
625 | 636 | if (!returnFields.isEmpty()) { |
626 | 637 | aggregation.load(returnFields.toArray(String[]::new)); |
627 | 638 | } |
|
0 commit comments