Skip to content

Commit 44b0c8a

Browse files
committed
feat: add support for string filter expressions in HybridQuery
Implements support for passing raw Redis filter query strings directly to HybridQuery, in addition to existing Filter object support. This provides feature parity with Python RedisVL and allows users to write custom Redis filter syntax when needed.

Changes:
- Updated HybridQuery to accept both Filter objects and String filters
- Added overloaded filterExpression(String) builder method
- Modified buildQueryString() to handle both filter types
- Ported Python unit tests from test_aggregation_types.py
- Added comprehensive unit tests in new HybridQueryTest class
- Added integration tests for string and wildcard filters

Test Coverage:
- String filter expressions work correctly
- Filter objects continue to work (backward compatibility)
- Wildcard filters ("*") don't add AND clauses
- All 18 tests pass (9 unit + 9 integration)

Ported from Python RedisVL PR #375: https://github.com/RedisVentures/redisvl/pull/375
Python reference: /redis-vl-python/tests/unit/test_aggregation_types.py
1 parent 10db320 commit 44b0c8a

File tree

3 files changed

+319
-7
lines changed

3 files changed

+319
-7
lines changed

core/src/main/java/com/redis/vl/query/HybridQuery.java

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,11 @@ public final class HybridQuery extends AggregationQuery {
9393
/**
9494
* The filter expression to use.
9595
*
96+
* <p>Can be either a Filter object or a String containing a raw Redis filter expression.
97+
*
9698
* <p>Defaults to null (no filter).
9799
*/
98-
private final Filter filterExpression;
100+
private final Object filterExpression;
99101

100102
/**
101103
* The weight of the vector similarity.
@@ -233,9 +235,11 @@ public String getTextScorer() {
233235
/**
234236
* Get the filter expression.
235237
*
236-
* @return The filter to apply, or null if no filter
238+
* <p>Can be either a Filter object or a String containing a raw Redis filter expression.
239+
*
240+
* @return The filter to apply (Filter or String), or null if no filter
237241
*/
238-
public Filter getFilterExpression() {
242+
public Object getFilterExpression() {
239243
return filterExpression;
240244
}
241245

@@ -300,7 +304,7 @@ public static class HybridQueryBuilder {
300304
private float[] vector;
301305
private String vectorFieldName;
302306
private String textScorer = "BM25STD";
303-
private Filter filterExpression;
307+
private Object filterExpression;
304308
private float alpha = 0.7f;
305309
private String dtype = "float32";
306310
private int numResults = 10;
@@ -367,7 +371,7 @@ public HybridQueryBuilder textScorer(String textScorer) {
367371
}
368372

369373
/**
370-
* Set an additional filter expression for the query.
374+
* Set an additional filter expression for the query using a Filter object.
371375
*
372376
* @param filterExpression The filter to apply
373377
* @return This builder for chaining
@@ -377,6 +381,22 @@ public HybridQueryBuilder filterExpression(Filter filterExpression) {
377381
return this;
378382
}
379383

384+
/**
385+
* Set an additional filter expression for the query using a raw Redis query string.
386+
*
387+
* <p>This allows passing custom Redis filter syntax directly, such as:
388+
* "@category:{tech|science|engineering}"
389+
*
390+
* <p>Ported from Python PR #375 to support string filter expressions.
391+
*
392+
* @param filterExpression The raw Redis filter string
393+
* @return This builder for chaining
394+
*/
395+
public HybridQueryBuilder filterExpression(String filterExpression) {
396+
this.filterExpression = filterExpression;
397+
return this;
398+
}
399+
380400
/**
381401
* Set the weight for combining text and vector scores.
382402
*
@@ -533,7 +553,13 @@ public String tokenizeAndEscapeQuery(String userQuery) {
533553
*/
534554
@Override
535555
public String buildQueryString() {
536-
String filterStr = (filterExpression != null) ? filterExpression.build() : null;
556+
// Handle both Filter objects and String filter expressions (Python port: PR #375)
557+
String filterStr = null;
558+
if (filterExpression instanceof Filter) {
559+
filterStr = ((Filter) filterExpression).build();
560+
} else if (filterExpression instanceof String) {
561+
filterStr = (String) filterExpression;
562+
}
537563

538564
// Base KNN query
539565
String knnQuery =
@@ -543,7 +569,7 @@ public String buildQueryString() {
543569
// Text query with fuzzy matching (~)
544570
String textQuery = String.format("(~@%s:(%s)", textFieldName, tokenizeAndEscapeQuery(text));
545571

546-
// Add filter if present
572+
// Add filter if present and not wildcard
547573
if (filterStr != null && !filterStr.equals("*")) {
548574
textQuery += " AND " + filterStr;
549575
}

core/src/test/java/com/redis/vl/query/HybridQueryIntegrationTest.java

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,75 @@ void testHybridQueryWithTextFilter() {
467467
}
468468
}
469469

470+
/**
471+
* Integration test for string filter expressions - port of Python PR #375
472+
*
473+
* <p>Tests that raw Redis filter strings work correctly in HybridQuery
474+
*/
475+
@Test
476+
void testHybridQueryWithStringFilterExpression() {
477+
String text = "a medical professional with expertise in lung cancer";
478+
String textField = "description";
479+
float[] vector = new float[] {0.1f, 0.1f, 0.5f};
480+
String vectorField = "user_embedding";
481+
482+
// Use raw Redis filter string: credit_score is "high" AND age > 30
483+
String stringFilter = "(@credit_score:{high} @age:[31 +inf])";
484+
485+
HybridQuery query =
486+
HybridQuery.builder()
487+
.text(text)
488+
.textFieldName(textField)
489+
.vector(vector)
490+
.vectorFieldName(vectorField)
491+
.filterExpression(stringFilter)
492+
.returnFields(List.of("user", "credit_score", "age", "job"))
493+
.build();
494+
495+
List<Map<String, Object>> results = index.query(query);
496+
497+
// Should return only high credit_score users with age > 30
498+
assertThat(results).hasSize(2); // nancy (94) and tyler (100)
499+
500+
for (Map<String, Object> result : results) {
501+
assertThat(result.get("credit_score")).isEqualTo("high");
502+
int age = getIntValue(result, "age");
503+
assertThat(age).isGreaterThan(30);
504+
}
505+
}
506+
507+
/**
508+
* Integration test for wildcard string filter - port of Python test
509+
*
510+
* <p>Tests that wildcard filter "*" doesn't add an AND clause
511+
*/
512+
@Test
513+
void testHybridQueryWithWildcardStringFilter() {
514+
String text = "engineer";
515+
String textField = "job";
516+
float[] vector = new float[] {0.1f, 0.1f, 0.5f};
517+
String vectorField = "user_embedding";
518+
519+
// Wildcard filter should match all documents
520+
HybridQuery query =
521+
HybridQuery.builder()
522+
.text(text)
523+
.textFieldName(textField)
524+
.vector(vector)
525+
.vectorFieldName(vectorField)
526+
.filterExpression("*")
527+
.build();
528+
529+
List<Map<String, Object>> results = index.query(query);
530+
531+
// Should return all documents (wildcard doesn't filter)
532+
assertThat(results).hasSize(7);
533+
534+
// Verify query string doesn't contain "AND *"
535+
String queryString = query.buildQueryString();
536+
assertThat(queryString).doesNotContain("AND *");
537+
}
538+
470539
// Helper methods for type conversion (Hash storage returns strings)
471540
private double getDoubleValue(Map<String, Object> map, String key) {
472541
Object value = map.get(key);
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
package com.redis.vl.query;
2+
3+
import static org.assertj.core.api.Assertions.assertThat;
4+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
5+
6+
import org.junit.jupiter.api.DisplayName;
7+
import org.junit.jupiter.api.Test;
8+
9+
/**
10+
* Unit tests for HybridQuery - ported from Python test_aggregation_types.py
11+
*
12+
* <p>Python reference: /redis-vl-python/tests/unit/test_aggregation_types.py
13+
*
14+
* <p>Tests the ability to pass string filter expressions directly to HybridQuery, in addition to
15+
* Filter objects. This is a port of the test added in PR #375.
16+
*/
17+
@DisplayName("HybridQuery Unit Tests")
18+
class HybridQueryTest {
19+
20+
private static final float[] SAMPLE_VECTOR = new float[] {0.1f, 0.2f, 0.3f};
21+
22+
/**
23+
* Port of Python test_hybrid_query_with_string_filter (test_aggregation_types.py:118-191)
24+
*
25+
* <p>This test ensures that when a string filter expression is passed to HybridQuery, it's
26+
* properly included in the generated query string and not set to empty. Regression test for bug
27+
* where string filters were being ignored in Python.
28+
*
29+
* <p>In Java, this test verifies we support BOTH Filter objects and raw string filters for
30+
* feature parity with Python.
31+
*/
32+
@Test
33+
@DisplayName("Should support string filter expressions")
34+
void testHybridQueryWithStringFilter() {
35+
String text = "search for document 12345";
36+
String textFieldName = "description";
37+
String vectorFieldName = "embedding";
38+
39+
// Test with string filter expression - should include filter in query string
40+
String stringFilter = "@category:{tech|science|engineering}";
41+
HybridQuery hybridQuery =
42+
HybridQuery.builder()
43+
.text(text)
44+
.textFieldName(textFieldName)
45+
.vector(SAMPLE_VECTOR)
46+
.vectorFieldName(vectorFieldName)
47+
.filterExpression(stringFilter)
48+
.build();
49+
50+
// Check that filter is stored correctly
51+
assertThat(hybridQuery.getFilterExpression()).isEqualTo(stringFilter);
52+
53+
// Check that the generated query string includes both text search and filter
54+
String queryString = hybridQuery.buildQueryString();
55+
assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)");
56+
assertThat(queryString).contains("AND " + stringFilter);
57+
}
58+
59+
/** Port of Python test - verify Filter objects still work */
60+
@Test
61+
@DisplayName("Should support Filter objects")
62+
void testHybridQueryWithFilterObject() {
63+
String text = "search for document 12345";
64+
String textFieldName = "description";
65+
String vectorFieldName = "embedding";
66+
67+
// Test with FilterExpression - should also work (existing functionality)
68+
Filter filterExpression = Filter.tag("category", "tech");
69+
HybridQuery hybridQuery =
70+
HybridQuery.builder()
71+
.text(text)
72+
.textFieldName(textFieldName)
73+
.vector(SAMPLE_VECTOR)
74+
.vectorFieldName(vectorFieldName)
75+
.filterExpression(filterExpression)
76+
.build();
77+
78+
// Check that filter is stored correctly
79+
assertThat(hybridQuery.getFilterExpression()).isEqualTo(filterExpression);
80+
81+
// Check that the generated query string includes both text search and filter
82+
String queryString = hybridQuery.buildQueryString();
83+
assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)");
84+
assertThat(queryString).contains("AND @category:{tech}");
85+
}
86+
87+
/** Port of Python test - verify no filter works */
88+
@Test
89+
@DisplayName("Should work without filter")
90+
void testHybridQueryNoFilter() {
91+
String text = "search for document 12345";
92+
String textFieldName = "description";
93+
String vectorFieldName = "embedding";
94+
95+
// Test with no filter - should only have text search
96+
HybridQuery hybridQuery =
97+
HybridQuery.builder()
98+
.text(text)
99+
.textFieldName(textFieldName)
100+
.vector(SAMPLE_VECTOR)
101+
.vectorFieldName(vectorFieldName)
102+
.build();
103+
104+
String queryString = hybridQuery.buildQueryString();
105+
assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)");
106+
assertThat(queryString).doesNotContain("AND");
107+
}
108+
109+
/** Port of Python test - verify wildcard filter works */
110+
@Test
111+
@DisplayName("Should handle wildcard filter")
112+
void testHybridQueryWildcardFilter() {
113+
String text = "search for document 12345";
114+
String textFieldName = "description";
115+
String vectorFieldName = "embedding";
116+
117+
// Test with wildcard filter - should only have text search (no AND clause)
118+
HybridQuery hybridQuery =
119+
HybridQuery.builder()
120+
.text(text)
121+
.textFieldName(textFieldName)
122+
.vector(SAMPLE_VECTOR)
123+
.vectorFieldName(vectorFieldName)
124+
.filterExpression("*")
125+
.build();
126+
127+
String queryString = hybridQuery.buildQueryString();
128+
assertThat(queryString).contains("@" + textFieldName + ":(search | document | 12345)");
129+
assertThat(queryString).doesNotContain("AND");
130+
}
131+
132+
/** Test that empty text throws exception */
133+
@Test
134+
@DisplayName("Should reject empty text")
135+
void testRejectsEmptyText() {
136+
assertThatThrownBy(
137+
() ->
138+
HybridQuery.builder()
139+
.text("")
140+
.textFieldName("description")
141+
.vector(SAMPLE_VECTOR)
142+
.vectorFieldName("embedding")
143+
.build())
144+
.isInstanceOf(IllegalArgumentException.class)
145+
.hasMessageContaining("text string cannot be empty");
146+
}
147+
148+
/** Test that text becomes empty after stopwords are removed */
149+
@Test
150+
@DisplayName("Should reject text that becomes empty after stopwords removal")
151+
void testRejectsTextThatBecomesEmptyAfterStopwords() {
152+
// "with a for but and" will all be removed as default English stopwords
153+
assertThatThrownBy(
154+
() ->
155+
HybridQuery.builder()
156+
.text("with a for but and")
157+
.textFieldName("description")
158+
.vector(SAMPLE_VECTOR)
159+
.vectorFieldName("embedding")
160+
.build())
161+
.isInstanceOf(IllegalArgumentException.class)
162+
.hasMessageContaining("text string cannot be empty after removing stopwords");
163+
}
164+
165+
/** Test query string building */
166+
@Test
167+
@DisplayName("Should build correct query string format")
168+
void testQueryStringFormat() {
169+
HybridQuery query =
170+
HybridQuery.builder()
171+
.text("medical professional")
172+
.textFieldName("description")
173+
.vector(SAMPLE_VECTOR)
174+
.vectorFieldName("user_embedding")
175+
.numResults(5)
176+
.build();
177+
178+
String queryString = query.buildQueryString();
179+
180+
// Verify format: (~@text_field:(tokens))=>[KNN num @vector_field $vector AS vector_distance]
181+
assertThat(queryString).matches(".*\\(~@description:\\(.*\\)\\)=>\\[KNN.*\\]");
182+
assertThat(queryString).contains("KNN 5 @user_embedding");
183+
assertThat(queryString).contains("AS vector_distance");
184+
}
185+
186+
/** Test alpha parameter */
187+
@Test
188+
@DisplayName("Should store alpha parameter correctly")
189+
void testAlphaParameter() {
190+
HybridQuery query =
191+
HybridQuery.builder()
192+
.text("test")
193+
.textFieldName("description")
194+
.vector(SAMPLE_VECTOR)
195+
.vectorFieldName("embedding")
196+
.alpha(0.3f)
197+
.build();
198+
199+
assertThat(query.getAlpha()).isEqualTo(0.3f);
200+
}
201+
202+
/** Test numResults parameter */
203+
@Test
204+
@DisplayName("Should store numResults parameter correctly")
205+
void testNumResultsParameter() {
206+
HybridQuery query =
207+
HybridQuery.builder()
208+
.text("test")
209+
.textFieldName("description")
210+
.vector(SAMPLE_VECTOR)
211+
.vectorFieldName("embedding")
212+
.numResults(20)
213+
.build();
214+
215+
assertThat(query.getNumResults()).isEqualTo(20);
216+
}
217+
}

0 commit comments

Comments
 (0)