Skip to content

Commit f4931e2

Browse files
committed
feat(filter): add wildcard TAG filter support with tagLike()
Port wildcard TAG filter support from Python redis-vl (ff415fb):
- Add Filter.tagLike() method for pattern matching on tag fields
- Support prefix (tech*), suffix (*tech), and contains (*tech*) patterns
- Add escapeTagValuePreserveWildcard() to escape special chars but keep *
- Add comprehensive unit tests for all wildcard patterns

Unlike Filter.tag(), which escapes asterisks, tagLike() preserves them for wildcard matching. Special characters like hyphens and spaces are still properly escaped.

Example usage:
Filter.tagLike("category", "tech*") // prefix match
Filter.tagLike("category", "*tech") // suffix match
Filter.tagLike("category", "*tech*") // contains match
1 parent 2be2932 commit f4931e2

File tree

2 files changed

+207
-0
lines changed

2 files changed

+207
-0
lines changed

core/src/main/java/com/redis/vl/query/Filter.java

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,44 @@ public static Filter tag(String field, String... values) {
7171
return new Filter(FilterType.TAG, field, expr, null);
7272
}
7373

74+
/**
75+
* Create a tag wildcard filter for pattern matching.
76+
*
77+
* <p>This enables wildcard pattern matching on tag fields using the {@code *} character. Unlike
78+
* the {@link #tag(String, String...)} method, wildcards are not escaped, allowing patterns with
79+
* wildcards in any position, such as prefix ({@code "tech*"}), suffix ({@code "*tech"}), or
80+
* middle ({@code "*tech*"}) matches.
81+
*
82+
* <p>Examples:
83+
*
84+
* <pre>{@code
85+
* Filter.tagLike("category", "tech*"); // Prefix match
86+
* Filter.tagLike("category", "*tech"); // Suffix match
87+
* Filter.tagLike("category", "*tech*"); // Contains match
88+
* Filter.tagLike("category", "tech*", "*soft"); // Multiple patterns
89+
* }</pre>
90+
*
91+
* @param field Field name
92+
* @param patterns Tag patterns with wildcards (e.g., "tech*", "*tech", "*tech*")
93+
* @return FilterQuery for wildcard matching
94+
*/
95+
public static Filter tagLike(String field, String... patterns) {
96+
validateField(field);
97+
if (patterns == null || patterns.length == 0) {
98+
// Return wildcard filter for empty case (graceful fallback)
99+
return new Filter(FilterType.CUSTOM, field, "*", null);
100+
}
101+
102+
// Escape special characters but preserve wildcards (*)
103+
String[] escapedPatterns =
104+
Arrays.stream(patterns).map(Filter::escapeTagValuePreserveWildcard).toArray(String[]::new);
105+
String valueStr =
106+
escapedPatterns.length == 1 ? escapedPatterns[0] : String.join("|", escapedPatterns);
107+
108+
String expr = String.format("@%s:{%s}", escapeFieldName(field), valueStr);
109+
return new Filter(FilterType.TAG, field, expr, null);
110+
}
111+
74112
/**
75113
* Create a numeric filter builder
76114
*
@@ -323,6 +361,55 @@ private static String escapeTagValue(String value) {
323361
return escapeSpecialCharacters(value).replace(" ", "\\ ");
324362
}
325363

364+
/**
365+
* Escape special characters in tag values but preserve wildcards (*).
366+
*
367+
* <p>Used for wildcard/pattern matching on tag fields where * should not be escaped.
368+
*/
369+
private static String escapeTagValuePreserveWildcard(String value) {
370+
// For tag patterns, escape all special characters including spaces, but NOT *
371+
return escapeSpecialCharactersPreserveWildcard(value).replace(" ", "\\ ");
372+
}
373+
374+
/**
375+
* Escape special characters in search queries but preserve wildcards (*).
376+
*
377+
* <p>Used for wildcard/pattern matching where * should remain unescaped.
378+
*/
379+
private static String escapeSpecialCharactersPreserveWildcard(String value) {
380+
// Escape Redis search special characters EXCEPT *
381+
return value
382+
.replace("\\", "\\\\")
383+
.replace("-", "\\-")
384+
.replace("@", "\\@")
385+
.replace(":", "\\:")
386+
// Note: NOT escaping * for wildcard support
387+
.replace("[", "\\[")
388+
.replace("]", "\\]")
389+
.replace("(", "\\(")
390+
.replace(")", "\\)")
391+
.replace("{", "\\{")
392+
.replace("}", "\\}")
393+
.replace("+", "\\+")
394+
.replace("~", "\\~")
395+
.replace("\"", "\\\"")
396+
.replace("'", "\\'")
397+
.replace("/", "\\/")
398+
.replace("%", "\\%")
399+
.replace("<", "\\<")
400+
.replace(">", "\\>")
401+
.replace("=", "\\=")
402+
.replace("|", "\\|")
403+
.replace("&", "\\&")
404+
.replace("^", "\\^")
405+
.replace("$", "\\$")
406+
.replace(".", "\\.")
407+
.replace(",", "\\,")
408+
.replace("!", "\\!")
409+
.replace("?", "\\?")
410+
.replace(";", "\\;");
411+
}
412+
326413
/**
327414
* Build the filter query string
328415
*

core/src/test/java/com/redis/vl/query/FilterQueryTest.java

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,4 +539,124 @@ void shouldCreateTimestampGtLtFiltersWithDateTimeObjects() {
539539
Filter ltInstant = Filter.timestamp("last_updated").lt(instant);
540540
assertThat(ltInstant.build()).isEqualTo("@last_updated:[-inf (1742147139]");
541541
}
542+
543+
// ========== Tag wildcard (tagLike) filter tests ==========
544+
545+
@Test
546+
@DisplayName("Should create tag wildcard filter with prefix pattern")
547+
void shouldCreateTagWildcardFilterWithPrefixPattern() {
548+
// When - basic prefix wildcard
549+
Filter filter = Filter.tagLike("category", "tech*");
550+
551+
// Then - asterisk should NOT be escaped
552+
assertThat(filter.build()).isEqualTo("@category:{tech*}");
553+
}
554+
555+
@Test
556+
@DisplayName("Should create tag wildcard filter with suffix pattern")
557+
void shouldCreateTagWildcardFilterWithSuffixPattern() {
558+
// When - suffix wildcard
559+
Filter filter = Filter.tagLike("category", "*tech");
560+
561+
// Then
562+
assertThat(filter.build()).isEqualTo("@category:{*tech}");
563+
}
564+
565+
@Test
566+
@DisplayName("Should create tag wildcard filter with contains pattern")
567+
void shouldCreateTagWildcardFilterWithContainsPattern() {
568+
// When - contains wildcard (asterisk on both sides)
569+
Filter filter = Filter.tagLike("category", "*tech*");
570+
571+
// Then
572+
assertThat(filter.build()).isEqualTo("@category:{*tech*}");
573+
}
574+
575+
@Test
576+
@DisplayName("Should create tag wildcard filter with multiple patterns")
577+
void shouldCreateTagWildcardFilterWithMultiplePatterns() {
578+
// When - multiple patterns
579+
Filter filter = Filter.tagLike("category", "tech*", "*soft");
580+
581+
// Then
582+
assertThat(filter.build()).isEqualTo("@category:{tech*|*soft}");
583+
}
584+
585+
@Test
586+
@DisplayName("Should escape special characters but preserve wildcards in tagLike")
587+
void shouldEscapeSpecialCharsButPreserveWildcards() {
588+
// When - pattern with special char (hyphen) AND wildcard
589+
Filter filter = Filter.tagLike("category", "tech*-pro");
590+
591+
// Then - hyphen should be escaped, asterisk should NOT be escaped
592+
assertThat(filter.build()).isEqualTo("@category:{tech*\\-pro}");
593+
594+
// When - pattern with space and wildcard
595+
Filter filterWithSpace = Filter.tagLike("category", "hello w*");
596+
597+
// Then - space should be escaped, asterisk should NOT be escaped
598+
assertThat(filterWithSpace.build()).isEqualTo("@category:{hello\\ w*}");
599+
600+
// When - pattern with special character ($) and wildcard
601+
Filter filterWithDollar = Filter.tagLike("category", "cat$*");
602+
603+
// Then - $ should be escaped, asterisk should NOT be escaped
604+
assertThat(filterWithDollar.build()).isEqualTo("@category:{cat\\$*}");
605+
}
606+
607+
@Test
608+
@DisplayName("Should return wildcard for empty tagLike patterns")
609+
void shouldReturnWildcardForEmptyTagLikePatterns() {
610+
// Test empty string array
611+
Filter emptyFilter = Filter.tagLike("category");
612+
assertThat(emptyFilter.build()).isEqualTo("*");
613+
614+
// Test null array
615+
Filter nullFilter = Filter.tagLike("category", (String[]) null);
616+
assertThat(nullFilter.build()).isEqualTo("*");
617+
}
618+
619+
@Test
620+
@DisplayName("Tag filter should escape asterisk while tagLike preserves it")
621+
void tagFilterShouldEscapeAsteriskWhileTagLikePreservesIt() {
622+
// When - using regular tag filter with asterisk
623+
Filter tagFilter = Filter.tag("category", "tech*");
624+
625+
// Then - asterisk SHOULD be escaped in regular tag
626+
assertThat(tagFilter.build()).isEqualTo("@category:{tech\\*}");
627+
628+
// When - using tagLike filter with asterisk
629+
Filter tagLikeFilter = Filter.tagLike("category", "tech*");
630+
631+
// Then - asterisk should NOT be escaped in tagLike
632+
assertThat(tagLikeFilter.build()).isEqualTo("@category:{tech*}");
633+
}
634+
635+
@Test
636+
@DisplayName("Should combine tagLike with exact tag filters")
637+
void shouldCombineTagLikeWithExactTagFilters() {
638+
// Create filters with different operators
639+
Filter exactMatch = Filter.tag("brand", "nike");
640+
Filter wildcardMatch = Filter.tagLike("category", "tech*");
641+
642+
// Verify individual filters work correctly
643+
assertThat(exactMatch.build()).isEqualTo("@brand:{nike}");
644+
assertThat(wildcardMatch.build()).isEqualTo("@category:{tech*}");
645+
646+
// Combine with AND - wildcard should be preserved, exact match should not have unescaped *
647+
Filter combinedAnd = Filter.and(exactMatch, wildcardMatch);
648+
assertThat(combinedAnd.build()).isEqualTo("(@brand:{nike} @category:{tech*})");
649+
650+
// Combine with OR
651+
Filter combinedOr = Filter.or(exactMatch, wildcardMatch);
652+
assertThat(combinedOr.build()).isEqualTo("(@brand:{nike} | @category:{tech*})");
653+
654+
// Mix of exact, wildcard, and exact with * in value
655+
Filter exactWithAsterisk = Filter.tag("status", "active*"); // * should be escaped
656+
Filter complexFilter = Filter.and(exactMatch, wildcardMatch, exactWithAsterisk);
657+
String result = complexFilter.build();
658+
assertThat(result).contains("@brand:{nike}");
659+
assertThat(result).contains("@category:{tech*}"); // wildcard preserved
660+
assertThat(result).contains("@status:{active\\*}"); // asterisk escaped
661+
}
542662
}

0 commit comments

Comments
 (0)