Skip to content

Commit a2f6f4c

Browse files
committed
Vector store FilterExpressionConverter fix and enhancements
Changes doSingleValue() from concrete to abstract method, forcing explicit handling in all implementations. BREAKING CHANGE: Custom FilterExpressionConverter implementations must now implement doSingleValue() and use appropriate helpers. Signed-off-by: Ilayaperumal Gopinathan <ilayaperumal.gopinathan@broadcom.com>
1 parent 2708445 commit a2f6f4c

File tree

27 files changed

+1313
-200
lines changed

27 files changed

+1313
-200
lines changed

spring-ai-test/src/main/java/org/springframework/ai/test/vectorstore/BaseVectorStoreTests.java

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
package org.springframework.ai.test.vectorstore;
1818

1919
import java.time.Duration;
20+
import java.util.ArrayList;
2021
import java.util.HashMap;
2122
import java.util.List;
2223
import java.util.Map;
2324
import java.util.concurrent.TimeUnit;
25+
import java.util.concurrent.atomic.AtomicReference;
2426
import java.util.function.Consumer;
2527
import java.util.stream.Collectors;
2628

@@ -70,20 +72,28 @@ protected List<Document> setupTestDocuments(VectorStore vectorStore) {
7072
return documents;
7173
}
7274

75+
/**
76+
* Timeout for verifying documents exist or are deleted. Override in subclasses for
77+
* stores with higher eventual-consistency latency (e.g. Pinecone serverless).
78+
*/
79+
protected Duration getVerificationTimeout() {
80+
return Duration.ofSeconds(5);
81+
}
82+
7383
private String normalizeValue(Object value) {
7484
return value.toString().replaceAll("^\"|\"$", "").trim();
7585
}
7686

7787
private void verifyDocumentsExist(VectorStore vectorStore, List<Document> documents) {
78-
await().atMost(5, TimeUnit.SECONDS).pollInterval(Duration.ofMillis(500)).untilAsserted(() -> {
88+
await().atMost(getVerificationTimeout()).pollInterval(Duration.ofMillis(500)).untilAsserted(() -> {
7989
List<Document> results = vectorStore.similaritySearch(
8090
SearchRequest.builder().query("The World").topK(documents.size()).similarityThresholdAll().build());
8191
assertThat(results).hasSize(documents.size());
8292
});
8393
}
8494

8595
private void verifyDocumentsDeleted(VectorStore vectorStore, List<String> deletedIds) {
86-
await().atMost(5, TimeUnit.SECONDS).pollInterval(Duration.ofMillis(500)).untilAsserted(() -> {
96+
await().atMost(getVerificationTimeout()).pollInterval(Duration.ofMillis(500)).untilAsserted(() -> {
8797
List<Document> results = vectorStore
8898
.similaritySearch(SearchRequest.builder().query("The World").topK(10).similarityThresholdAll().build());
8999

@@ -93,6 +103,23 @@ private void verifyDocumentsDeleted(VectorStore vectorStore, List<String> delete
93103
});
94104
}
95105

106+
/**
107+
* Waits until a search for "The World" returns the expected number of documents, and
108+
* returns that list. Use after deletes so eventually-consistent stores have time to
109+
* reflect the change. Callers must use the returned list for assertions (do not
110+
* search again) to avoid a race where a second search returns stale/empty results.
111+
*/
112+
private List<Document> awaitRemainingDocumentCount(VectorStore vectorStore, int expectedCount) {
113+
AtomicReference<List<Document>> ref = new AtomicReference<>();
114+
await().atMost(getVerificationTimeout()).pollInterval(Duration.ofMillis(500)).untilAsserted(() -> {
115+
List<Document> results = vectorStore
116+
.similaritySearch(SearchRequest.builder().query("The World").topK(10).similarityThresholdAll().build());
117+
assertThat(results).hasSize(expectedCount);
118+
ref.set(new ArrayList<>(results));
119+
});
120+
return ref.get();
121+
}
122+
96123
@Test
97124
protected void deleteById() {
98125
executeTest(vectorStore -> {
@@ -103,9 +130,7 @@ protected void deleteById() {
103130
vectorStore.delete(idsToDelete);
104131
verifyDocumentsDeleted(vectorStore, idsToDelete);
105132

106-
List<Document> results = vectorStore
107-
.similaritySearch(SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build());
108-
133+
List<Document> results = awaitRemainingDocumentCount(vectorStore, 1);
109134
assertThat(results).hasSize(1);
110135
assertThat(results.get(0).getId()).isEqualTo(documents.get(2).getId());
111136
Map<String, Object> metadata = results.get(0).getMetadata();
@@ -131,9 +156,7 @@ protected void deleteWithStringFilterExpression() {
131156
vectorStore.delete("country == 'BG'");
132157
verifyDocumentsDeleted(vectorStore, bgDocIds);
133158

134-
List<Document> results = vectorStore
135-
.similaritySearch(SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build());
136-
159+
List<Document> results = awaitRemainingDocumentCount(vectorStore, 1);
137160
assertThat(results).hasSize(1);
138161
assertThat(normalizeValue(results.get(0).getMetadata().get("country"))).isEqualTo("NL");
139162

@@ -158,9 +181,7 @@ protected void deleteByFilter() {
158181
vectorStore.delete(filterExpression);
159182
verifyDocumentsDeleted(vectorStore, bgDocIds);
160183

161-
List<Document> results = vectorStore
162-
.similaritySearch(SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build());
163-
184+
List<Document> results = awaitRemainingDocumentCount(vectorStore, 1);
164185
assertThat(results).hasSize(1);
165186
assertThat(normalizeValue(results.get(0).getMetadata().get("country"))).isEqualTo("NL");
166187

spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/filter/converter/AbstractFilterExpressionConverter.java

Lines changed: 134 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2026 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -16,7 +16,16 @@
1616

1717
package org.springframework.ai.vectorstore.filter.converter;
1818

19+
import java.time.Instant;
20+
import java.time.ZoneOffset;
21+
import java.time.format.DateTimeFormatter;
22+
import java.time.format.DateTimeParseException;
23+
import java.util.Date;
1924
import java.util.List;
25+
import java.util.regex.Pattern;
26+
27+
import com.fasterxml.jackson.core.JsonProcessingException;
28+
import com.fasterxml.jackson.databind.ObjectMapper;
2029

2130
import org.springframework.ai.vectorstore.filter.Filter;
2231
import org.springframework.ai.vectorstore.filter.Filter.Expression;
@@ -37,6 +46,25 @@
3746
*/
3847
public abstract class AbstractFilterExpressionConverter implements FilterExpressionConverter {
3948

49+
/**
50+
* ObjectMapper used for JSON string escaping.
51+
*/
52+
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
53+
54+
/**
55+
* Pattern for ISO-8601 date strings in UTC (yyyy-MM-dd'T'HH:mm:ss'Z') used to
56+
* recognize and normalize date strings before passing to converters.
57+
*/
58+
protected static final Pattern ISO_DATE_PATTERN = Pattern
59+
.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{1,9})?Z");
60+
61+
/**
62+
* Formatter for parsing and normalizing ISO date strings.
63+
*/
64+
protected static final DateTimeFormatter ISO_DATE_FORMATTER = DateTimeFormatter
65+
.ofPattern("yyyy-MM-dd'T'HH:mm:ss[.SSS]'Z'")
66+
.withZone(ZoneOffset.UTC);
67+
4068
/**
4169
* Create a new AbstractFilterExpressionConverter.
4270
*/
@@ -126,29 +154,126 @@ protected void doValue(Filter.Value filterValue, StringBuilder context) {
126154
doStartValueRange(filterValue, context);
127155
int c = 0;
128156
for (Object v : list) {
129-
this.doSingleValue(v, context);
157+
this.doSingleValue(normalizeDateString(v), context);
130158
if (c++ < list.size() - 1) {
131159
this.doAddValueRangeSpitter(filterValue, context);
132160
}
133161
}
134162
this.doEndValueRange(filterValue, context);
135163
}
136164
else {
137-
this.doSingleValue(filterValue.value(), context);
165+
this.doSingleValue(normalizeDateString(filterValue.value()), context);
138166
}
139167
}
140168

141169
/**
142-
* Convert the given value into a string representation.
170+
* If the value is a string matching the ISO date pattern, parse and return as
171+
* {@link Date} so that all converters that handle {@code Date} automatically support
172+
* date strings. Otherwise return the value unchanged.
173+
* @param value the value (possibly a date string)
174+
* @return the value, or a {@code Date} if the value was a parseable date string
175+
*/
176+
protected static Object normalizeDateString(Object value) {
177+
if (!(value instanceof String text) || !ISO_DATE_PATTERN.matcher(text).matches()) {
178+
return value;
179+
}
180+
try {
181+
return Date.from(Instant.from(ISO_DATE_FORMATTER.parse(text)));
182+
}
183+
catch (DateTimeParseException e) {
184+
throw new IllegalArgumentException("Invalid date type: " + text, e);
185+
}
186+
}
187+
188+
/**
189+
* Convert the given single value into a string representation and append it to the
190+
* context. This method handles all value types including String, Number, Boolean,
191+
* Date, etc.
192+
* <p>
193+
* For convenience, implementations can use the provided static helper methods such as
194+
* {@link #emitJsonValue(Object, StringBuilder)} for JSON-based filters,
195+
* {@link #emitLuceneString(String, StringBuilder)} for Lucene-based filters, or
196+
* implement their own format-specific escaping logic as needed.
143197
* @param value the value to convert
144198
* @param context the context to append the string representation to
145199
*/
146-
protected void doSingleValue(Object value, StringBuilder context) {
147-
if (value instanceof String) {
148-
context.append(String.format("\"%s\"", value));
200+
protected abstract void doSingleValue(Object value, StringBuilder context);
201+
202+
// Helper Methods for String Value Formatting
203+
204+
/**
205+
* Emit a string value formatted for Lucene query syntax by appending escaped
206+
* characters to the provided context. Used by Elasticsearch, OpenSearch, and GemFire
207+
* VectorDB query string filters.
208+
* <p>
209+
* Lucene/Elasticsearch query strings require backslash-escaping of special
210+
* characters: {@code + - = ! ( ) { } [ ] ^ " ~ * ? : \ / & | < >}
211+
* @param value the string value to format
212+
* @param context the context to append the escaped string to
213+
* @see <a href=
214+
* "https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters">Elasticsearch
215+
* Reserved Characters</a>
216+
*/
217+
protected static void emitLuceneString(String value, StringBuilder context) {
218+
for (int i = 0; i < value.length(); i++) {
219+
char c = value.charAt(i);
220+
221+
// Escape Lucene query string special characters
222+
switch (c) {
223+
case '+':
224+
case '-':
225+
case '=':
226+
case '!':
227+
case '(':
228+
case ')':
229+
case '{':
230+
case '}':
231+
case '[':
232+
case ']':
233+
case '^':
234+
case '"':
235+
case '~':
236+
case '*':
237+
case '?':
238+
case ':':
239+
case '\\':
240+
case '/':
241+
case '&':
242+
case '|':
243+
case '<':
244+
case '>':
245+
context.append('\\').append(c);
246+
break;
247+
default:
248+
context.append(c);
249+
break;
250+
}
149251
}
150-
else {
151-
context.append(value);
252+
}
253+
254+
/**
255+
* Emit a value formatted as JSON by appending its JSON representation to the provided
256+
* context. Used for PostgreSQL JSONPath, Neo4j Cypher, Weaviate GraphQL, and other
257+
* JSON-based filter expressions.
258+
* <p>
259+
* This method uses Jackson's ObjectMapper to properly serialize all value types:
260+
* <ul>
261+
* <li>Strings: properly quoted and escaped with double quotes, backslashes, and
262+
* control characters handled</li>
263+
* <li>Numbers: formatted without quotes (e.g., 42, 3.14)</li>
264+
* <li>Booleans: formatted as JSON literals {@code true} or {@code false}</li>
265+
* <li>null: formatted as JSON literal {@code null}</li>
266+
* <li>Other types: handled according to Jackson's default serialization</li>
267+
* </ul>
268+
* @param value the value to format (can be any type)
269+
* @param context the context to append the JSON representation to
270+
*/
271+
protected static void emitJsonValue(Object value, StringBuilder context) {
272+
try {
273+
context.append(OBJECT_MAPPER.writeValueAsString(value));
274+
}
275+
catch (JsonProcessingException e) {
276+
throw new RuntimeException("Error serializing value to JSON.", e);
152277
}
153278
}
154279

spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/filter/converter/PineconeFilterExpressionConverter.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,9 @@ protected void doKey(Key key, StringBuilder context) {
6161
context.append("\"").append(identifier).append("\": ");
6262
}
6363

64+
@Override
65+
protected void doSingleValue(Object value, StringBuilder context) {
66+
emitJsonValue(value, context);
67+
}
68+
6469
}

spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/filter/converter/PrintFilterExpressionConverter.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,9 @@ public void doEndGroup(Group group, StringBuilder context) {
4848
context.append(")");
4949
}
5050

51+
@Override
52+
protected void doSingleValue(Object value, StringBuilder context) {
53+
emitJsonValue(value, context);
54+
}
55+
5156
}

0 commit comments

Comments
 (0)