Skip to content

Commit 07b9058

Browse files
sobychacko authored and markpollack committed
Add filter-based deletion to Neo4j and OpenSearch vector stores
Add string-based filter deletion alongside the Filter.Expression-based deletion for Neo4j and OpenSearch vector stores, providing consistent deletion capabilities with other vector store implementations.

Key changes:
- Add delete(Filter.Expression) implementation for Neo4j store using Cypher queries
- Add delete(Filter.Expression) implementation for OpenSearch store using query_string
- Leverage existing filter expression converters for both stores
- Use Neo4j's transaction batching for efficient large-scale deletions
- Use OpenSearch's delete_by_query API for metadata-based deletion
- Add comprehensive integration tests for both stores covering:
  * Simple equality filters
  * String-based filter expressions
  * Complex filter expressions with multiple conditions

This maintains consistency with other vector store implementations while utilizing store-specific features for efficient metadata-based deletion.

Signed-off-by: Soby Chacko <[email protected]>
1 parent ef0bade commit 07b9058

File tree

4 files changed

+268
-6
lines changed

4 files changed

+268
-6
lines changed

vector-stores/spring-ai-neo4j-store/src/main/java/org/springframework/ai/vectorstore/neo4j/Neo4jVectorStore.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,26 @@
2121
import java.util.Map;
2222
import java.util.Optional;
2323

24+
import org.apache.commons.logging.LogFactory;
2425
import org.neo4j.cypherdsl.support.schema_name.SchemaNames;
2526
import org.neo4j.driver.Driver;
2627
import org.neo4j.driver.SessionConfig;
2728
import org.neo4j.driver.Values;
2829

2930
import org.springframework.ai.document.Document;
3031
import org.springframework.ai.document.DocumentMetadata;
31-
import org.springframework.ai.embedding.BatchingStrategy;
3232
import org.springframework.ai.embedding.EmbeddingModel;
3333
import org.springframework.ai.embedding.EmbeddingOptionsBuilder;
34-
import org.springframework.ai.embedding.TokenCountBatchingStrategy;
3534
import org.springframework.ai.observation.conventions.VectorStoreProvider;
3635
import org.springframework.ai.observation.conventions.VectorStoreSimilarityMetric;
3736
import org.springframework.ai.vectorstore.AbstractVectorStoreBuilder;
3837
import org.springframework.ai.vectorstore.SearchRequest;
38+
import org.springframework.ai.vectorstore.filter.Filter;
3939
import org.springframework.ai.vectorstore.neo4j.filter.Neo4jVectorFilterExpressionConverter;
4040
import org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore;
4141
import org.springframework.ai.vectorstore.observation.VectorStoreObservationContext;
4242
import org.springframework.beans.factory.InitializingBean;
43+
import org.springframework.core.log.LogAccessor;
4344
import org.springframework.util.Assert;
4445
import org.springframework.util.StringUtils;
4546

@@ -133,6 +134,8 @@
133134
*/
134135
public class Neo4jVectorStore extends AbstractObservationVectorStore implements InitializingBean {
135136

137+
private static final LogAccessor logger = new LogAccessor(LogFactory.getLog(Neo4jVectorStore.class));
138+
136139
public static final int DEFAULT_EMBEDDING_DIMENSION = 1536;
137140

138141
public static final int DEFAULT_TRANSACTION_SIZE = 10_000;
@@ -235,6 +238,29 @@ public Optional<Boolean> doDelete(List<String> idList) {
235238
}
236239
}
237240

241+
@Override
242+
protected void doDelete(Filter.Expression filterExpression) {
243+
Assert.notNull(filterExpression, "Filter expression must not be null");
244+
245+
try (var session = this.driver.session(this.sessionConfig)) {
246+
String whereClause = this.filterExpressionConverter.convertExpression(filterExpression);
247+
248+
// Create Cypher query with transaction batching
249+
String cypher = """
250+
MATCH (node:%s) WHERE %s
251+
CALL { WITH node DETACH DELETE node } IN TRANSACTIONS OF $transactionSize ROWS
252+
""".formatted(this.label, whereClause);
253+
254+
var summary = session.run(cypher, Map.of("transactionSize", DEFAULT_TRANSACTION_SIZE)).consume();
255+
256+
logger.debug("Deleted " + summary.counters().nodesDeleted() + " nodes matching filter expression");
257+
}
258+
catch (Exception e) {
259+
logger.error(e, "Failed to delete nodes by filter: " + e.getMessage());
260+
throw new IllegalStateException("Failed to delete nodes by filter", e);
261+
}
262+
}
263+
238264
@Override
239265
public List<Document> doSimilaritySearch(SearchRequest request) {
240266
Assert.isTrue(request.getTopK() > 0, "The number of documents to returned must be greater than zero");

vector-stores/spring-ai-neo4j-store/src/test/java/org/springframework/ai/vectorstore/neo4j/Neo4jVectorStoreIT.java

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
2020
import java.util.List;
2121
import java.util.Map;
2222
import java.util.UUID;
23+
import java.util.stream.Collectors;
2324

2425
import org.junit.Assert;
2526
import org.junit.jupiter.api.BeforeEach;
@@ -39,6 +40,7 @@
3940
import org.springframework.ai.openai.api.OpenAiApi;
4041
import org.springframework.ai.vectorstore.SearchRequest;
4142
import org.springframework.ai.vectorstore.VectorStore;
43+
import org.springframework.ai.vectorstore.filter.Filter;
4244
import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser;
4345
import org.springframework.boot.SpringBootConfiguration;
4446
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
@@ -53,6 +55,7 @@
5355
* @author Michael Simons
5456
* @author Christian Tzolov
5557
* @author Thomas Vitale
58+
* @author Soby Chacko
5659
*/
5760
@Testcontainers
5861
@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+")
@@ -301,6 +304,99 @@ void ensureIdIndexGetsCreated() {
301304
.isTrue());
302305
}
303306

307+
@Test
308+
void deleteByFilter() {
309+
this.contextRunner.run(context -> {
310+
VectorStore vectorStore = context.getBean(VectorStore.class);
311+
312+
var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
313+
Map.of("country", "BG", "year", 2020));
314+
var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
315+
Map.of("country", "NL", "year", 2021));
316+
var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner",
317+
Map.of("country", "BG", "year", 2023));
318+
319+
vectorStore.add(List.of(bgDocument, nlDocument, bgDocument2));
320+
321+
SearchRequest searchRequest = SearchRequest.builder()
322+
.query("The World")
323+
.topK(5)
324+
.similarityThresholdAll()
325+
.build();
326+
327+
List<Document> results = vectorStore.similaritySearch(searchRequest);
328+
assertThat(results).hasSize(3);
329+
330+
Filter.Expression filterExpression = new Filter.Expression(Filter.ExpressionType.EQ,
331+
new Filter.Key("country"), new Filter.Value("BG"));
332+
333+
vectorStore.delete(filterExpression);
334+
335+
results = vectorStore.similaritySearch(searchRequest);
336+
assertThat(results).hasSize(1);
337+
assertThat(results.get(0).getMetadata()).containsEntry("country", "NL");
338+
});
339+
}
340+
341+
@Test
342+
void deleteWithStringFilterExpression() {
343+
this.contextRunner.run(context -> {
344+
VectorStore vectorStore = context.getBean(VectorStore.class);
345+
346+
var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
347+
Map.of("country", "BG", "year", 2020));
348+
var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
349+
Map.of("country", "NL", "year", 2021));
350+
var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner",
351+
Map.of("country", "BG", "year", 2023));
352+
353+
vectorStore.add(List.of(bgDocument, nlDocument, bgDocument2));
354+
355+
var searchRequest = SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build();
356+
357+
List<Document> results = vectorStore.similaritySearch(searchRequest);
358+
assertThat(results).hasSize(3);
359+
360+
vectorStore.delete("country == 'BG'");
361+
362+
results = vectorStore.similaritySearch(searchRequest);
363+
assertThat(results).hasSize(1);
364+
assertThat(results.get(0).getMetadata()).containsEntry("country", "NL");
365+
});
366+
}
367+
368+
@Test
369+
void deleteWithComplexFilterExpression() {
370+
this.contextRunner.run(context -> {
371+
VectorStore vectorStore = context.getBean(VectorStore.class);
372+
373+
var doc1 = new Document("Content 1", Map.of("type", "A", "priority", 1L));
374+
var doc2 = new Document("Content 2", Map.of("type", "A", "priority", 2L));
375+
var doc3 = new Document("Content 3", Map.of("type", "B", "priority", 1L));
376+
377+
vectorStore.add(List.of(doc1, doc2, doc3));
378+
379+
// Complex filter expression: (type == 'A' AND priority > 1)
380+
Filter.Expression priorityFilter = new Filter.Expression(Filter.ExpressionType.GT,
381+
new Filter.Key("priority"), new Filter.Value(1));
382+
Filter.Expression typeFilter = new Filter.Expression(Filter.ExpressionType.EQ, new Filter.Key("type"),
383+
new Filter.Value("A"));
384+
Filter.Expression complexFilter = new Filter.Expression(Filter.ExpressionType.AND, typeFilter,
385+
priorityFilter);
386+
387+
vectorStore.delete(complexFilter);
388+
389+
var results = vectorStore
390+
.similaritySearch(SearchRequest.builder().query("Content").topK(5).similarityThresholdAll().build());
391+
392+
assertThat(results).hasSize(2);
393+
assertThat(results.stream().map(doc -> doc.getMetadata().get("type")).collect(Collectors.toList()))
394+
.containsExactlyInAnyOrder("A", "B");
395+
assertThat(results.stream().map(doc -> doc.getMetadata().get("priority")).collect(Collectors.toList()))
396+
.containsExactlyInAnyOrder(1L, 1L);
397+
});
398+
}
399+
304400
@SpringBootConfiguration
305401
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
306402
public static class TestApplication {

vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.Optional;
2525
import java.util.stream.Collectors;
2626

27+
import org.apache.commons.logging.LogFactory;
2728
import org.opensearch.client.json.JsonData;
2829
import org.opensearch.client.json.JsonpMapper;
2930
import org.opensearch.client.opensearch.OpenSearchClient;
@@ -32,17 +33,17 @@
3233
import org.opensearch.client.opensearch._types.query_dsl.Query;
3334
import org.opensearch.client.opensearch.core.BulkRequest;
3435
import org.opensearch.client.opensearch.core.BulkResponse;
36+
import org.opensearch.client.opensearch.core.DeleteByQueryRequest;
37+
import org.opensearch.client.opensearch.core.DeleteByQueryResponse;
3538
import org.opensearch.client.opensearch.core.search.Hit;
3639
import org.opensearch.client.opensearch.indices.CreateIndexRequest;
3740
import org.opensearch.client.opensearch.indices.CreateIndexResponse;
3841
import org.opensearch.client.transport.endpoints.BooleanResponse;
3942

4043
import org.springframework.ai.document.Document;
4144
import org.springframework.ai.document.DocumentMetadata;
42-
import org.springframework.ai.embedding.BatchingStrategy;
4345
import org.springframework.ai.embedding.EmbeddingModel;
4446
import org.springframework.ai.embedding.EmbeddingOptionsBuilder;
45-
import org.springframework.ai.embedding.TokenCountBatchingStrategy;
4647
import org.springframework.ai.observation.conventions.VectorStoreProvider;
4748
import org.springframework.ai.observation.conventions.VectorStoreSimilarityMetric;
4849
import org.springframework.ai.vectorstore.AbstractVectorStoreBuilder;
@@ -52,6 +53,7 @@
5253
import org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore;
5354
import org.springframework.ai.vectorstore.observation.VectorStoreObservationContext;
5455
import org.springframework.beans.factory.InitializingBean;
56+
import org.springframework.core.log.LogAccessor;
5557
import org.springframework.util.Assert;
5658

5759
/**
@@ -139,6 +141,8 @@
139141
*/
140142
public class OpenSearchVectorStore extends AbstractObservationVectorStore implements InitializingBean {
141143

144+
private static final LogAccessor logger = new LogAccessor(LogFactory.getLog(OpenSearchVectorStore.class));
145+
142146
public static final String COSINE_SIMILARITY_FUNCTION = "cosinesimil";
143147

144148
public static final String DEFAULT_INDEX_NAME = "spring-ai-document-index";
@@ -230,6 +234,31 @@ private BulkResponse bulkRequest(BulkRequest bulkRequest) {
230234
}
231235
}
232236

237+
@Override
238+
protected void doDelete(Filter.Expression filterExpression) {
239+
Assert.notNull(filterExpression, "Filter expression must not be null");
240+
241+
try {
242+
String filterStr = this.filterExpressionConverter.convertExpression(filterExpression);
243+
244+
// Create delete by query request
245+
DeleteByQueryRequest request = new DeleteByQueryRequest.Builder().index(this.index)
246+
.query(q -> q.queryString(qs -> qs.query(filterStr)))
247+
.build();
248+
249+
DeleteByQueryResponse response = this.openSearchClient.deleteByQuery(request);
250+
logger.debug("Deleted " + response.deleted() + " documents matching filter expression");
251+
252+
if (!response.failures().isEmpty()) {
253+
throw new IllegalStateException("Failed to delete some documents: " + response.failures());
254+
}
255+
}
256+
catch (Exception e) {
257+
logger.error(e, "Failed to delete documents by filter: " + e.getMessage());
258+
throw new IllegalStateException("Failed to delete documents by filter", e);
259+
}
260+
}
261+
233262
@Override
234263
public List<Document> doSimilaritySearch(SearchRequest searchRequest) {
235264
Assert.notNull(searchRequest, "The search request must not be null.");

0 commit comments

Comments
 (0)