diff --git a/vector-stores/spring-ai-cassandra-store/src/main/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStore.java b/vector-stores/spring-ai-cassandra-store/src/main/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStore.java
index a60ef527cb2..9575ef5bcbf 100644
--- a/vector-stores/spring-ai-cassandra-store/src/main/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStore.java
+++ b/vector-stores/spring-ai-cassandra-store/src/main/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStore.java
@@ -31,6 +31,7 @@
 import java.util.concurrent.Executor;
 import java.util.concurrent.Executors;
 import java.util.function.Function;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 import com.datastax.oss.driver.api.core.CqlSession;
@@ -73,6 +74,7 @@
 import org.springframework.ai.vectorstore.AbstractVectorStoreBuilder;
 import org.springframework.ai.vectorstore.SearchRequest;
 import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.ai.vectorstore.filter.Filter;
 import org.springframework.ai.vectorstore.filter.FilterExpressionConverter;
 import org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore;
 import org.springframework.ai.vectorstore.observation.VectorStoreObservationContext;
@@ -315,6 +317,78 @@ public Optional<Boolean> doDelete(List<String> idList) {
 		return Optional.of(Boolean.TRUE);
 	}
 
+	/**
+	 * Deletes every document that matches the given filter expression.
+	 * <p>
+	 * Matching documents are located with a filtered similarity search and then
+	 * deleted by id. A single search returns at most 1000 documents, so the
+	 * search-and-delete step repeats until a search returns less than a full page.
+	 * @param filterExpression the filter selecting the documents to delete, must not
+	 * be null
+	 * @throws IllegalStateException if searching or deleting fails
+	 */
+	@Override
+	protected void doDelete(Filter.Expression filterExpression) {
+		Assert.notNull(filterExpression, "Filter expression must not be null");
+
+		// doSimilaritySearch rejects a topK above 1000, so matches are removed in
+		// pages of at most that size.
+		final int pageSize = 1000;
+
+		try {
+			// TODO - Investigate why we can't do a direct filter based delete in
+			// Cassandra
+			// This SO thread seems to indicate that this is not possible in Cassandra
+			// https://stackoverflow.com/questions/70953262/unable-to-delete-multiple-rows-getting-some-partition-key-parts-are-missing-i
+			// Needs more research into this matter.
+			SearchRequest searchRequest = SearchRequest.builder()
+				.query("") // empty query since we only want filter matches
+				.filterExpression(filterExpression)
+				.topK(pageSize)
+				.similarityThresholdAll()
+				.build();
+
+			int deletedCount = 0;
+			List<String> previousIds = List.of();
+			List<Document> matchingDocs;
+
+			do {
+				matchingDocs = similaritySearch(searchRequest);
+				if (matchingDocs.isEmpty()) {
+					break;
+				}
+
+				// Then delete those documents by ID
+				List<String> idsToDelete = matchingDocs.stream().map(Document::getId).collect(Collectors.toList());
+
+				// The same page coming back means the previous delete had no
+				// visible effect; fail instead of looping forever.
+				if (idsToDelete.equals(previousIds)) {
+					throw new IllegalStateException("Documents are still returned after deletion");
+				}
+
+				Optional<Boolean> result = delete(idsToDelete);
+
+				if (result.isPresent() && !result.get()) {
+					throw new IllegalStateException("Failed to delete some documents");
+				}
+
+				deletedCount += idsToDelete.size();
+				previousIds = idsToDelete;
+			}
+			while (matchingDocs.size() >= pageSize);
+
+			if (deletedCount > 0) {
+				int totalDeleted = deletedCount;
+				logger.debug(() -> "Deleted " + totalDeleted + " documents matching filter expression");
+			}
+		}
+		catch (Exception e) {
+			logger.error(e, () -> "Failed to delete documents by filter");
+			throw new IllegalStateException("Failed to delete documents by filter", e);
+		}
+	}
+
 	@Override
 	public List<Document> doSimilaritySearch(SearchRequest request) {
 		Preconditions.checkArgument(request.getTopK() <= 1000);
diff --git a/vector-stores/spring-ai-cassandra-store/src/test/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStoreIT.java b/vector-stores/spring-ai-cassandra-store/src/test/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStoreIT.java
index 1168c7e4533..737131950be 100644
--- a/vector-stores/spring-ai-cassandra-store/src/test/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStoreIT.java
+++ b/vector-stores/spring-ai-cassandra-store/src/test/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStoreIT.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023-2024 the original author or authors.
+ * Copyright 2023-2025 the original author or authors.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
+import java.util.stream.Collectors;
 
 import com.datastax.oss.driver.api.core.CqlSession;
 import com.datastax.oss.driver.api.core.CqlSessionBuilder;
@@ -42,6 +43,7 @@
 import org.springframework.ai.vectorstore.SearchRequest;
 import org.springframework.ai.vectorstore.cassandra.CassandraVectorStore.SchemaColumn;
 import org.springframework.ai.vectorstore.cassandra.CassandraVectorStore.SchemaColumnTags;
+import org.springframework.ai.vectorstore.filter.Filter;
 import org.springframework.boot.SpringBootConfiguration;
 import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
 import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
@@ -57,6 +59,7 @@
  *
  * @author Mick Semb Wever
  * @author Thomas Vitale
+ * @author Soby Chacko
  * @since 1.0.0
  */
 @Testcontainers
@@ -417,6 +420,114 @@ void searchWithThreshold() {
 		});
 	}
 
+	@Test
+	void deleteByFilter() {
+		this.contextRunner.run(context -> {
+			try (CassandraVectorStore store = createTestStore(context,
+					new SchemaColumn("country", DataTypes.TEXT, SchemaColumnTags.INDEXED),
+					new SchemaColumn("year", DataTypes.SMALLINT, SchemaColumnTags.INDEXED))) {
+
+				var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
+						Map.of("country", "BG", "year", (short) 2020));
+				var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
+						Map.of("country", "NL"));
+				var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner",
+						Map.of("country", "BG", "year", (short) 2023));
+
+				store.add(List.of(bgDocument, nlDocument, bgDocument2));
+
+				// Verify initial state
+				List<Document> results = store
+					.similaritySearch(SearchRequest.builder().query("The World").topK(5).build());
+				assertThat(results).hasSize(3);
+
+				// Delete documents with country = BG
+				Filter.Expression filterExpression = new Filter.Expression(Filter.ExpressionType.EQ,
+						new Filter.Key("country"), new Filter.Value("BG"));
+
+				store.delete(filterExpression);
+
+				// Only the NL document should remain
+				results = store.similaritySearch(
+						SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build());
+
+				assertThat(results).hasSize(1);
+				assertThat(results.get(0).getMetadata()).containsEntry("country", "NL");
+			}
+		});
+	}
+
+	@Test
+	void deleteWithStringFilterExpression() {
+		this.contextRunner.run(context -> {
+			try (CassandraVectorStore store = createTestStore(context,
+					new SchemaColumn("country", DataTypes.TEXT, SchemaColumnTags.INDEXED),
+					new SchemaColumn("year", DataTypes.SMALLINT, SchemaColumnTags.INDEXED))) {
+
+				var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
+						Map.of("country", "BG", "year", (short) 2020));
+				var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
+						Map.of("country", "NL"));
+				var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner",
+						Map.of("country", "BG", "year", (short) 2023));
+
+				store.add(List.of(bgDocument, nlDocument, bgDocument2));
+
+				// Verify initial state
+				List<Document> results = store
+					.similaritySearch(SearchRequest.builder().query("The World").topK(5).build());
+				assertThat(results).hasSize(3);
+
+				// Delete documents with country = BG using the text filter syntax
+				store.delete("country == 'BG'");
+
+				// Only the NL document should remain
+				results = store.similaritySearch(
+						SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build());
+
+				assertThat(results).hasSize(1);
+				assertThat(results.get(0).getMetadata()).containsEntry("country", "NL");
+			}
+		});
+	}
+
+	@Test
+	void deleteWithComplexFilterExpression() {
+		this.contextRunner.run(context -> {
+			try (CassandraVectorStore store = createTestStore(context,
+					new SchemaColumn("type", DataTypes.TEXT, SchemaColumnTags.INDEXED),
+					new SchemaColumn("priority", DataTypes.SMALLINT, SchemaColumnTags.INDEXED))) {
+
+				var doc1 = new Document("Content 1", Map.of("type", "A", "priority", (short) 1));
+				var doc2 = new Document("Content 2", Map.of("type", "A", "priority", (short) 2));
+				var doc3 = new Document("Content 3", Map.of("type", "B", "priority", (short) 1));
+
+				store.add(List.of(doc1, doc2, doc3));
+
+				// Complex filter expression: (type == 'A' AND priority > 1)
+				Filter.Expression priorityFilter = new Filter.Expression(Filter.ExpressionType.GT,
+						new Filter.Key("priority"), new Filter.Value((short) 1));
+				Filter.Expression typeFilter = new Filter.Expression(Filter.ExpressionType.EQ, new Filter.Key("type"),
+						new Filter.Value("A"));
+				Filter.Expression complexFilter = new Filter.Expression(Filter.ExpressionType.AND, typeFilter,
+						priorityFilter);
+
+				store.delete(complexFilter);
+
+				// Only doc2 (type A, priority 2) matches the filter; doc1 and doc3 remain
+				var results = store.similaritySearch(
+						SearchRequest.builder().query("Content").topK(5).similarityThresholdAll().build());
+
+				assertThat(results).hasSize(2);
+				assertThat(results.stream().map(doc -> doc.getMetadata().get("type")).collect(Collectors.toList()))
+					.containsExactlyInAnyOrder("A", "B");
+				assertThat(results.stream()
+					.map(doc -> ((Short) doc.getMetadata().get("priority")).intValue())
+					.collect(Collectors.toList())).containsExactlyInAnyOrder(1, 1);
+			}
+		});
+	}
+
 	@SpringBootConfiguration
 	@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
 	public static class TestApplication {