Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
package org.springframework.ai.vectorstore.pinecone;

import java.time.Duration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
Expand All @@ -31,10 +33,12 @@
import io.pinecone.PineconeConnection;
import io.pinecone.PineconeConnectionConfig;
import io.pinecone.proto.DeleteRequest;
import io.pinecone.proto.DeleteResponse;
import io.pinecone.proto.QueryRequest;
import io.pinecone.proto.QueryResponse;
import io.pinecone.proto.UpsertRequest;
import io.pinecone.proto.Vector;
import org.apache.commons.logging.LogFactory;

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentMetadata;
Expand All @@ -46,10 +50,12 @@
import org.springframework.ai.observation.conventions.VectorStoreProvider;
import org.springframework.ai.vectorstore.AbstractVectorStoreBuilder;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.filter.Filter;
import org.springframework.ai.vectorstore.filter.FilterExpressionConverter;
import org.springframework.ai.vectorstore.filter.converter.PineconeFilterExpressionConverter;
import org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore;
import org.springframework.ai.vectorstore.observation.VectorStoreObservationContext;
import org.springframework.core.log.LogAccessor;
import org.springframework.lang.Nullable;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
Expand Down Expand Up @@ -82,6 +88,8 @@ public class PineconeVectorStore extends AbstractObservationVectorStore {

private final ObjectMapper objectMapper;

// Class-wide logger; doDelete uses it to report filter-based deletions and their failures.
private static final LogAccessor logger = new LogAccessor(LogFactory.getLog(PineconeVectorStore.class));

/**
* Creates a new PineconeVectorStore using the builder pattern.
* @param builder The configured builder instance
Expand Down Expand Up @@ -248,6 +256,43 @@ public List<Document> similaritySearch(SearchRequest request, String namespace)
.toList();
}

/**
 * Deletes the documents in the configured namespace that match the given filter
 * expression.
 * <p>
 * Deletion is done in two steps: a similarity search with an empty query and the
 * filter collects the matching documents, and their ids are then passed to the
 * id-based delete. A single lookup returns at most 10000 matches; if that limit is
 * reached a warning is logged, because further matching documents may remain.
 * @param filterExpression the filter selecting the documents to delete, never
 * {@code null}
 * @throws IllegalStateException if the lookup or the id-based deletion fails
 */
@Override
protected void doDelete(Filter.Expression filterExpression) {
	Assert.notNull(filterExpression, "Filter expression must not be null");

	// Upper bound on the number of matches fetched by the lookup query.
	final int maxMatches = 10000;

	try {
		// Direct filter based deletion is not working in Pinecone, so the matching
		// documents are first retrieved with a similarity search and then removed
		// through the id-based delete method.
		SearchRequest searchRequest = SearchRequest.builder()
			.query("") // empty query since we only want filter matches
			.filterExpression(filterExpression)
			.topK(maxMatches)
			.similarityThresholdAll()
			.build();

		List<Document> matchingDocs = similaritySearch(searchRequest, this.pineconeNamespace);

		if (matchingDocs.isEmpty()) {
			return;
		}

		if (matchingDocs.size() >= maxMatches) {
			// The lookup was capped, so the filter may match more documents than
			// were returned; make the possible partial deletion visible.
			logger.warn(() -> "Filter expression matched at least " + maxMatches
					+ " documents; only the returned matches are deleted and further matches may remain");
		}

		List<String> idsToDelete = matchingDocs.stream().map(Document::getId).collect(Collectors.toList());

		Optional<Boolean> result = delete(idsToDelete, this.pineconeNamespace);

		// An empty result is not treated as a failure; only an explicit false is.
		if (result.isPresent() && !result.get()) {
			throw new IllegalStateException("Failed to delete some documents");
		}

		logger.debug(() -> "Deleted " + idsToDelete.size() + " documents matching filter expression");
	}
	catch (Exception e) {
		logger.error(e, () -> "Failed to delete documents by filter");
		throw new IllegalStateException("Failed to delete documents by filter", e);
	}
}

@Override
public List<Document> doSimilaritySearch(SearchRequest request) {
return similaritySearch(request, this.pineconeNamespace);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2023-2024 the original author or authors.
* Copyright 2023-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,6 +23,7 @@
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import org.awaitility.Awaitility;
import org.awaitility.Duration;
Expand All @@ -36,6 +37,7 @@
import org.springframework.ai.transformers.TransformersEmbeddingModel;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.filter.Filter;
import org.springframework.boot.SpringBootConfiguration;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
Expand All @@ -49,6 +51,7 @@
/**
* @author Christian Tzolov
* @author Thomas Vitale
* @author Soby Chacko
*/
@EnabledIfEnvironmentVariable(named = "PINECONE_API_KEY", matches = ".+")
public class PineconeVectorStoreIT {
Expand All @@ -66,6 +69,8 @@ public class PineconeVectorStoreIT {

private static final String CUSTOM_CONTENT_FIELD_NAME = "article";

private static final int DEFAULT_TOP_K = 50;

private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
.withUserConfiguration(TestApplication.class);

Expand Down Expand Up @@ -283,6 +288,135 @@ public void searchThresholdTest() {
});
}

/**
 * Adds three "world" documents, deletes the two tagged with country "BG" through a
 * programmatic filter expression and verifies that only the "NL" document is left.
 */
@Test
void deleteByFilter() {
	this.contextRunner.run(context -> {
		final String query = "The World";
		VectorStore store = context.getBean(VectorStore.class);

		// Start from an index without leftovers of earlier runs.
		cleanupExistingDocuments(store, query);

		List<Document> worldDocs = createWorldDocuments();
		store.add(worldDocs);
		awaitDocumentsCount(store, query, 3);

		Filter.Key countryKey = new Filter.Key("country");
		Filter.Value bulgaria = new Filter.Value("BG");
		Filter.Expression countryIsBg = new Filter.Expression(Filter.ExpressionType.EQ, countryKey, bulgaria);
		store.delete(countryIsBg);
		awaitDocumentsCount(store, query, 1);

		List<Document> remaining = searchDocuments(store, query, 5);
		assertThat(remaining).hasSize(1);
		assertThat(remaining.get(0).getMetadata()).containsEntry("country", "NL");

		// Remove the surviving NL document (second entry of the fixture list).
		String nlDocumentId = worldDocs.get(1).getId();
		store.delete(List.of(nlDocumentId));
		awaitDocumentsCount(store, query, 0);
	});
}

/**
 * Same scenario as the programmatic filter test, but the filter is supplied in its
 * textual form ({@code country == 'BG'}).
 */
@Test
void deleteWithStringFilterExpression() {
	this.contextRunner.run(context -> {
		final String query = "The World";
		VectorStore store = context.getBean(VectorStore.class);

		// Start from an index without leftovers of earlier runs.
		cleanupExistingDocuments(store, query);

		List<Document> worldDocs = createWorldDocuments();
		store.add(worldDocs);
		awaitDocumentsCount(store, query, 3);

		String countryIsBg = "country == 'BG'";
		store.delete(countryIsBg);
		awaitDocumentsCount(store, query, 1);

		List<Document> remaining = searchDocuments(store, query, 5);
		assertThat(remaining).hasSize(1);
		assertThat(remaining.get(0).getMetadata()).containsEntry("country", "NL");

		// Remove the surviving NL document (second entry of the fixture list).
		String nlDocumentId = worldDocs.get(1).getId();
		store.delete(List.of(nlDocumentId));
		awaitDocumentsCount(store, query, 0);
	});
}

/**
 * Adds three "content" documents, deletes with the combined type/priority filter and
 * verifies that the two documents not matching both conditions remain.
 */
@Test
void deleteWithComplexFilterExpression() {
	this.contextRunner.run(context -> {
		final String query = "Content";
		VectorStore store = context.getBean(VectorStore.class);

		// Start from an index without leftovers of earlier runs.
		cleanupExistingDocuments(store, query);

		List<Document> contentDocs = createContentDocuments();
		store.add(contentDocs);
		awaitDocumentsCount(store, query, 3);

		store.delete(createComplexFilter());
		awaitDocumentsCount(store, query, 2);

		List<Document> remaining = searchDocuments(store, query, 5);
		assertThat(remaining).hasSize(2);
		assertComplexFilterResults(remaining);

		// Remove the survivors: doc1 and doc3 of the fixture list.
		String firstId = contentDocs.get(0).getId();
		String thirdId = contentDocs.get(2).getId();
		store.delete(List.of(firstId, thirdId));
		awaitDocumentsCount(store, query, 0);
	});
}

/**
 * Removes the documents currently returned for the given query (at most
 * {@code DEFAULT_TOP_K}) and waits until the query yields no results.
 * @param vectorStore the store to clean
 * @param query the query text used to find leftover documents
 */
private void cleanupExistingDocuments(VectorStore vectorStore, String query) {
	List<String> staleIds = searchDocuments(vectorStore, query, DEFAULT_TOP_K).stream()
		.map(Document::getId)
		.toList();

	// Only issue a delete when something was actually found.
	if (!staleIds.isEmpty()) {
		vectorStore.delete(staleIds);
	}

	awaitDocumentsCount(vectorStore, query, 0);
}

/**
 * Builds the fixture for the country-based delete tests: three documents with
 * identical text, tagged BG/2020, NL (no year) and BG/2023, in that order.
 * @return the three fixture documents
 */
private List<Document> createWorldDocuments() {
	String text = "The World is Big and Salvation Lurks Around the Corner";

	Document bg2020 = new Document(text, Map.of("country", "BG", "year", 2020));
	Document nl = new Document(text, Map.of("country", "NL"));
	Document bg2023 = new Document(text, Map.of("country", "BG", "year", 2023));

	return List.of(bg2020, nl, bg2023);
}

/**
 * Builds the fixture for the complex-filter delete test: "Content 1" (type A,
 * priority 1), "Content 2" (type A, priority 2) and "Content 3" (type B, priority 1).
 * @return the three fixture documents, in that order
 */
private List<Document> createContentDocuments() {
	Document first = new Document("Content 1", Map.of("type", "A", "priority", 1));
	Document second = new Document("Content 2", Map.of("type", "A", "priority", 2));
	Document third = new Document("Content 3", Map.of("type", "B", "priority", 1));

	return List.of(first, second, third);
}

/**
 * Builds the filter {@code type == "A" AND priority > 1}.
 * @return the combined filter expression
 */
private Filter.Expression createComplexFilter() {
	Filter.Expression typeIsA = new Filter.Expression(Filter.ExpressionType.EQ, new Filter.Key("type"),
			new Filter.Value("A"));
	Filter.Expression priorityAboveOne = new Filter.Expression(Filter.ExpressionType.GT,
			new Filter.Key("priority"), new Filter.Value(1));

	return new Filter.Expression(Filter.ExpressionType.AND, typeIsA, priorityAboveOne);
}

/**
 * Asserts that the given documents carry exactly the types "A" and "B" and that
 * both have priority 1, regardless of order.
 * @param results the documents left after the complex-filter delete
 */
private void assertComplexFilterResults(List<Document> results) {
	List<Object> types = results.stream().map(doc -> doc.getMetadata().get("type")).collect(Collectors.toList());
	assertThat(types).containsExactlyInAnyOrder("A", "B");

	// Priorities are compared as ints via Number, whatever numeric type is stored.
	List<Integer> priorities = results.stream()
		.map(doc -> ((Number) doc.getMetadata().get("priority")).intValue())
		.collect(Collectors.toList());
	assertThat(priorities).containsExactlyInAnyOrder(1, 1);
}

/**
 * Runs a similarity search for the query with the given result limit and the
 * similarity threshold set to accept all matches.
 * @param vectorStore the store to query
 * @param query the query text
 * @param topK the maximum number of results to request
 * @return the documents returned by the store
 */
private List<Document> searchDocuments(VectorStore vectorStore, String query, int topK) {
	SearchRequest request = SearchRequest.builder().query(query).topK(topK).similarityThresholdAll().build();
	return vectorStore.similaritySearch(request);
}

/**
 * Polls the store via Awaitility until a search for the query (limited to
 * {@code DEFAULT_TOP_K} results) returns exactly the expected number of documents.
 * @param vectorStore the store to query
 * @param query the query text
 * @param expectedCount the number of documents the search must return
 */
private void awaitDocumentsCount(VectorStore vectorStore, String query, int expectedCount) {
	Awaitility.await()
		.until(() -> searchDocuments(vectorStore, query, DEFAULT_TOP_K), hasSize(expectedCount));
}

@SpringBootConfiguration
@EnableAutoConfiguration
public static class TestApplication {
Expand Down