From 8c05d6863251652f6dbe938a93f87ffcfdd7eb68 Mon Sep 17 00:00:00 2001 From: Omkar Shetkar Date: Sat, 16 Dec 2023 13:29:12 +0530 Subject: [PATCH] - Merge SimplePersistentVectorStore and InMemoryVectorStore to be SimpleVectorStore --- .../ai/openai/acme/AcmeIT.java | 4 +- .../SimplePersistentVectorStoreIT.java | 6 +- .../SimplePersistentVectorStore.java | 96 ------------------- ...ectorStore.java => SimpleVectorStore.java} | 94 +++++++++++++++++- .../modules/ROOT/pages/api/etl-pipeline.adoc | 2 +- .../modules/ROOT/pages/api/vectordbs.adoc | 2 +- 6 files changed, 97 insertions(+), 107 deletions(-) delete mode 100644 spring-ai-core/src/main/java/org/springframework/ai/vectorstore/SimplePersistentVectorStore.java rename spring-ai-core/src/main/java/org/springframework/ai/vectorstore/{InMemoryVectorStore.java => SimpleVectorStore.java} (53%) diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/acme/AcmeIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/acme/AcmeIT.java index b78e25d4b10..0f3dc040977 100644 --- a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/acme/AcmeIT.java +++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/acme/AcmeIT.java @@ -22,7 +22,7 @@ import org.springframework.ai.reader.JsonReader; import org.springframework.ai.retriever.VectorStoreRetriever; import org.springframework.ai.transformer.splitter.TokenTextSplitter; -import org.springframework.ai.vectorstore.InMemoryVectorStore; +import org.springframework.ai.vectorstore.SimpleVectorStore; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -67,7 +67,7 @@ void acmeChain() { // Step 2 - Create embeddings and save to vector store logger.info("Creating Embeddings..."); - VectorStore vectorStore = new InMemoryVectorStore(embeddingClient); + VectorStore vectorStore = new 
SimpleVectorStore(embeddingClient); vectorStore.accept(textSplitter.apply(jsonReader.get())); diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/vectorstore/SimplePersistentVectorStoreIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/vectorstore/SimplePersistentVectorStoreIT.java index cbdf1365bf2..af73b4e08b8 100644 --- a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/vectorstore/SimplePersistentVectorStoreIT.java +++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/vectorstore/SimplePersistentVectorStoreIT.java @@ -6,7 +6,7 @@ import org.springframework.ai.document.Document; import org.springframework.ai.embedding.EmbeddingClient; import org.springframework.ai.reader.JsonReader; -import org.springframework.ai.vectorstore.SimplePersistentVectorStore; +import org.springframework.ai.vectorstore.SimpleVectorStore; import org.springframework.ai.reader.JsonMetadataGenerator; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -34,14 +34,14 @@ void persist(@TempDir(cleanup = CleanupMode.ON_SUCCESS) Path workingDir) { JsonReader jsonReader = new JsonReader(bikesJsonResource, new ProductMetadataGenerator(), "price", "name", "shortDescription", "description", "tags"); List documents = jsonReader.get(); - SimplePersistentVectorStore vectorStore = new SimplePersistentVectorStore(this.embeddingClient); + SimpleVectorStore vectorStore = new SimpleVectorStore(this.embeddingClient); vectorStore.add(documents); File tempFile = new File(workingDir.toFile(), "temp.txt"); vectorStore.save(tempFile); assertThat(tempFile).isNotEmpty(); assertThat(tempFile).content().contains("Velo 99 XR1 AXS"); - SimplePersistentVectorStore vectorStore2 = new SimplePersistentVectorStore(this.embeddingClient); + SimpleVectorStore vectorStore2 = new SimpleVectorStore(this.embeddingClient); vectorStore2.load(tempFile); List 
similaritySearch = vectorStore2.similaritySearch("Velo 99 XR1 AXS"); diff --git a/spring-ai-core/src/main/java/org/springframework/ai/vectorstore/SimplePersistentVectorStore.java b/spring-ai-core/src/main/java/org/springframework/ai/vectorstore/SimplePersistentVectorStore.java deleted file mode 100644 index 81b39e21d69..00000000000 --- a/spring-ai-core/src/main/java/org/springframework/ai/vectorstore/SimplePersistentVectorStore.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.springframework.ai.vectorstore; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.ai.document.Document; -import org.springframework.ai.embedding.EmbeddingClient; -import org.springframework.core.io.Resource; -import org.springframework.util.StreamUtils; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.Charset; -import java.util.HashMap; -import java.util.Map; - -/** - * Adds simple serialization/deserialization to the data stored in the InMemoryVectorStore - */ -public class SimplePersistentVectorStore extends InMemoryVectorStore { - - private static final Logger logger = LoggerFactory.getLogger(SimplePersistentVectorStore.class); - - public SimplePersistentVectorStore(EmbeddingClient embeddingClient) { - super(embeddingClient); - } - - public void save(File file) { - String json = getVectorDbAsJson(); - try { - if (!file.exists()) { - logger.info("Creating new vector store file: " + file); - file.createNewFile(); - } - else { - logger.info("Replacing existing vector store file: " + file); - file.delete(); - file.createNewFile(); - } - } - catch (IOException ex) { - throw new RuntimeException(ex); - } - try (OutputStream stream = new 
FileOutputStream(file)) { - StreamUtils.copy(json, Charset.forName("UTF-8"), stream); - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - - public void load(File file) { - TypeReference> typeRef = new TypeReference<>() { - }; - ObjectMapper objectMapper = new ObjectMapper(); - try { - Map deserializedMap = objectMapper.readValue(file, typeRef); - this.store = deserializedMap; - } - catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - public void load(Resource resource) { - TypeReference> typeRef = new TypeReference<>() { - }; - ObjectMapper objectMapper = new ObjectMapper(); - try { - Map deserializedMap = objectMapper.readValue(resource.getInputStream(), typeRef); - this.store = deserializedMap; - } - catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - private String getVectorDbAsJson() { - ObjectMapper objectMapper = new ObjectMapper(); - ObjectWriter objectWriter = objectMapper.writerWithDefaultPrettyPrinter(); - String json; - try { - json = objectWriter.writeValueAsString(this.store); - } - catch (JsonProcessingException e) { - throw new RuntimeException("Error serializing documentMap to JSON.", e); - } - return json; - } - -} diff --git a/spring-ai-core/src/main/java/org/springframework/ai/vectorstore/InMemoryVectorStore.java b/spring-ai-core/src/main/java/org/springframework/ai/vectorstore/SimpleVectorStore.java similarity index 53% rename from spring-ai-core/src/main/java/org/springframework/ai/vectorstore/InMemoryVectorStore.java rename to spring-ai-core/src/main/java/org/springframework/ai/vectorstore/SimpleVectorStore.java index 25ec4dde352..9695576a165 100644 --- a/spring-ai-core/src/main/java/org/springframework/ai/vectorstore/InMemoryVectorStore.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/vectorstore/SimpleVectorStore.java @@ -1,10 +1,21 @@ package org.springframework.ai.vectorstore; +import com.fasterxml.jackson.core.JsonProcessingException; +import 
com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.document.Document; import org.springframework.ai.embedding.EmbeddingClient; - +import org.springframework.core.io.Resource; +import org.springframework.util.StreamUtils; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.Charset; import java.util.*; import java.util.concurrent.ConcurrentHashMap; @@ -14,15 +25,15 @@ * @author Mark Pollack * @author Christian Tzolov */ -public class InMemoryVectorStore implements VectorStore { +public class SimpleVectorStore implements VectorStore { - private static final Logger logger = LoggerFactory.getLogger(InMemoryVectorStore.class); + private static final Logger logger = LoggerFactory.getLogger(SimpleVectorStore.class); protected Map store = new ConcurrentHashMap<>(); protected EmbeddingClient embeddingClient; - public InMemoryVectorStore(EmbeddingClient embeddingClient) { + public SimpleVectorStore(EmbeddingClient embeddingClient) { Objects.requireNonNull(embeddingClient, "EmbeddingClient must not be null"); this.embeddingClient = embeddingClient; } @@ -66,6 +77,81 @@ public List similaritySearch(SearchRequest request) { return similarities; } + /** + * Serialize the vector store content into a file in JSON format. 
+ * @param file the file to save the vector store content + */ + public void save(File file) { + String json = getVectorDbAsJson(); + try { + if (!file.exists()) { + logger.info("Creating new vector store file: " + file); + file.createNewFile(); + } + else { + logger.info("Replacing existing vector store file: " + file); + file.delete(); + file.createNewFile(); + } + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + try (OutputStream stream = new FileOutputStream(file)) { + StreamUtils.copy(json, Charset.forName("UTF-8"), stream); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Deserialize the vector store content from a file in JSON format into memory. + * @param file the file to load the vector store content + */ + public void load(File file) { + TypeReference> typeRef = new TypeReference<>() { + }; + ObjectMapper objectMapper = new ObjectMapper(); + try { + Map deserializedMap = objectMapper.readValue(file, typeRef); + this.store = deserializedMap; + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + /** + * Deserialize the vector store content from a resource in JSON format into memory. 
+ * @param resource the resource to load the vector store content + */ + public void load(Resource resource) { + TypeReference> typeRef = new TypeReference<>() { + }; + ObjectMapper objectMapper = new ObjectMapper(); + try { + Map deserializedMap = objectMapper.readValue(resource.getInputStream(), typeRef); + this.store = deserializedMap; + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + private String getVectorDbAsJson() { + ObjectMapper objectMapper = new ObjectMapper(); + ObjectWriter objectWriter = objectMapper.writerWithDefaultPrettyPrinter(); + String json; + try { + json = objectWriter.writeValueAsString(this.store); + } + catch (JsonProcessingException e) { + throw new RuntimeException("Error serializing documentMap to JSON.", e); + } + return json; + } + private List getUserQueryEmbedding(String query) { List userQueryEmbedding = this.embeddingClient.embed(query); return userQueryEmbedding; diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/etl-pipeline.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/etl-pipeline.adoc index e37aa43c898..b973c28605f 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/etl-pipeline.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/etl-pipeline.adoc @@ -84,7 +84,7 @@ public interface DocumentWriter extends Consumer> { *PgVectorStore*:: + Provides vector storage capabilities using PostgreSQL. -*SimplePersistentVectorStore*:: +*SimpleVectorStore*:: + A straightforward approach to persistent vector storage. *InMemoryVectorStore*:: diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc index 3f80c74abd5..09e92840ec8 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc @@ -87,7 +87,7 @@ country == 'UK' && year >= 2020 && isActive == true. 
These are the available implementations of the `VectorStore` interface: -* `InMemoryVectorStore` and `SimplePersistentVectorStore`. +* `SimpleVectorStore` * Pinecone: https://www.pinecone.io/[PineCone] vector store. * PgVector [`PgVectorStore`]: The https://github.com/pgvector/pgvector[PostgreSQL/PGVector] vector store. * Milvus [`MilvusVectorStore`]: The https://milvus.io/[Milvus] vector store