From c66dfbfa1fd59837f3856d1c18bba12fb0a5a785 Mon Sep 17 00:00:00 2001 From: Ilayaperumal Gopinathan Date: Tue, 12 Nov 2024 12:42:13 +0000 Subject: [PATCH 1/3] GH-1589 Fix OpenSearch client indices mapping - Fix the embedding dimension configuration for opensearch client indices mapping - The dimension config is obtained by the underlying embedding model's dimension Resolves #1589 --- .../OpenSearchVectorStoreAutoConfiguration.java | 2 +- .../ai/vectorstore/OpenSearchVectorStore.java | 9 ++++----- .../ai/vectorstore/OpenSearchVectorStoreIT.java | 2 +- .../vectorstore/OpenSearchVectorStoreObservationIT.java | 4 ++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java index 78fc694a7b4..83f45c2ffef 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java @@ -75,7 +75,7 @@ OpenSearchVectorStore vectorStore(OpenSearchVectorStoreProperties properties, Op BatchingStrategy batchingStrategy) { var indexName = Optional.ofNullable(properties.getIndexName()).orElse(OpenSearchVectorStore.DEFAULT_INDEX_NAME); var mappingJson = Optional.ofNullable(properties.getMappingJson()) - .orElse(OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536); + .orElse(OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION); return new OpenSearchVectorStore(indexName, openSearchClient, embeddingModel, mappingJson, properties.isInitializeSchema(), 
observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP), customObservationConvention.getIfAvailable(() -> null), batchingStrategy); diff --git a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java index 35710cf9d80..68bcde773b8 100644 --- a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java +++ b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java @@ -69,12 +69,12 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem public static final String DEFAULT_INDEX_NAME = "spring-ai-document-index"; - public static final String DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536 = """ + public static final String DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION = """ { "properties":{ "embedding":{ "type":"knn_vector", - "dimension":1536 + "dimension":%s } } } @@ -100,8 +100,7 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, boolean initializeSchema) { - this(openSearchClient, embeddingModel, DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536, - initializeSchema); + this(openSearchClient, embeddingModel, DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION, initializeSchema); } public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, String mappingJson, @@ -263,7 +262,7 @@ private CreateIndexResponse createIndexMapping(String index, String mappingJson) @Override public void afterPropertiesSet() { if (this.initializeSchema && !exists(this.index)) { - createIndexMapping(this.index, this.mappingJson); + createIndexMapping(this.index, 
String.format(this.mappingJson, this.embeddingModel.dimensions())); } } diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java index 26cf006b817..6652ecd7882 100644 --- a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java +++ b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java @@ -411,7 +411,7 @@ public OpenSearchVectorStore anotherVectorStore(EmbeddingModel embeddingModel) { new OpenSearchClient(ApacheHttpClient5TransportBuilder .builder(HttpHost.create(opensearchContainer.getHttpHostAddress())) .build()), - embeddingModel, OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536, + embeddingModel, OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION, true); } catch (URISyntaxException e) { diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreObservationIT.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreObservationIT.java index 7ce5101a110..69a36ce52cd 100644 --- a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreObservationIT.java +++ b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreObservationIT.java @@ -209,8 +209,8 @@ public OpenSearchVectorStore vectorStore(EmbeddingModel embeddingModel, new OpenSearchClient(ApacheHttpClient5TransportBuilder .builder(HttpHost.create(opensearchContainer.getHttpHostAddress())) .build()), - embeddingModel, OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536, - true, 
observationRegistry, null, new TokenCountBatchingStrategy()); + embeddingModel, OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION, true, + observationRegistry, null, new TokenCountBatchingStrategy()); } catch (URISyntaxException e) { throw new RuntimeException(e); From 4cb1f7acc7d957b960d170eafbc411a8f28ed110 Mon Sep 17 00:00:00 2001 From: Ilayaperumal Gopinathan Date: Tue, 12 Nov 2024 18:02:00 +0000 Subject: [PATCH 2/3] Add tests - Verify the mappingJson field is correctly set - verify the override works fine - Add integration tests with Ollama embedding model --- ...nSearchVectorStoreAutoConfigurationIT.java | 10 + .../spring-ai-opensearch-store/pom.xml | 7 + .../OpenSearchVectorStoreWithOllamaIT.java | 187 ++++++++++++++++++ 3 files changed, 204 insertions(+) create mode 100644 vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreWithOllamaIT.java diff --git a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfigurationIT.java b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfigurationIT.java index d5b4876076d..55022d938b4 100644 --- a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfigurationIT.java +++ b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfigurationIT.java @@ -89,6 +89,16 @@ public void addAndSearchTest() { this.contextRunner.run(context -> { OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); TestObservationRegistry observationRegistry = context.getBean(TestObservationRegistry.class); + assertThat(vectorStore).isNotNull(); + 
assertThat(vectorStore).hasFieldOrPropertyWithValue("mappingJson", """ + { + "properties":{ + "embedding":{ + "type":"knn_vector", + "dimension":384 + } + } + }"""); vectorStore.add(this.documents); diff --git a/vector-stores/spring-ai-opensearch-store/pom.xml b/vector-stores/spring-ai-opensearch-store/pom.xml index d7cd5d1746b..824e3bc7ae6 100644 --- a/vector-stores/spring-ai-opensearch-store/pom.xml +++ b/vector-stores/spring-ai-opensearch-store/pom.xml @@ -68,6 +68,13 @@ test + + org.springframework.ai + spring-ai-ollama + ${parent.version} + test + + org.springframework.ai diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreWithOllamaIT.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreWithOllamaIT.java new file mode 100644 index 00000000000..bcccbeae168 --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreWithOllamaIT.java @@ -0,0 +1,187 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.vectorstore; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.hc.core5.http.HttpHost; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; +import org.opensearch.testcontainers.OpensearchContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.ollama.OllamaEmbeddingModel; +import org.springframework.ai.ollama.api.OllamaApi; +import org.springframework.ai.ollama.api.OllamaModel; +import org.springframework.ai.ollama.api.OllamaOptions; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; +import org.springframework.context.annotation.Bean; +import org.springframework.core.io.DefaultResourceLoader; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.hamcrest.Matchers.hasSize; + +@Testcontainers +@EnabledIfEnvironmentVariable(named = "OLLAMA_TESTS_ENABLED", matches = "true") +class OpenSearchVectorStoreWithOllamaIT { + + @Container + private static final OpensearchContainer opensearchContainer = new OpensearchContainer<>( + OpenSearchImage.DEFAULT_IMAGE); + + private static final 
String DEFAULT = "cosinesimil"; + + private List documents = List.of( + new Document("1", getText("classpath:/test/data/spring.ai.txt"), Map.of("meta1", "meta1")), + new Document("2", getText("classpath:/test/data/time.shelter.txt"), Map.of()), + new Document("3", getText("classpath:/test/data/great.depression.txt"), Map.of("meta2", "meta2"))); + + @BeforeAll + public static void beforeAll() { + Awaitility.setDefaultPollInterval(2, TimeUnit.SECONDS); + Awaitility.setDefaultPollDelay(Duration.ZERO); + Awaitility.setDefaultTimeout(Duration.ofMinutes(1)); + } + + private String getText(String uri) { + var resource = new DefaultResourceLoader().getResource(uri); + try { + return resource.getContentAsString(StandardCharsets.UTF_8); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private ApplicationContextRunner getContextRunner() { + return new ApplicationContextRunner().withUserConfiguration(TestApplication.class); + } + + @BeforeEach + void cleanDatabase() { + getContextRunner().run(context -> { + VectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class); + vectorStore.delete(List.of("_all")); + + VectorStore anotherVectorStore = context.getBean("anotherVectorStore", OpenSearchVectorStore.class); + anotherVectorStore.delete(List.of("_all")); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + public void addAndSearchTest(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class); + + if (!DEFAULT.equals(similarityFunction)) { + vectorStore.withSimilarityFunction(similarityFunction); + } + + vectorStore.add(this.documents); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(1)); + + List results = vectorStore + 
.similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(this.documents.get(2).getId()); + assertThat(resultDoc.getContent()).contains("The Great Depression (1929–1939) was an economic shock"); + assertThat(resultDoc.getMetadata()).hasSize(2); + assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + vectorStore.delete(this.documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(0)); + }); + } + + @SpringBootConfiguration + public static class TestApplication { + + @Bean + @Qualifier("vectorStore") + public OpenSearchVectorStore vectorStore(EmbeddingModel embeddingModel) { + try { + return new OpenSearchVectorStore(new OpenSearchClient(ApacheHttpClient5TransportBuilder + .builder(HttpHost.create(opensearchContainer.getHttpHostAddress())) + .build()), embeddingModel, true); + } + catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + @Bean + @Qualifier("anotherVectorStore") + public OpenSearchVectorStore anotherVectorStore(EmbeddingModel embeddingModel) { + try { + return new OpenSearchVectorStore("another_index", + new OpenSearchClient(ApacheHttpClient5TransportBuilder + .builder(HttpHost.create(opensearchContainer.getHttpHostAddress())) + .build()), + embeddingModel, OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION, + true); + } + catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + @Bean + public EmbeddingModel embeddingModel() { + return OllamaEmbeddingModel.builder() + .withOllamaApi(new OllamaApi()) + .withDefaultOptions(OllamaOptions.create() + 
.withModel(OllamaModel.MXBAI_EMBED_LARGE) + .withMainGPU(11) + .withUseMMap(true) + .withNumGPU(1)) + .build(); + } + + } + +} From 910484e5a4b0e640fb6d3674a31af7ffd016a0c6 Mon Sep 17 00:00:00 2001 From: Ilayaperumal Gopinathan Date: Tue, 12 Nov 2024 18:17:13 +0000 Subject: [PATCH 3/3] Update docs to refer to sample mappings configuration --- .../antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc index d7be154415b..66e6e7dfefc 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc @@ -174,7 +174,7 @@ You can use the following properties in your Spring Boot configuration to custom |`spring.ai.vectorstore.opensearch.password`| Password for the specified username. | - |`spring.ai.vectorstore.opensearch.indexName`| Name of the default index to be used within the OpenSearch cluster. | `spring-ai-document-index` |`spring.ai.vectorstore.opensearch.mappingJson`| JSON string defining the mapping for the index; specifies how documents and their -fields are stored and indexed. | +fields are stored and indexed. Refer to link:https://opensearch.org/docs/latest/search-plugins/vector-search/[the OpenSearch vector search documentation] for sample configurations. | { "properties":{ "embedding":{