diff --git a/pom.xml b/pom.xml index 0f21647f870..85ad9588aa7 100644 --- a/pom.xml +++ b/pom.xml @@ -74,6 +74,8 @@ vector-stores/spring-ai-elasticsearch-store spring-ai-spring-boot-starters/spring-ai-starter-watsonx-ai spring-ai-spring-boot-starters/spring-ai-starter-elasticsearch-store + vector-stores/spring-ai-opensearch-store + spring-ai-spring-boot-starters/spring-ai-starter-opensearch-store @@ -151,6 +153,12 @@ 11.6.1 4.5.1 1.7.1 + 2.10.1 + 5.3.1 + + + 1.19.7 + 2.0.1 0.0.4 diff --git a/spring-ai-bom/pom.xml b/spring-ai-bom/pom.xml index d1c857f9da9..43a481c157d 100644 --- a/spring-ai-bom/pom.xml +++ b/spring-ai-bom/pom.xml @@ -210,6 +210,12 @@ ${project.version} + + org.springframework.ai + spring-ai-opensearch-store + ${project.version} + + org.springframework.ai diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc new file mode 100644 index 00000000000..53bc41b81b9 --- /dev/null +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc @@ -0,0 +1,227 @@ += OpenSearch + +This section guides you through setting up the OpenSearch `VectorStore` to store document embeddings and perform similarity searches. + +link:https://opensearch.org[OpenSearch] is an open-source search and analytics engine originally forked from Elasticsearch, distributed under the Apache License 2.0. It enhances AI application development by simplifying the integration and management of AI-generated assets. OpenSearch supports vector, lexical, and hybrid search capabilities, leveraging advanced vector database functionalities to facilitate low-latency queries and similarity searches as detailed on the link:https://opensearch.org/platform/search/vector-database.html[vector database page]. This platform is ideal for building scalable AI-driven applications and offers robust tools for data management, fault tolerance, and resource access controls. 
+ +== Prerequisites + +* A running OpenSearch instance. The following options are available: +** link:https://opensearch.org/docs/latest/opensearch/install/index/[Self-Managed OpenSearch] +** link:https://docs.aws.amazon.com/opensearch-service/[Amazon OpenSearch Service] +* `EmbeddingClient` instance to compute the document embeddings. Several options are available: +- If required, an API key for the xref:api/embeddings.adoc#available-implementations[EmbeddingClient] to generate the +embeddings stored by the `OpenSearchVectorStore`. + +== Dependencies + +Add the OpenSearch Vector Store dependency to your project's Maven `pom.xml` file: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-opensearch-store</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file. + +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-opensearch-store' +} +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. + +== Configuration + +To connect to OpenSearch and use the `OpenSearchVectorStore`, you need to provide access details for your instance. +A simple configuration can be provided via Spring Boot's `application.yml`: + +[source,yaml] +---- +spring: + ai: + vectorstore: + opensearch: + uris: <opensearch instance URIs> + username: <opensearch username> + password: <opensearch password> + indexName: <opensearch index name> + mappingJson: <JSON mapping for the opensearch index> + # API key if needed, e.g. OpenAI + openai: + api: + key: <api-key> +---- +TIP: Check the list of xref:#_configuration_properties[configuration parameters] to learn about the default values and configuration options. + +== Auto-configuration + +Spring AI provides Spring Boot auto-configuration for the OpenSearch Vector Store. +To enable it, add the following dependency to your project's Maven `pom.xml` file: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-opensearch-store-spring-boot-starter</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file. 
+ +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-opensearch-store-spring-boot-starter' +} +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. + +Here is an example of the needed bean: + +[source,java] +---- +@Bean +public EmbeddingClient embeddingClient() { + // Can be any other EmbeddingClient implementation + return new OpenAiEmbeddingClient(new OpenAiApi(System.getenv("SPRING_AI_OPENAI_API_KEY"))); +} +---- + +Now you can auto-wire the `OpenSearchVectorStore` as a vector store in your application. + +[source,java] +---- +@Autowired VectorStore vectorStore; + +// ... + +List<Document> documents = List.of( + new Document("Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!!", Map.of("meta1", "meta1")), + new Document("The World is Big and Salvation Lurks Around the Corner"), + new Document("You walk forward facing the past and you turn back toward the future.", Map.of("meta2", "meta2"))); + +// Add the documents to OpenSearch +vectorStore.add(documents); + +// Retrieve documents similar to a query +List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Spring").withTopK(5)); +---- + +=== Configuration properties + +You can use the following properties in your Spring Boot configuration to customize the OpenSearch vector store. + +[cols="2,5,1"] +|=== +|Property| Description | Default value + +|`spring.ai.vectorstore.opensearch.uris`| URIs of the OpenSearch cluster endpoints. | - +|`spring.ai.vectorstore.opensearch.username`| Username for accessing the OpenSearch cluster. | - +|`spring.ai.vectorstore.opensearch.password`| Password for the specified username. | - +|`spring.ai.vectorstore.opensearch.indexName`| Name of the default index to be used within the OpenSearch cluster. 
| `spring-ai-document-index` +|`spring.ai.vectorstore.opensearch.mappingJson`| JSON string defining the mapping for the index; specifies how documents and their +fields are stored and indexed. | +{ + "properties":{ + "embedding":{ + "type":"knn_vector", + "dimension":1536 + } + } +} +|=== + +=== Customizing OpenSearch Client Configuration + +In cases where the Spring Boot auto-configured OpenSearchClient with `Apache HttpClient 5 Transport` bean is not what +you want or need, you can still define your own bean. +Please read the link:https://opensearch.org/docs/latest/clients/java/[OpenSearch Java Client Documentation] +for more in-depth information about the configuration of Amazon OpenSearch Service. +To enable it, add the following dependency to your project's Maven `pom.xml` file: + +[source,xml] +---- +<dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>apache-client</artifactId> + <version>2.25.40</version> +</dependency> +---- + +or to your Gradle `build.gradle` build file. + +[source,groovy] +---- +dependencies { + implementation 'software.amazon.awssdk:apache-client:2.25.40' +} +---- + +Here is an example of the needed bean: + +[source,java] +---- +@Bean +public OpenSearchClient openSearchClient() { + return new OpenSearchClient( + new AwsSdk2Transport( + ApacheHttpClient.builder().build(), + "search-...us-west-2.es.amazonaws.com", // OpenSearch endpoint, without https:// + "es", + Region.US_WEST_2, // signing service region + AwsSdk2TransportOptions.builder().build()) + ); +} +---- + +== Metadata Filtering + +You can leverage the generic, portable xref:api/vectordbs.adoc#metadata-filters[metadata filters] with OpenSearch as well. 
+ +For example, you can use either the text expression language: + +[source,java] +---- +vectorStore.similaritySearch(SearchRequest.defaults() + .withQuery("The World") + .withTopK(TOP_K) + .withSimilarityThreshold(SIMILARITY_THRESHOLD) + .withFilterExpression("author in ['john', 'jill'] && 'article_type' == 'blog'")); +---- + +or programmatically using the `Filter.Expression` DSL: + +[source,java] +---- +FilterExpressionBuilder b = new FilterExpressionBuilder(); + +vectorStore.similaritySearch(SearchRequest.defaults() + .withQuery("The World") + .withTopK(TOP_K) + .withSimilarityThreshold(SIMILARITY_THRESHOLD) + .withFilterExpression(b.and( + b.in("author", "john", "jill"), + b.eq("article_type", "blog")).build())); +---- + +NOTE: These (portable) filter expressions get automatically converted into the proprietary OpenSearch link:https://opensearch.org/docs/latest/query-dsl/full-text/query-string/[Query string query]. + +For example, this portable filter expression: + +[source,sql] +---- +author in ['john', 'jill'] && 'article_type' == 'blog' +---- + +is converted into the proprietary OpenSearch filter format: + +[source,text] +---- +(metadata.author:john OR jill) AND metadata.article_type:blog +---- diff --git a/spring-ai-spring-boot-autoconfigure/pom.xml b/spring-ai-spring-boot-autoconfigure/pom.xml index d48275f3d83..a01b11affc9 100644 --- a/spring-ai-spring-boot-autoconfigure/pom.xml +++ b/spring-ai-spring-boot-autoconfigure/pom.xml @@ -267,6 +267,13 @@ true + + org.springframework.ai + spring-ai-opensearch-store + ${project.parent.version} + true + + @@ -354,6 +361,13 @@ test + + org.opensearch + opensearch-testcontainers + ${testcontainers.opensearch.version} + test + + org.skyscreamer jsonassert diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java 
b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java new file mode 100644 index 00000000000..00a792dd452 --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java @@ -0,0 +1,82 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.springframework.ai.autoconfigure.vectorstore.opensearch; + +import org.apache.hc.client5.http.auth.AuthScope; +import org.apache.hc.client5.http.auth.UsernamePasswordCredentials; +import org.apache.hc.client5.http.impl.auth.BasicCredentialsProvider; +import org.apache.hc.core5.http.HttpHost; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; +import org.springframework.ai.embedding.EmbeddingClient; +import org.springframework.ai.vectorstore.OpenSearchVectorStore; +import org.springframework.boot.autoconfigure.AutoConfiguration; +import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; + +import java.net.URISyntaxException; +import java.util.Optional; + +/** + * Auto-configuration that contributes an {@link OpenSearchClient} and an {@link OpenSearchVectorStore} + * built from {@link OpenSearchVectorStoreProperties}, unless the application already defines such beans. + */ +@AutoConfiguration +@ConditionalOnClass({ OpenSearchVectorStore.class, EmbeddingClient.class, OpenSearchClient.class }) +@EnableConfigurationProperties(OpenSearchVectorStoreProperties.class) +class OpenSearchVectorStoreAutoConfiguration { + + /** + * Creates the vector store, falling back to the store's default index name and default mapping + * when the corresponding properties are not set. + */ + @Bean + @ConditionalOnMissingBean + OpenSearchVectorStore vectorStore(OpenSearchVectorStoreProperties properties, OpenSearchClient openSearchClient, + EmbeddingClient embeddingClient) { + return new OpenSearchVectorStore( + Optional.ofNullable(properties.getIndexName()).orElse(OpenSearchVectorStore.DEFAULT_INDEX_NAME), + openSearchClient, embeddingClient, Optional.ofNullable(properties.getMappingJson()) + .orElse(OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536)); + } + + /** + * Creates a client over the Apache HttpClient 5 transport for the configured URIs; basic + * credentials are installed only when a user name is configured. + * @throws IllegalStateException if no URI is configured + * @throws IllegalArgumentException if a configured URI cannot be parsed + */ + @Bean + @ConditionalOnMissingBean + OpenSearchClient openSearchClient(OpenSearchVectorStoreProperties properties) { + // Fail with an actionable message instead of a bare NPE when no endpoint is configured. + HttpHost[] httpHosts = Optional.ofNullable(properties.getUris()) + .filter(uris -> !uris.isEmpty()) + .orElseThrow(() -> new IllegalStateException(String.format( + "At least one OpenSearch URI must be configured via '%s.uris'", + OpenSearchVectorStoreProperties.CONFIG_PREFIX))) + .stream() + .map(this::createHttpHost) + .toArray(HttpHost[]::new); + 
ApacheHttpClient5TransportBuilder transportBuilder = ApacheHttpClient5TransportBuilder.builder(httpHosts); + + Optional.ofNullable(properties.getUsername()) + .map(username -> createBasicCredentialsProvider(httpHosts, username, properties.getPassword())) + .ifPresent(basicCredentialsProvider -> transportBuilder + .setHttpClientConfigCallback(httpAsyncClientBuilder -> httpAsyncClientBuilder + .setDefaultCredentialsProvider(basicCredentialsProvider))); + + return new OpenSearchClient(transportBuilder.build()); + } + + /** + * Builds a credentials provider that offers the same user name and password to every configured host. + */ + private BasicCredentialsProvider createBasicCredentialsProvider(HttpHost[] httpHosts, String username, + String password) { + // A user name without a password is treated as an empty password rather than an NPE. + char[] secret = (password != null) ? password.toCharArray() : new char[0]; + UsernamePasswordCredentials credentials = new UsernamePasswordCredentials(username, secret); + BasicCredentialsProvider basicCredentialsProvider = new BasicCredentialsProvider(); + // Register the credentials for each node, not only the first one in the list. + for (HttpHost httpHost : httpHosts) { + basicCredentialsProvider.setCredentials(new AuthScope(httpHost), credentials); + } + return basicCredentialsProvider; + } + + /** + * Parses one configured URI, reporting the offending value when it is malformed. + */ + private HttpHost createHttpHost(String s) { + try { + return HttpHost.create(s); + } + catch (URISyntaxException e) { + throw new IllegalArgumentException(String.format("Invalid OpenSearch URI: %s", s), e); + } + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreProperties.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreProperties.java new file mode 100644 index 00000000000..723e0b388a4 --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreProperties.java @@ -0,0 +1,80 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.autoconfigure.vectorstore.opensearch; + +import org.springframework.boot.context.properties.ConfigurationProperties; + +import java.util.List; + +/** + * Configuration properties for the OpenSearch vector store, bound from the + * {@code spring.ai.vectorstore.opensearch} prefix. + */ +@ConfigurationProperties(prefix = OpenSearchVectorStoreProperties.CONFIG_PREFIX) +public class OpenSearchVectorStoreProperties { + + public static final String CONFIG_PREFIX = "spring.ai.vectorstore.opensearch"; + + /** + * Comma-separated list of the OpenSearch instances to use. + */ + private List<String> uris; + + /** + * Name of the index that holds the documents. + */ + private String indexName; + + /** + * User name for basic authentication; leave unset to connect without credentials. + */ + private String username; + + /** + * Password that belongs to the configured user name. + */ + private String password; + + /** + * JSON mapping definition for the index. + */ + private String mappingJson; + + public List<String> getUris() { + return this.uris; + } + + public void setUris(List<String> uris) { + this.uris = uris; + } + + public String getIndexName() { + return this.indexName; + } + + public void setIndexName(String indexName) { + this.indexName = indexName; + } + + public String getUsername() { + return this.username; + } + + public void setUsername(String username) { + this.username = username; + } + + public String getPassword() { + return this.password; + } + + public void setPassword(String password) { + this.password = password; + } + + public String getMappingJson() { + return this.mappingJson; + } + + public void setMappingJson(String mappingJson) { + this.mappingJson = mappingJson; + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports 
b/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports index c2816002bcd..1d1532873d9 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports +++ b/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -32,3 +32,4 @@ org.springframework.ai.autoconfigure.anthropic.AnthropicAutoConfiguration org.springframework.ai.autoconfigure.watsonxai.WatsonxAiAutoConfiguration org.springframework.ai.autoconfigure.vectorstore.elasticsearch.ElasticsearchVectorStoreAutoConfiguration org.springframework.ai.autoconfigure.vectorstore.cassandra.CassandraVectorStoreAutoConfiguration +org.springframework.ai.autoconfigure.vectorstore.opensearch.OpenSearchVectorStoreAutoConfiguration diff --git a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfigurationIT.java b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfigurationIT.java new file mode 100644 index 00000000000..219bf7de13c --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfigurationIT.java @@ -0,0 +1,130 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.autoconfigure.vectorstore.opensearch; + +import org.awaitility.Awaitility; +import org.junit.jupiter.api.Test; +import org.opensearch.testcontainers.OpensearchContainer; +import org.springframework.ai.autoconfigure.retry.SpringAiRetryAutoConfiguration; +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingClient; +import org.springframework.ai.transformers.TransformersEmbeddingClient; +import org.springframework.ai.vectorstore.OpenSearchVectorStore; +import org.springframework.ai.vectorstore.SearchRequest; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.DefaultResourceLoader; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.hamcrest.Matchers.hasSize; + +/** + * Integration test that starts an OpenSearch container, boots the auto-configuration against it and + * verifies that documents can be added, found by similarity search and deleted again. + */ +@Testcontainers +class OpenSearchVectorStoreAutoConfigurationIT { + + @Container + private static final OpensearchContainer<?> opensearchContainer = new OpensearchContainer<>( + DockerImageName.parse("opensearchproject/opensearch:2.12.0")); + + private static final 
String DOCUMENT_INDEX = "auto-spring-ai-document-index"; + + private final List<Document> documents = List.of( + new Document("1", getText("classpath:/test/data/spring.ai.txt"), Map.of("meta1", "meta1")), + new Document("2", getText("classpath:/test/data/time.shelter.txt"), Map.of()), + new Document("3", getText("classpath:/test/data/great.depression.txt"), Map.of("meta2", "meta2"))); + + // The mapping dimension (384) overrides the store's 1536 default for the embedding client used in this test. + private final ApplicationContextRunner contextRunner = new ApplicationContextRunner() + .withConfiguration(AutoConfigurations.of(OpenSearchVectorStoreAutoConfiguration.class, + SpringAiRetryAutoConfiguration.class)) + .withUserConfiguration(Config.class) + .withPropertyValues( + OpenSearchVectorStoreProperties.CONFIG_PREFIX + ".uris=" + opensearchContainer.getHttpHostAddress(), + OpenSearchVectorStoreProperties.CONFIG_PREFIX + ".indexName=" + DOCUMENT_INDEX, + OpenSearchVectorStoreProperties.CONFIG_PREFIX + ".mappingJson=" + """ + { + "properties":{ + "embedding":{ + "type":"knn_vector", + "dimension":384 + } + } + } + """); + + @Test + public void addAndSearchTest() { + + this.contextRunner.run(context -> { + OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); + + vectorStore.add(documents); + + // Poll until the added documents become searchable. + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(1)); + + List<Document> results = vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(documents.get(2).getId()); + assertThat(resultDoc.getContent()).contains("The Great Depression (1929–1939) was an economic shock"); + assertThat(resultDoc.getMetadata()).hasSize(2); + assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + 
vectorStore.delete(documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(0)); + }); + } + + private String getText(String uri) { + var resource = new DefaultResourceLoader().getResource(uri); + try { + return resource.getContentAsString(StandardCharsets.UTF_8); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Configuration(proxyBeanMethods = false) + static class Config { + + @Bean + public EmbeddingClient embeddingClient() { + return new TransformersEmbeddingClient(); + } + + } + +} diff --git a/spring-ai-spring-boot-starters/spring-ai-starter-opensearch-store/pom.xml b/spring-ai-spring-boot-starters/spring-ai-starter-opensearch-store/pom.xml new file mode 100644 index 00000000000..c97eb81ad68 --- /dev/null +++ b/spring-ai-spring-boot-starters/spring-ai-starter-opensearch-store/pom.xml @@ -0,0 +1,42 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai + 1.0.0-SNAPSHOT + ../../pom.xml + + spring-ai-opensearch-store-spring-boot-starter + jar + Spring AI Starter - OpenSearch Store + Spring AI OpenSearch Store Auto Configuration + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + org.springframework.boot + spring-boot-starter + + + + org.springframework.ai + spring-ai-spring-boot-autoconfigure + ${project.parent.version} + + + + org.springframework.ai + spring-ai-opensearch-store + ${project.parent.version} + + + + diff --git a/vector-stores/spring-ai-opensearch-store/pom.xml b/vector-stores/spring-ai-opensearch-store/pom.xml new file mode 100644 index 00000000000..4c11603369d --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/pom.xml @@ -0,0 +1,85 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai + 1.0.0-SNAPSHOT + 
../../pom.xml + + spring-ai-opensearch-store + jar + Spring AI Vector Store - OpenSearch + Spring AI OpenSearch Vector Store + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + 4.0.3 + + + + + org.springframework.ai + spring-ai-core + ${project.parent.version} + + + + org.opensearch.client + opensearch-java + ${opensearch-client.version} + + + + org.apache.httpcomponents.client5 + httpclient5 + ${httpclient5.version} + + + + + org.springframework.ai + spring-ai-openai + ${project.parent.version} + test + + + + + org.springframework.ai + spring-ai-test + ${project.parent.version} + test + + + + org.springframework.boot + spring-boot-starter-test + test + + + + org.opensearch + opensearch-testcontainers + ${testcontainers.opensearch.version} + test + + + + org.testcontainers + junit-jupiter + ${testcontainers.version} + test + + + + + diff --git a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchAiSearchFilterExpressionConverter.java b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchAiSearchFilterExpressionConverter.java new file mode 100644 index 00000000000..9035a86d299 --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchAiSearchFilterExpressionConverter.java @@ -0,0 +1,150 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.vectorstore; + +import org.springframework.ai.vectorstore.filter.Filter; +import org.springframework.ai.vectorstore.filter.Filter.Expression; +import org.springframework.ai.vectorstore.filter.Filter.Key; +import org.springframework.ai.vectorstore.filter.converter.AbstractFilterExpressionConverter; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.TimeZone; +import java.util.regex.Pattern; + +/** + * Converts portable filter {@link Expression}s into an OpenSearch query-string fragment. Keys are + * prefixed with {@code metadata.}, list values are joined with {@code OR}, and dates are rendered as + * {@code yyyy-MM-dd'T'HH:mm:ss'Z'} in UTC. + * + * @author Jemin Huh + * @since 1.0.0 + */ +public class OpenSearchAiSearchFilterExpressionConverter extends AbstractFilterExpressionConverter { + + private static final Pattern DATE_FORMAT_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"); + + // SimpleDateFormat is not thread-safe; all access goes through formatDate/normalizeDate, which lock on it. + private final SimpleDateFormat dateFormat; + + public OpenSearchAiSearchFilterExpressionConverter() { + this.dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + /** + * Writes IN/NIN as {@code <symbol>(<key><values>)} and every other expression as + * {@code <left><symbol><right>}. + */ + @Override + protected void doExpression(Expression expression, StringBuilder context) { + if (expression.type() == Filter.ExpressionType.IN || expression.type() == Filter.ExpressionType.NIN) { + context.append(getOperationSymbol(expression)); + context.append("("); + this.convertOperand(expression.left(), context); + this.convertOperand(expression.right(), context); + context.append(")"); + } + else { + this.convertOperand(expression.left(), context); + context.append(getOperationSymbol(expression)); + this.convertOperand(expression.right(), context); + } + } + + @Override + protected void doStartValueRange(Filter.Value listValue, StringBuilder context) { + } + + @Override + protected void doEndValueRange(Filter.Value listValue, StringBuilder context) { + } + + @Override + protected void doAddValueRangeSpitter(Filter.Value listValue, StringBuilder 
context) { + context.append(" OR "); + } + + private String getOperationSymbol(Expression exp) { + return switch (exp.type()) { + case AND -> " AND "; + case OR -> " OR "; + case EQ, IN -> ""; + case NE -> " NOT "; + case LT -> "<"; + case LTE -> "<="; + case GT -> ">"; + case GTE -> ">="; + case NIN -> "NOT "; + default -> throw new RuntimeException("Not supported expression type: " + exp.type()); + }; + } + + @Override + public void doKey(Key key, StringBuilder context) { + var identifier = hasOuterQuotes(key.key()) ? removeOuterQuotes(key.key()) : key.key(); + var prefixedIdentifier = withMetaPrefix(identifier); + context.append(prefixedIdentifier.trim()).append(":"); + } + + public String withMetaPrefix(String identifier) { + return "metadata." + identifier; + } + + /** + * Appends a value; list values are written element by element, separated by {@code " OR "}. + */ + @Override + protected void doValue(Filter.Value filterValue, StringBuilder context) { + if (filterValue.value() instanceof List<?> list) { + boolean first = true; + for (Object element : list) { + if (!first) { + this.doAddValueRangeSpitter(filterValue, context); + } + // Route elements through doSingleValue so dates inside a list get the same formatting as scalars. + this.doSingleValue(element, context); + first = false; + } + } + else { + this.doSingleValue(filterValue.value(), context); + } + } + + /** + * Appends a scalar: {@link Date}s and date-shaped strings in the UTC date format, anything else via + * {@link StringBuilder#append(Object)}. + * @throws IllegalArgumentException if a date-shaped string cannot be parsed + */ + @Override + protected void doSingleValue(Object value, StringBuilder context) { + if (value instanceof Date date) { + context.append(formatDate(date)); + } + else if (value instanceof String text && DATE_FORMAT_PATTERN.matcher(text).matches()) { + context.append(normalizeDate(text)); + } + else { + context.append(value); + } + } + + private String formatDate(Date date) { + synchronized (this.dateFormat) { + return this.dateFormat.format(date); + } + } + + private String normalizeDate(String text) { + try { + synchronized (this.dateFormat) { + return this.dateFormat.format(this.dateFormat.parse(text)); + } + } + catch (ParseException e) { + throw new IllegalArgumentException("Invalid date type:" + text, e); + } + } + + @Override + public void doStartGroup(Filter.Group group, StringBuilder context) { + context.append("("); + } + + @Override + public void doEndGroup(Filter.Group group, StringBuilder context) { + context.append(")"); + } + +} \ No newline at end of file diff --git 
a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java new file mode 100644 index 00000000000..8e12445d6b7 --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java @@ -0,0 +1,237 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.springframework.ai.vectorstore; + +import org.opensearch.client.json.JsonData; +import org.opensearch.client.json.JsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.SortOrder; +import org.opensearch.client.opensearch._types.mapping.TypeMapping; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; +import org.opensearch.client.opensearch.core.search.Hit; +import org.opensearch.client.opensearch.indices.CreateIndexRequest; +import org.opensearch.client.opensearch.indices.CreateIndexResponse; +import org.opensearch.client.transport.endpoints.BooleanResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingClient; +import org.springframework.ai.vectorstore.filter.Filter; +import org.springframework.ai.vectorstore.filter.FilterExpressionConverter; +import org.springframework.beans.factory.InitializingBean; +import org.springframework.util.Assert; + +import java.io.IOException; +import java.io.StringReader; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * @author Jemin Huh + * @since 1.0.0 + */ +public class OpenSearchVectorStore implements VectorStore, InitializingBean { + + public static final String COSINE_SIMILARITY_FUNCTION = "cosinesimil"; + + private static final Logger logger = LoggerFactory.getLogger(OpenSearchVectorStore.class); + + public static final String DEFAULT_INDEX_NAME = "spring-ai-document-index"; + + public static final String DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536 = """ + { + "properties":{ + "embedding":{ + "type":"knn_vector", + "dimension":1536 + } + } + } + """; + + private final EmbeddingClient embeddingClient; + + private 
final OpenSearchClient openSearchClient; + + private final String index; + + private final FilterExpressionConverter filterExpressionConverter; + + private final String mappingJson; + + private String similarityFunction; + + public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingClient embeddingClient) { + this(openSearchClient, embeddingClient, DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536); + } + + public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingClient embeddingClient, + String mappingJson) { + this(DEFAULT_INDEX_NAME, openSearchClient, embeddingClient, mappingJson); + } + + public OpenSearchVectorStore(String index, OpenSearchClient openSearchClient, EmbeddingClient embeddingClient, + String mappingJson) { + Objects.requireNonNull(embeddingClient, "RestClient must not be null"); + Objects.requireNonNull(embeddingClient, "EmbeddingClient must not be null"); + this.openSearchClient = openSearchClient; + this.embeddingClient = embeddingClient; + this.index = index; + this.mappingJson = mappingJson; + this.filterExpressionConverter = new OpenSearchAiSearchFilterExpressionConverter(); + // the potential functions for vector fields at + // https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces + this.similarityFunction = COSINE_SIMILARITY_FUNCTION; + } + + public OpenSearchVectorStore withSimilarityFunction(String similarityFunction) { + this.similarityFunction = similarityFunction; + return this; + } + + @Override + public void add(List documents) { + BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); + for (Document document : documents) { + if (Objects.isNull(document.getEmbedding()) || document.getEmbedding().isEmpty()) { + logger.debug("Calling EmbeddingClient for document id = " + document.getId()); + document.setEmbedding(this.embeddingClient.embed(document)); + } + bulkRequestBuilder + .operations(op -> op.index(idx -> 
idx.index(this.index).id(document.getId()).document(document))); + } + bulkRequest(bulkRequestBuilder.build()); + } + + @Override + public Optional delete(List idList) { + BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); + for (String id : idList) + bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.index).id(id))); + return Optional.of(bulkRequest(bulkRequestBuilder.build()).errors()); + } + + private BulkResponse bulkRequest(BulkRequest bulkRequest) { + try { + return this.openSearchClient.bulk(bulkRequest); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public List similaritySearch(SearchRequest searchRequest) { + Assert.notNull(searchRequest, "The search request must not be null."); + return similaritySearch(this.embeddingClient.embed(searchRequest.getQuery()), searchRequest.getTopK(), + searchRequest.getSimilarityThreshold(), searchRequest.getFilterExpression()); + } + + public List similaritySearch(List embedding, int topK, double similarityThreshold, + Filter.Expression filterExpression) { + return similaritySearch(new org.opensearch.client.opensearch.core.SearchRequest.Builder() + .query(getOpenSearchSimilarityQuery(embedding, filterExpression)) + .sort(sortOptionsBuilder -> sortOptionsBuilder + .score(scoreSortBuilder -> scoreSortBuilder.order(SortOrder.Desc))) + .size(topK) + .minScore(similarityThreshold) + .build()); + } + + private Query getOpenSearchSimilarityQuery(List embedding, Filter.Expression filterExpression) { + return Query.of(queryBuilder -> queryBuilder.scriptScore(scriptScoreQueryBuilder -> { + scriptScoreQueryBuilder + .query(queryBuilder2 -> queryBuilder2.queryString(queryStringQuerybuilder -> queryStringQuerybuilder + .query(getOpenSearchQueryString(filterExpression)))) + .script(scriptBuilder -> scriptBuilder + .inline(inlineScriptBuilder -> inlineScriptBuilder.source("knn_score") + .lang("knn") + .params("field", JsonData.of("embedding")) + 
.params("query_value", JsonData.of(embedding)) + .params("space_type", JsonData.of(this.similarityFunction)))); + // https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script + // k-NN ensures non-negative scores by adding 1 to cosine similarity, + // extending OpenSearch scores to 0-2. + // A 0.5 boost normalizes to 0-1. + return this.similarityFunction.equals(COSINE_SIMILARITY_FUNCTION) ? scriptScoreQueryBuilder.boost(0.5f) + : scriptScoreQueryBuilder; + })); + } + + private String getOpenSearchQueryString(Filter.Expression filterExpression) { + return Objects.isNull(filterExpression) ? "*" + : this.filterExpressionConverter.convertExpression(filterExpression); + + } + + private List similaritySearch(org.opensearch.client.opensearch.core.SearchRequest searchRequest) { + try { + return this.openSearchClient.search(searchRequest, Document.class) + .hits() + .hits() + .stream() + .map(this::toDocument) + .collect(Collectors.toList()); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private Document toDocument(Hit hit) { + Document document = hit.source(); + document.getMetadata().put("distance", 1 - hit.score().floatValue()); + return document; + } + + public boolean exists(String targetIndex) { + try { + BooleanResponse response = this.openSearchClient.indices() + .exists(existRequestBuilder -> existRequestBuilder.index(targetIndex)); + return response.value(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private CreateIndexResponse createIndexMapping(String index, String mappingJson) { + JsonpMapper jsonpMapper = openSearchClient._transport().jsonpMapper(); + try { + return this.openSearchClient.indices() + .create(new CreateIndexRequest.Builder().index(index) + .settings(settingsBuilder -> settingsBuilder.knn(true)) + .mappings(TypeMapping._DESERIALIZER.deserialize( + jsonpMapper.jsonProvider().createParser(new StringReader(mappingJson)), jsonpMapper)) + .build()); + } + catch (IOException e) { 
+ throw new RuntimeException(e); + } + } + + @Override + public void afterPropertiesSet() { + if (!exists(this.index)) { + createIndexMapping(this.index, mappingJson); + } + } + +} \ No newline at end of file diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchAiSearchFilterExpressionConverterTest.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchAiSearchFilterExpressionConverterTest.java new file mode 100644 index 00000000000..274f132730e --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchAiSearchFilterExpressionConverterTest.java @@ -0,0 +1,117 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.springframework.ai.vectorstore; + +import org.junit.jupiter.api.Test; +import org.springframework.ai.vectorstore.filter.Filter; +import org.springframework.ai.vectorstore.filter.FilterExpressionConverter; + +import java.util.Date; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.springframework.ai.vectorstore.filter.Filter.ExpressionType.*; + +class OpenSearchAiSearchFilterExpressionConverterTest { + + final FilterExpressionConverter converter = new OpenSearchAiSearchFilterExpressionConverter(); + + @Test + public void testDate() { + String vectorExpr = converter.convertExpression(new Filter.Expression(EQ, new Filter.Key("activationDate"), + new Filter.Value(new Date(1704637752148L)))); + assertThat(vectorExpr).isEqualTo("metadata.activationDate:2024-01-07T14:29:12Z"); + + vectorExpr = converter.convertExpression( + new Filter.Expression(EQ, new Filter.Key("activationDate"), new Filter.Value("1970-01-01T00:00:02Z"))); + assertThat(vectorExpr).isEqualTo("metadata.activationDate:1970-01-01T00:00:02Z"); + } + + @Test + public void testEQ() { + String vectorExpr = converter + .convertExpression(new Filter.Expression(EQ, new Filter.Key("country"), new Filter.Value("BG"))); + assertThat(vectorExpr).isEqualTo("metadata.country:BG"); + } + + @Test + public void tesEqAndGte() { + String vectorExpr = converter.convertExpression(new Filter.Expression(AND, + new Filter.Expression(EQ, new Filter.Key("genre"), new Filter.Value("drama")), + new Filter.Expression(GTE, new Filter.Key("year"), new Filter.Value(2020)))); + assertThat(vectorExpr).isEqualTo("metadata.genre:drama AND metadata.year:>=2020"); + } + + @Test + public void tesIn() { + String vectorExpr = converter.convertExpression(new Filter.Expression(IN, new Filter.Key("genre"), + new Filter.Value(List.of("comedy", "documentary", "drama")))); + assertThat(vectorExpr).isEqualTo("(metadata.genre:comedy OR documentary OR drama)"); + } + + @Test + public 
void testNe() { + String vectorExpr = converter.convertExpression( + new Filter.Expression(OR, new Filter.Expression(GTE, new Filter.Key("year"), new Filter.Value(2020)), + new Filter.Expression(AND, + new Filter.Expression(EQ, new Filter.Key("country"), new Filter.Value("BG")), + new Filter.Expression(NE, new Filter.Key("city"), new Filter.Value("Sofia"))))); + assertThat(vectorExpr).isEqualTo("metadata.year:>=2020 OR metadata.country:BG AND metadata.city: NOT Sofia"); + } + + @Test + public void testGroup() { + String vectorExpr = converter.convertExpression(new Filter.Expression(AND, + new Filter.Group(new Filter.Expression(OR, + new Filter.Expression(GTE, new Filter.Key("year"), new Filter.Value(2020)), + new Filter.Expression(EQ, new Filter.Key("country"), new Filter.Value("BG")))), + new Filter.Expression(NIN, new Filter.Key("city"), new Filter.Value(List.of("Sofia", "Plovdiv"))))); + assertThat(vectorExpr) + .isEqualTo("(metadata.year:>=2020 OR metadata.country:BG) AND NOT (metadata.city:Sofia OR Plovdiv)"); + } + + @Test + public void tesBoolean() { + String vectorExpr = converter.convertExpression(new Filter.Expression(AND, + new Filter.Expression(AND, new Filter.Expression(EQ, new Filter.Key("isOpen"), new Filter.Value(true)), + new Filter.Expression(GTE, new Filter.Key("year"), new Filter.Value(2020))), + new Filter.Expression(IN, new Filter.Key("country"), new Filter.Value(List.of("BG", "NL", "US"))))); + + assertThat(vectorExpr) + .isEqualTo("metadata.isOpen:true AND metadata.year:>=2020 AND (metadata.country:BG OR NL OR US)"); + } + + @Test + public void testDecimal() { + String vectorExpr = converter.convertExpression(new Filter.Expression(AND, + new Filter.Expression(GTE, new Filter.Key("temperature"), new Filter.Value(-15.6)), + new Filter.Expression(LTE, new Filter.Key("temperature"), new Filter.Value(20.13)))); + + assertThat(vectorExpr).isEqualTo("metadata.temperature:>=-15.6 AND metadata.temperature:<=20.13"); + } + + @Test + public void 
testComplexIdentifiers() { + String vectorExpr = converter + .convertExpression(new Filter.Expression(EQ, new Filter.Key("\"country 1 2 3\""), new Filter.Value("BG"))); + assertThat(vectorExpr).isEqualTo("metadata.country 1 2 3:BG"); + + vectorExpr = converter + .convertExpression(new Filter.Expression(EQ, new Filter.Key("'country 1 2 3'"), new Filter.Value("BG"))); + assertThat(vectorExpr).isEqualTo("metadata.country 1 2 3:BG"); + } + +} diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java new file mode 100644 index 00000000000..81661ec0deb --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java @@ -0,0 +1,363 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.springframework.ai.vectorstore; + +import org.apache.hc.core5.http.HttpHost; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; +import org.opensearch.testcontainers.OpensearchContainer; +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingClient; +import org.springframework.ai.openai.OpenAiEmbeddingClient; +import org.springframework.ai.openai.api.OpenAiApi; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; +import org.springframework.context.annotation.Bean; +import org.springframework.core.io.DefaultResourceLoader; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.ZonedDateTime; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +@Testcontainers +@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+") +class OpenSearchVectorStoreIT { + + @Container + private static 
final OpensearchContainer opensearchContainer = new OpensearchContainer<>( + DockerImageName.parse("opensearchproject/opensearch:2.13.0")); + + private static final String DEFAULT = "cosinesimil"; + + private List documents = List.of( + new Document("1", getText("classpath:/test/data/spring.ai.txt"), Map.of("meta1", "meta1")), + new Document("2", getText("classpath:/test/data/time.shelter.txt"), Map.of()), + new Document("3", getText("classpath:/test/data/great.depression.txt"), Map.of("meta2", "meta2"))); + + @BeforeAll + public static void beforeAll() { + Awaitility.setDefaultPollInterval(2, TimeUnit.SECONDS); + Awaitility.setDefaultPollDelay(Duration.ZERO); + Awaitility.setDefaultTimeout(Duration.ofMinutes(1)); + } + + private String getText(String uri) { + var resource = new DefaultResourceLoader().getResource(uri); + try { + return resource.getContentAsString(StandardCharsets.UTF_8); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private ApplicationContextRunner getContextRunner() { + return new ApplicationContextRunner().withUserConfiguration(TestApplication.class); + } + + @BeforeEach + void cleanDatabase() { + getContextRunner().run(context -> { + VectorStore vectorStore = context.getBean(VectorStore.class); + vectorStore.delete(List.of("_all")); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + public void addAndSearchTest(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); + + if (!DEFAULT.equals(similarityFunction)) { + vectorStore.withSimilarityFunction(similarityFunction); + } + + vectorStore.add(documents); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(1)); + + List results = vectorStore + .similaritySearch(SearchRequest.query("Great 
Depression").withTopK(1).withSimilarityThreshold(0)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(documents.get(2).getId()); + assertThat(resultDoc.getContent()).contains("The Great Depression (1929–1939) was an economic shock"); + assertThat(resultDoc.getMetadata()).hasSize(2); + assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + vectorStore.delete(documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(0)); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + public void searchWithFilters(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); + + if (!DEFAULT.equals(similarityFunction)) { + vectorStore.withSimilarityFunction(similarityFunction); + } + + var bgDocument = new Document("1", "The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "BG", "year", 2020, "activationDate", new Date(1000))); + var nlDocument = new Document("2", "The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "NL", "activationDate", new Date(2000))); + var bgDocument2 = new Document("3", "The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "BG", "year", 2023, "activationDate", new Date(3000))); + + vectorStore.add(List.of(bgDocument, nlDocument, bgDocument2)); + + Awaitility.await() + .until(() -> vectorStore.similaritySearch(SearchRequest.query("The World").withTopK(5)), hasSize(3)); + + List results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + 
.withFilterExpression("country == 'NL'")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(nlDocument.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country == 'BG'")); + + assertThat(results).hasSize(2); + assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country == 'BG' && year == 2020")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(bgDocument.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country in ['BG']")); + + assertThat(results).hasSize(2); + assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country in ['BG','NL']")); + + assertThat(results).hasSize(3); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country not in ['BG']")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(nlDocument.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("NOT(country not in ['BG'])")); + + assertThat(results).hasSize(2); + assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + 
assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression( + "activationDate > " + ZonedDateTime.parse("1970-01-01T00:00:02Z").toInstant().toEpochMilli())); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(bgDocument2.getId()); + + // Remove all documents from the store + vectorStore.delete(documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore.similaritySearch(SearchRequest.query("The World").withTopK(1)), hasSize(0)); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + public void documentUpdateTest(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); + if (!DEFAULT.equals(similarityFunction)) { + vectorStore.withSimilarityFunction(similarityFunction); + } + + Document document = new Document(UUID.randomUUID().toString(), "Spring AI rocks!!", + Map.of("meta1", "meta1")); + vectorStore.add(List.of(document)); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Spring").withSimilarityThreshold(0).withTopK(5)), + hasSize(1)); + + List results = vectorStore + .similaritySearch(SearchRequest.query("Spring").withSimilarityThreshold(0).withTopK(5)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(document.getId()); + assertThat(resultDoc.getContent()).isEqualTo("Spring AI rocks!!"); + assertThat(resultDoc.getMetadata()).containsKey("meta1"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + Document sameIdDocument = new Document(document.getId(), + "The World is Big and Salvation Lurks Around the Corner", Map.of("meta2", "meta2")); 
+ + vectorStore.add(List.of(sameIdDocument)); + SearchRequest fooBarSearchRequest = SearchRequest.query("FooBar").withTopK(5); + + Awaitility.await() + .until(() -> vectorStore.similaritySearch(fooBarSearchRequest).get(0).getContent(), + equalTo("The World is Big and Salvation Lurks Around the Corner")); + + results = vectorStore.similaritySearch(fooBarSearchRequest); + + assertThat(results).hasSize(1); + resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(document.getId()); + assertThat(resultDoc.getContent()).isEqualTo("The World is Big and Salvation Lurks Around the Corner"); + assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + vectorStore.delete(List.of(document.getId())); + + Awaitility.await().until(() -> vectorStore.similaritySearch(fooBarSearchRequest), hasSize(0)); + + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + public void searchThresholdTest(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); + if (!DEFAULT.equals(similarityFunction)) { + vectorStore.withSimilarityFunction(similarityFunction); + } + + vectorStore.add(documents); + + SearchRequest query = SearchRequest.query("Great Depression") + .withTopK(50) + .withSimilarityThreshold(SearchRequest.SIMILARITY_THRESHOLD_ACCEPT_ALL); + + Awaitility.await().until(() -> vectorStore.similaritySearch(query), hasSize(3)); + + List fullResult = vectorStore.similaritySearch(query); + + List distances = fullResult.stream().map(doc -> (Float) doc.getMetadata().get("distance")).toList(); + + assertThat(distances).hasSize(3); + + float threshold = (distances.get(0) + distances.get(1)) / 2; + + List results = vectorStore.similaritySearch( + SearchRequest.query("Great 
Depression").withTopK(50).withSimilarityThreshold(1 - threshold)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(documents.get(2).getId()); + assertThat(resultDoc.getContent()).contains("The Great Depression (1929–1939) was an economic shock"); + assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + vectorStore.delete(documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(50).withSimilarityThreshold(0)), + hasSize(0)); + }); + } + + @SpringBootConfiguration + @EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class }) + public static class TestApplication { + + @Bean + public OpenSearchVectorStore vectorStore(EmbeddingClient embeddingClient) { + try { + return new OpenSearchVectorStore(new OpenSearchClient(ApacheHttpClient5TransportBuilder + .builder(HttpHost.create(opensearchContainer.getHttpHostAddress())) + .build()), embeddingClient); + } + catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + @Bean + public EmbeddingClient embeddingClient() { + return new OpenAiEmbeddingClient(new OpenAiApi(System.getenv("OPENAI_API_KEY"))); + } + + } + +}