From 875f40c115247687be5c5025e77d4b41503c6dc6 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Fri, 6 Sep 2024 12:49:25 +0200 Subject: [PATCH 1/6] handling missing index case --- .../ai/vectorstore/ElasticsearchVectorStore.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java index 56db121a85f..68836a6c6a2 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java @@ -112,6 +112,12 @@ public ElasticsearchVectorStore(ElasticsearchVectorStoreOptions options, RestCli @Override public void doAdd(List documents) { + // Elasticsearch can automatically create an index if it does not exist, but it + // will always use the default similarity function 'cosine' + if (!indexExists() && !options.getSimilarity().equals(SimilarityFunction.cosine)) { + throw new IllegalArgumentException( + "Index not found, cannot use similarity functions other than 'cosine' if the index has not been previously configured"); + } BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); for (Document document : documents) { @@ -119,13 +125,8 @@ public void doAdd(List documents) { logger.debug("Calling EmbeddingModel for document id = " + document.getId()); document.setEmbedding(this.embeddingModel.embed(document)); } - // We call operations on BulkRequest.Builder only if the index exists. - // For the index to be present, either it must be pre-created or set the - // initializeSchema to true. 
- if (indexExists()) { - bulkRequestBuilder.operations(op -> op - .index(idx -> idx.index(this.options.getIndexName()).id(document.getId()).document(document))); - } + bulkRequestBuilder.operations(op -> op + .index(idx -> idx.index(this.options.getIndexName()).id(document.getId()).document(document))); } BulkResponse bulkRequest = bulkRequest(bulkRequestBuilder.build()); if (bulkRequest.errors()) { From 7c00681dc32607aaac87eaf7f827e2e83b6bf893 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Fri, 6 Sep 2024 13:23:02 +0200 Subject: [PATCH 2/6] docs update --- .../modules/ROOT/pages/api/vectordbs/elasticsearch.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc index 1a70b2bd19c..606adf2c88d 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc @@ -50,6 +50,10 @@ TIP: Refer to the xref:getting-started.adoc#repositories[Repositories] section t The vector store implementation can initialize the requisite schema for you, but you must opt-in by specifying the `initializeSchema` boolean in the appropriate constructor or by setting `...initialize-schema=true` in the `application.properties` file. +Alternatively you can opt-out the initialization and either: + +- let Elasticsearch create the index automatically when the first batch of data is indexed - this works well for simple usecases, or for when the vector dimension is unknown: Elasticsearch will be able to infer it from the data received. The only similarity function allowed using this approach is `cosine`. +- create the index manually using the Elasticsearch client - useful if the index needs advanced mapping or additional configuration which `initializeSchema` does not provide. NOTE: this is a breaking change! 
In earlier versions of Spring AI, this schema initialization happened by default. From c75f1cac92bbef05224549d73cb3b2050b149850 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 9 Sep 2024 16:12:38 +0200 Subject: [PATCH 3/6] conform elasticsearch to global settings, updated documentation --- .../modules/ROOT/pages/api/vectordbs/elasticsearch.adoc | 6 ++---- .../ai/vectorstore/ElasticsearchVectorStore.java | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc index 606adf2c88d..a791e588b25 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/elasticsearch.adoc @@ -50,16 +50,14 @@ TIP: Refer to the xref:getting-started.adoc#repositories[Repositories] section t The vector store implementation can initialize the requisite schema for you, but you must opt-in by specifying the `initializeSchema` boolean in the appropriate constructor or by setting `...initialize-schema=true` in the `application.properties` file. -Alternatively you can opt-out the initialization and either: - -- let Elasticsearch create the index automatically when the first batch of data is indexed - this works well for simple usecases, or for when the vector dimension is unknown: Elasticsearch will be able to infer it from the data received. The only similarity function allowed using this approach is `cosine`. -- create the index manually using the Elasticsearch client - useful if the index needs advanced mapping or additional configuration which `initializeSchema` does not provide. +Alternatively, you can opt out of the initialization and create the index manually using the Elasticsearch client, which can be useful if the index needs advanced mapping or additional configuration. 
NOTE: this is a breaking change! In earlier versions of Spring AI, this schema initialization happened by default. Please have a look at the list of <> for the vector store to learn about the default values and configuration options. +These properties can also be set by configuring the `ElasticsearchVectorStoreOptions` bean. Additionally, you will need a configured `EmbeddingModel` bean. Refer to the xref:api/embeddings.adoc#available-implementations[EmbeddingModel] section for more information. diff --git a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java index 68836a6c6a2..ab93fa4b511 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java @@ -112,11 +112,9 @@ public ElasticsearchVectorStore(ElasticsearchVectorStoreOptions options, RestCli @Override public void doAdd(List documents) { - // Elasticsearch can automatically create an index if it does not exist, but it - // will always use the default similarity function 'cosine' - if (!indexExists() && !options.getSimilarity().equals(SimilarityFunction.cosine)) { + if (!indexExists()) { throw new IllegalArgumentException( - "Index not found, cannot use similarity functions other than 'cosine' if the index has not been previously configured"); + "Index not found"); } BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); From ac488ded5255ab66af3243bd5375f5a57a62f584 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 9 Sep 2024 16:27:04 +0200 Subject: [PATCH 4/6] user header for observability --- .../springframework/ai/vectorstore/ElasticsearchVectorStore.java | 1 + 1 file changed, 1 insertion(+) diff 
--git a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java index ab93fa4b511..5380bfd408a 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java @@ -105,6 +105,7 @@ public ElasticsearchVectorStore(ElasticsearchVectorStoreOptions options, RestCli Objects.requireNonNull(embeddingModel, "EmbeddingModel must not be null"); this.elasticsearchClient = new ElasticsearchClient(new RestClientTransport(restClient, new JacksonJsonpMapper( new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)))); + elasticsearchClient.withTransportOptions(t -> t.addHeader("user-agent", "spring-ai")); this.embeddingModel = embeddingModel; this.options = options; this.filterExpressionConverter = new ElasticsearchAiSearchFilterExpressionConverter(); From 7f9520e31e35f804a14e2559383ca3336f007dae Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 9 Sep 2024 17:14:15 +0200 Subject: [PATCH 5/6] format --- .../ai/vectorstore/ElasticsearchVectorStore.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java index 5380bfd408a..87d59136d3f 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java @@ -114,8 
+114,7 @@ public ElasticsearchVectorStore(ElasticsearchVectorStoreOptions options, RestCli @Override public void doAdd(List documents) { if (!indexExists()) { - throw new IllegalArgumentException( - "Index not found"); + throw new IllegalArgumentException("Index not found"); } BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); From 94640cfbb127d204309f20bf39239904a3990ffb Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 9 Sep 2024 17:32:04 +0200 Subject: [PATCH 6/6] same exception management for delete --- .../ai/vectorstore/ElasticsearchVectorStore.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java index 87d59136d3f..ce4630cd738 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java @@ -113,6 +113,8 @@ public ElasticsearchVectorStore(ElasticsearchVectorStoreOptions options, RestCli @Override public void doAdd(List documents) { + // For the index to be present, either it must be pre-created or set the + // initializeSchema to true. if (!indexExists()) { throw new IllegalArgumentException("Index not found"); } @@ -140,13 +142,13 @@ public void doAdd(List documents) { @Override public Optional doDelete(List idList) { BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); - // We call operations on BulkRequest.Builder only if the index exists. // For the index to be present, either it must be pre-created or set the // initializeSchema to true. 
- if (indexExists()) { - for (String id : idList) { - bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.options.getIndexName()).id(id))); - } + if (!indexExists()) { + throw new IllegalArgumentException("Index not found"); + } + for (String id : idList) { + bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.options.getIndexName()).id(id))); } return Optional.of(bulkRequest(bulkRequestBuilder.build()).errors()); }