Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@
* }</pre>
*
* <p>
* AWS OpenSearch Serverless usage example:
* </p>
* <pre>{@code
* OpenSearchVectorStore vectorStore = OpenSearchVectorStore.builder(openSearchClient, embeddingModel)
* .initializeSchema(true)
* .manageDocumentIds(false) // Required for AWS OpenSearch Serverless
* .build();
* }</pre>
*
* <p>
* Advanced configuration example:
* </p>
* <pre>{@code
Expand Down Expand Up @@ -170,6 +180,8 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem

private String similarityFunction;

private final boolean manageDocumentIds;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be true by default, so that Spring AI manages the document IDs and backward compatibility is preserved?


/**
* Creates a new OpenSearchVectorStore using the builder pattern.
* @param builder The configured builder instance
Expand All @@ -187,6 +199,7 @@ protected OpenSearchVectorStore(Builder builder) {
// https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces
this.similarityFunction = builder.similarityFunction;
this.initializeSchema = builder.initializeSchema;
this.manageDocumentIds = builder.manageDocumentIds;
}

/**
Expand All @@ -210,14 +223,27 @@ public void doAdd(List<Document> documents) {
for (Document document : documents) {
OpenSearchDocument openSearchDocument = new OpenSearchDocument(document.getId(), document.getText(),
document.getMetadata(), embedding.get(documents.indexOf(document)));
bulkRequestBuilder.operations(op -> op
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));

// Conditionally set document ID based on manageDocumentIds flag
if (this.manageDocumentIds) {
bulkRequestBuilder.operations(op -> op
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
}
else {
bulkRequestBuilder
.operations(op -> op.index(idx -> idx.index(this.index).document(openSearchDocument)));
}
}
bulkRequest(bulkRequestBuilder.build());
}

@Override
public void doDelete(List<String> idList) {
if (!this.manageDocumentIds) {
logger.warn("Document ID management is disabled. Delete operations may not work as expected "
+ "since document IDs are auto-generated by OpenSearch. Consider using filter-based deletion instead.");
}

BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
for (String id : idList) {
bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.index).id(id)));
Expand Down Expand Up @@ -417,6 +443,8 @@ public static class Builder extends AbstractVectorStoreBuilder<Builder> {

private String similarityFunction = COSINE_SIMILARITY_FUNCTION;

// Defaults to true: the store keeps sending each document's own ID to OpenSearch,
// which is what it did before this option existed. Callers opt out explicitly
// (e.g. for AWS OpenSearch Serverless) via manageDocumentIds(false).
private boolean manageDocumentIds = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: shouldn't the default value be true?


/**
* Sets the OpenSearch client.
* @param openSearchClient The OpenSearch client to use
Expand Down Expand Up @@ -488,6 +516,28 @@ public Builder similarityFunction(String similarityFunction) {
return this;
}

/**
* Sets whether the store supplies the document ID on each index operation.
* <p>
* When set to {@code true}, the ID of every document is passed to OpenSearch as the
* ID of the indexed entry. When set to {@code false}, no ID is passed and OpenSearch
* auto-generates one, which is required for AWS OpenSearch Serverless vector search
* collections.
* </p>
* <p>
* {@code true} is the intended default, since it matches how documents were indexed
* before this option was introduced. NOTE(review): confirm that the initial value
* of the builder field agrees with this.
* </p>
* <p>
* Note: When document ID management is disabled,
* {@link OpenSearchVectorStore#doDelete(List)} logs a warning and may not work as
* expected, because the IDs stored in OpenSearch are auto-generated and are not the
* IDs of the original documents. Filter-based deletion is the suggested alternative.
* </p>
* @param manageDocumentIds {@code true} to send document IDs explicitly,
* {@code false} to let OpenSearch auto-generate them
* @return The builder instance
*/
public Builder manageDocumentIds(boolean manageDocumentIds) {
this.manageDocumentIds = manageDocumentIds;
return this;
}

/**
* Builds a new OpenSearchVectorStore instance with the configured properties.
* @return A new OpenSearchVectorStore instance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,161 @@ void getNativeClientTest() {
});
}

/**
 * Verifies that documents can be added and found again regardless of the
 * {@code manageDocumentIds} setting, and that text and metadata survive the round
 * trip in both modes.
 * @param manageDocumentIds the value passed to the builder for this run
 */
@ParameterizedTest(name = "manageDocumentIds={0}")
@ValueSource(booleans = { true, false })
void testManageDocumentIdsSetting(boolean manageDocumentIds) {
	getContextRunner().run(context -> {
		OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);

		// Build a dedicated store (and index) per parameter value so the two runs
		// never see each other's documents.
		OpenSearchVectorStore testVectorStore = OpenSearchVectorStore
			.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
					context.getBean(EmbeddingModel.class))
			.manageDocumentIds(manageDocumentIds)
			.index("test_manage_document_ids_" + manageDocumentIds)
			.initializeSchema(true)
			.build();

		List<Document> testDocuments = List.of(new Document("doc1", "Test content 1", Map.of("key1", "value1")),
				new Document("doc2", "Test content 2", Map.of("key2", "value2")));

		testVectorStore.add(testDocuments);

		SearchRequest searchRequest = SearchRequest.builder().query("Test content").topK(2).build();

		// Indexing is asynchronous; poll until both documents are searchable.
		Awaitility.await().until(() -> testVectorStore.similaritySearch(searchRequest), hasSize(2));

		List<Document> results = testVectorStore.similaritySearch(searchRequest);

		assertThat(results).hasSize(2);

		// Content and metadata must be preserved in both modes.
		assertThat(results.stream().map(Document::getText).toList()).containsExactlyInAnyOrder("Test content 1",
				"Test content 2");

		assertThat(results.stream().map(doc -> doc.getMetadata().get("key1")).toList()).contains("value1");
		assertThat(results.stream().map(doc -> doc.getMetadata().get("key2")).toList()).contains("value2");

		// Clean up. Deleting by the original document IDs only works when those IDs
		// were actually sent to OpenSearch; with auto-generated IDs the request
		// matches nothing, so fall back to filter-based deletion in that mode.
		if (manageDocumentIds) {
			testVectorStore.delete(testDocuments.stream().map(Document::getId).toList());
		}
		else {
			testVectorStore.delete("key1 == 'value1' || key2 == 'value2'");
		}
	});
}

/**
 * Verifies the AWS OpenSearch Serverless compatible mode
 * ({@code manageDocumentIds=false}): documents are indexed without an explicit ID
 * and can still be found with their text and metadata intact.
 */
@Test
void testManageDocumentIdsFalseForAWSOpenSearchServerless() {
	getContextRunner().run(context -> {
		OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);

		// Store configured the way AWS OpenSearch Serverless requires it.
		OpenSearchVectorStore awsCompatibleVectorStore = OpenSearchVectorStore
			.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
					context.getBean(EmbeddingModel.class))
			.manageDocumentIds(false)
			.index("test_aws_serverless_compatible")
			.initializeSchema(true)
			.build();

		// The documents carry IDs, but with manageDocumentIds=false those IDs are
		// not sent as the OpenSearch document ID.
		List<Document> testDocuments = List.of(
				new Document("custom-id-1", "AWS Serverless content 1", Map.of("env", "aws-serverless")),
				new Document("custom-id-2", "AWS Serverless content 2", Map.of("env", "aws-serverless")));

		// Must succeed without any explicit-document-ID error.
		awsCompatibleVectorStore.add(testDocuments);

		SearchRequest searchRequest = SearchRequest.builder().query("AWS Serverless").topK(2).build();

		// Indexing is asynchronous; poll until both documents are searchable.
		Awaitility.await().until(() -> awsCompatibleVectorStore.similaritySearch(searchRequest), hasSize(2));

		List<Document> results = awsCompatibleVectorStore.similaritySearch(searchRequest);

		assertThat(results).hasSize(2);

		assertThat(results.stream().map(Document::getText).toList())
			.containsExactlyInAnyOrder("AWS Serverless content 1", "AWS Serverless content 2");

		assertThat(results.stream().map(doc -> doc.getMetadata().get("env")).toList())
			.containsOnly("aws-serverless");

		// Clean up with a metadata filter. Deleting by ID cannot work here: the
		// stored IDs are auto-generated, and "_all" is not a wildcard for ID-based
		// deletion - it would only target a document whose ID is literally "_all".
		awsCompatibleVectorStore.delete("env == 'aws-serverless'");
	});
}

/**
 * Verifies the {@code manageDocumentIds=true} mode: the caller-supplied document IDs
 * come back from a search, and deleting by one of those IDs removes exactly that
 * document.
 */
@Test
void testManageDocumentIdsTrueWithExplicitIds() {
	getContextRunner().run(context -> {
		OpenSearchVectorStore sharedStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
		OpenSearchClient nativeClient = (OpenSearchClient) sharedStore.getNativeClient().orElseThrow();

		// Dedicated store that sends the documents' own IDs to OpenSearch.
		OpenSearchVectorStore explicitIdVectorStore = OpenSearchVectorStore
			.builder(nativeClient, context.getBean(EmbeddingModel.class))
			.manageDocumentIds(true)
			.index("test_explicit_ids")
			.initializeSchema(true)
			.build();

		Document first = new Document("explicit-id-1", "Explicit ID content 1", Map.of("type", "explicit"));
		Document second = new Document("explicit-id-2", "Explicit ID content 2", Map.of("type", "explicit"));
		explicitIdVectorStore.add(List.of(first, second));

		// One request object serves every lookup in this test.
		SearchRequest searchRequest = SearchRequest.builder().query("Explicit ID").topK(2).build();

		// Poll until both documents have become searchable.
		Awaitility.await().until(() -> explicitIdVectorStore.similaritySearch(searchRequest), hasSize(2));

		List<Document> initialHits = explicitIdVectorStore.similaritySearch(searchRequest);
		assertThat(initialHits).hasSize(2);

		// IDs, text and metadata must all match what was stored.
		List<String> hitIds = initialHits.stream().map(Document::getId).toList();
		assertThat(hitIds).containsExactlyInAnyOrder("explicit-id-1", "explicit-id-2");

		List<String> hitTexts = initialHits.stream().map(Document::getText).toList();
		assertThat(hitTexts).containsExactlyInAnyOrder("Explicit ID content 1", "Explicit ID content 2");

		assertThat(initialHits.stream().map(doc -> doc.getMetadata().get("type")).toList())
			.containsOnly("explicit");

		// Remove one document by its explicit ID and wait for the index to catch up.
		explicitIdVectorStore.delete(List.of("explicit-id-1"));
		Awaitility.await().until(() -> explicitIdVectorStore.similaritySearch(searchRequest), hasSize(1));

		// Only the second document may remain.
		List<Document> remainingHits = explicitIdVectorStore.similaritySearch(searchRequest);
		assertThat(remainingHits).hasSize(1);
		assertThat(remainingHits.get(0).getId()).isEqualTo("explicit-id-2");

		// Clean up the last document.
		explicitIdVectorStore.delete(List.of("explicit-id-2"));
	});
}

@SpringBootConfiguration
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
public static class TestApplication {
Expand Down
Loading