Skip to content

Commit 798ab17

Browse files
apappascs, ilayaperumalg
authored and committed
feat(spring-ai-azure-store): Add support for configurable field names in Azure Vector Store, allowing users to work with existing Azure AI Search indexes that use custom field names instead of the hardcoded defaults (content, embedding, metadata).
## Motivation - Problem: Azure AI Search indexes may use custom field names (e.g., `chunk_text` instead of `content`). Currently, `AzureVectorStore` hardcodes field names, making it incompatible with such indexes. Without this feature, users must use the native Azure SDK directly (bypassing Spring AI abstractions). This change follows the pattern established by `PineconeVectorStore`, which already supports custom field names. Signed-off-by: Alexandros Pappas <[email protected]>
1 parent 556391b commit 798ab17

File tree

4 files changed

+152
-22
lines changed

4 files changed

+152
-22
lines changed

auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreAutoConfiguration.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
*
4545
* @author Christian Tzolov
4646
* @author Soby Chacko
47+
* @author Alexandros Pappas
4748
*/
4849
@AutoConfiguration
4950
@ConditionalOnClass({ EmbeddingModel.class, SearchIndexClient.class, AzureVectorStore.class })
@@ -102,6 +103,18 @@ public AzureVectorStore vectorStore(SearchIndexClient searchIndexClient, Embeddi
102103
builder.defaultSimilarityThreshold(properties.getDefaultSimilarityThreshold());
103104
}
104105

106+
if (properties.getContentFieldName() != null) {
107+
builder.contentFieldName(properties.getContentFieldName());
108+
}
109+
110+
if (properties.getEmbeddingFieldName() != null) {
111+
builder.embeddingFieldName(properties.getEmbeddingFieldName());
112+
}
113+
114+
if (properties.getMetadataFieldName() != null) {
115+
builder.metadataFieldName(properties.getMetadataFieldName());
116+
}
117+
105118
return builder.build();
106119
}
107120

auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreProperties.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
* Configuration properties for Azure Vector Store.
2525
*
2626
* @author Christian Tzolov
27+
* @author Alexandros Pappas
2728
*/
2829
@ConfigurationProperties(AzureVectorStoreProperties.CONFIG_PREFIX)
2930
public class AzureVectorStoreProperties extends CommonVectorStoreProperties {
@@ -42,6 +43,12 @@ public class AzureVectorStoreProperties extends CommonVectorStoreProperties {
4243

4344
private boolean useKeylessAuth;
4445

46+
private String contentFieldName;
47+
48+
private String embeddingFieldName;
49+
50+
private String metadataFieldName;
51+
4552
public String getUrl() {
4653
return this.url;
4754
}
@@ -90,4 +97,28 @@ public void setUseKeylessAuth(boolean useKeylessAuth) {
9097
this.useKeylessAuth = useKeylessAuth;
9198
}
9299

100+
public String getContentFieldName() {
101+
return this.contentFieldName;
102+
}
103+
104+
public void setContentFieldName(String contentFieldName) {
105+
this.contentFieldName = contentFieldName;
106+
}
107+
108+
public String getEmbeddingFieldName() {
109+
return this.embeddingFieldName;
110+
}
111+
112+
public void setEmbeddingFieldName(String embeddingFieldName) {
113+
this.embeddingFieldName = embeddingFieldName;
114+
}
115+
116+
public String getMetadataFieldName() {
117+
return this.metadataFieldName;
118+
}
119+
120+
public void setMetadataFieldName(String metadataFieldName) {
121+
this.metadataFieldName = metadataFieldName;
122+
}
123+
93124
}

vector-stores/spring-ai-azure-store/src/main/java/org/springframework/ai/vectorstore/azure/AzureVectorStore.java

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
* @author Thomas Vitale
7878
* @author Soby Chacko
7979
* @author Jinwoo Lee
80+
* @author Alexandros Pappas
8081
*/
8182
public class AzureVectorStore extends AbstractObservationVectorStore implements InitializingBean {
8283

@@ -119,6 +120,12 @@ public class AzureVectorStore extends AbstractObservationVectorStore implements
119120
*/
120121
private final List<MetadataField> filterMetadataFields;
121122

123+
private final String contentFieldName;
124+
125+
private final String embeddingFieldName;
126+
127+
private final String metadataFieldName;
128+
122129
@Nullable
123130
private SearchClient searchClient;
124131

@@ -145,6 +152,9 @@ protected AzureVectorStore(Builder builder) {
145152
this.defaultTopK = builder.defaultTopK;
146153
this.defaultSimilarityThreshold = builder.defaultSimilarityThreshold;
147154
this.indexName = builder.indexName;
155+
this.contentFieldName = builder.contentFieldName;
156+
this.embeddingFieldName = builder.embeddingFieldName;
157+
this.metadataFieldName = builder.metadataFieldName;
148158
this.filterExpressionConverter = new AzureAiSearchFilterExpressionConverter(this.filterMetadataFields);
149159
}
150160

@@ -166,9 +176,9 @@ public void doAdd(List<Document> documents) {
166176
final var searchDocuments = documents.stream().map(document -> {
167177
SearchDocument searchDocument = new SearchDocument();
168178
searchDocument.put(ID_FIELD_NAME, document.getId());
169-
searchDocument.put(EMBEDDING_FIELD_NAME, embeddings.get(documents.indexOf(document)));
170-
searchDocument.put(CONTENT_FIELD_NAME, document.getText());
171-
searchDocument.put(METADATA_FIELD_NAME, new JSONObject(document.getMetadata()).toJSONString());
179+
searchDocument.put(this.embeddingFieldName, embeddings.get(documents.indexOf(document)));
180+
searchDocument.put(this.contentFieldName, document.getText());
181+
searchDocument.put(this.metadataFieldName, new JSONObject(document.getMetadata()).toJSONString());
172182

173183
// Add the filterable metadata fields as top level fields, allowing filter
174184
// expressions on them.
@@ -223,7 +233,7 @@ public List<Document> doSimilaritySearch(SearchRequest request) {
223233
.setKNearestNeighborsCount(request.getTopK())
224234
// Set the fields to compare the vector against. This is a comma-delimited
225235
// list of field names.
226-
.setFields(EMBEDDING_FIELD_NAME);
236+
.setFields(this.embeddingFieldName);
227237

228238
var searchOptions = new SearchOptions()
229239
.setVectorSearchOptions(new VectorSearchOptions().setQueries(vectorQuery));
@@ -239,18 +249,19 @@ public List<Document> doSimilaritySearch(SearchRequest request) {
239249
.filter(result -> result.getScore() >= request.getSimilarityThreshold())
240250
.map(result -> {
241251

242-
final AzureSearchDocument entry = result.getDocument(AzureSearchDocument.class);
252+
SearchDocument document = result.getDocument(SearchDocument.class);
253+
254+
String id = document.get(ID_FIELD_NAME) != null ? document.get(ID_FIELD_NAME).toString() : "";
255+
String content = document.get(this.contentFieldName) != null
256+
? document.get(this.contentFieldName).toString() : "";
257+
String metadataJson = document.get(this.metadataFieldName) != null
258+
? document.get(this.metadataFieldName).toString() : "";
243259

244-
Map<String, Object> metadata = parseMetadataToMutable(entry.metadata());
260+
Map<String, Object> metadata = parseMetadataToMutable(metadataJson);
245261

246262
metadata.put(DocumentMetadata.DISTANCE.value(), 1.0 - result.getScore());
247263

248-
return Document.builder()
249-
.id(entry.id())
250-
.text(entry.content)
251-
.metadata(metadata)
252-
.score(result.getScore())
253-
.build();
264+
return Document.builder().id(id).text(content).metadata(metadata).score(result.getScore()).build();
254265
})
255266
.collect(Collectors.toList());
256267
}
@@ -270,15 +281,15 @@ public void afterPropertiesSet() throws Exception {
270281
fields.add(new SearchField(ID_FIELD_NAME, SearchFieldDataType.STRING).setKey(true)
271282
.setFilterable(true)
272283
.setSortable(true));
273-
fields.add(new SearchField(EMBEDDING_FIELD_NAME, SearchFieldDataType.collection(SearchFieldDataType.SINGLE))
284+
fields.add(new SearchField(this.embeddingFieldName, SearchFieldDataType.collection(SearchFieldDataType.SINGLE))
274285
.setSearchable(true)
275286
.setHidden(false)
276287
.setVectorSearchDimensions(dimensions)
277288
// This must match a vector search configuration name.
278289
.setVectorSearchProfileName(SPRING_AI_VECTOR_PROFILE));
279-
fields.add(new SearchField(CONTENT_FIELD_NAME, SearchFieldDataType.STRING).setSearchable(true)
290+
fields.add(new SearchField(this.contentFieldName, SearchFieldDataType.STRING).setSearchable(true)
280291
.setFilterable(true));
281-
fields.add(new SearchField(METADATA_FIELD_NAME, SearchFieldDataType.STRING).setSearchable(true)
292+
fields.add(new SearchField(this.metadataFieldName, SearchFieldDataType.STRING).setSearchable(true)
282293
.setFilterable(true));
283294

284295
for (MetadataField filterableMetadataField : this.filterMetadataFields) {
@@ -367,13 +378,6 @@ public static MetadataField date(String name) {
367378

368379
}
369380

370-
/**
371-
* Internal data structure for retrieving and storing documents.
372-
*/
373-
private record AzureSearchDocument(String id, String content, List<Float> embedding, String metadata) {
374-
375-
}
376-
377381
/**
378382
* Builder class for creating {@link AzureVectorStore} instances.
379383
* <p>
@@ -395,6 +399,12 @@ public static class Builder extends AbstractVectorStoreBuilder<Builder> {
395399

396400
private String indexName = DEFAULT_INDEX_NAME;
397401

402+
private String contentFieldName = CONTENT_FIELD_NAME;
403+
404+
private String embeddingFieldName = EMBEDDING_FIELD_NAME;
405+
406+
private String metadataFieldName = METADATA_FIELD_NAME;
407+
398408
private Builder(SearchIndexClient searchIndexClient, EmbeddingModel embeddingModel) {
399409
super(embeddingModel);
400410
Assert.notNull(searchIndexClient, "SearchIndexClient must not be null");
@@ -460,6 +470,38 @@ public Builder defaultSimilarityThreshold(Double defaultSimilarityThreshold) {
460470
return this;
461471
}
462472

473+
/**
474+
* Sets the content field name in the Azure Search index.
475+
* @param contentFieldName the name of the content field (defaults to "content")
476+
* @return the builder instance
477+
*/
478+
public Builder contentFieldName(@Nullable String contentFieldName) {
479+
this.contentFieldName = contentFieldName != null ? contentFieldName : CONTENT_FIELD_NAME;
480+
return this;
481+
}
482+
483+
/**
484+
* Sets the embedding field name in the Azure Search index.
485+
* @param embeddingFieldName the name of the embedding field (defaults to
486+
* "embedding")
487+
* @return the builder instance
488+
*/
489+
public Builder embeddingFieldName(@Nullable String embeddingFieldName) {
490+
this.embeddingFieldName = embeddingFieldName != null ? embeddingFieldName : EMBEDDING_FIELD_NAME;
491+
return this;
492+
}
493+
494+
/**
495+
* Sets the metadata field name in the Azure Search index.
496+
* @param metadataFieldName the name of the metadata field (defaults to
497+
* "metadata")
498+
* @return the builder instance
499+
*/
500+
public Builder metadataFieldName(@Nullable String metadataFieldName) {
501+
this.metadataFieldName = metadataFieldName != null ? metadataFieldName : METADATA_FIELD_NAME;
502+
return this;
503+
}
504+
463505
@Override
464506
public AzureVectorStore build() {
465507
return new AzureVectorStore(this);

vector-stores/spring-ai-azure-store/src/test/java/org/springframework/ai/vectorstore/azure/AzureVectorStoreIT.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
/**
5757
* @author Christian Tzolov
5858
* @author Thomas Vitale
59+
* @author Alexandros Pappas
5960
*/
6061
@EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_API_KEY", matches = ".+")
6162
@EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_ENDPOINT", matches = ".+")
@@ -329,6 +330,49 @@ void getNativeClientTest() {
329330
});
330331
}
331332

333+
@Test
334+
@EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_INDEX_NAME", matches = ".+")
335+
void customFieldNamesTest() throws Exception {
336+
// Test with existing production index that uses custom field names
337+
String existingIndexName = System.getenv("AZURE_AI_SEARCH_INDEX_NAME");
338+
String endpoint = System.getenv("AZURE_AI_SEARCH_ENDPOINT");
339+
String apiKey = System.getenv("AZURE_AI_SEARCH_API_KEY");
340+
341+
SearchIndexClient searchIndexClient = new SearchIndexClientBuilder().endpoint(endpoint)
342+
.credential(new AzureKeyCredential(apiKey))
343+
.buildClient();
344+
345+
TransformersEmbeddingModel embeddingModel = new TransformersEmbeddingModel();
346+
embeddingModel.afterPropertiesSet();
347+
348+
// Create vector store with custom field names matching the production index
349+
// Index uses: chunk_text (content), embedding, metadata
350+
VectorStore vectorStore = AzureVectorStore.builder(searchIndexClient, embeddingModel)
351+
.indexName(existingIndexName)
352+
.initializeSchema(false) // Don't create - use existing index
353+
.contentFieldName("chunk_text") // Custom field name!
354+
.embeddingFieldName("embedding") // Standard name
355+
.metadataFieldName("metadata") // Standard name
356+
.build();
357+
358+
// Trigger initialization
359+
((AzureVectorStore) vectorStore).afterPropertiesSet();
360+
361+
// Search the existing index
362+
List<Document> results = vectorStore
363+
.similaritySearch(SearchRequest.builder().query("Azure Databricks").topK(3).build());
364+
365+
// Verify we got results
366+
assertThat(results).isNotEmpty();
367+
assertThat(results.size()).isLessThanOrEqualTo(3);
368+
369+
// Verify documents have content (from chunk_text field)
370+
Document firstDoc = results.get(0);
371+
assertThat(firstDoc.getId()).isNotNull();
372+
assertThat(firstDoc.getText()).isNotEmpty();
373+
assertThat(firstDoc.getScore()).isNotNull();
374+
}
375+
332376
@SpringBootConfiguration
333377
@EnableAutoConfiguration
334378
public static class Config {

0 commit comments

Comments
 (0)