Skip to content

Commit 02cbd46

Browse files
authored
Merge pull request #271 from milderhc/hybrid-search
Add hybridSearchAsync to AzureAISearchVectorStoreRecordCollection
2 parents 2c43b23 + 021411f commit 02cbd46

File tree

1 file changed

+106
-23
lines changed

1 file changed

+106
-23
lines changed

data/semantickernel-data-azureaisearch/src/main/java/com/microsoft/semantickernel/data/azureaisearch/AzureAISearchVectorStoreRecordCollection.java

Lines changed: 106 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.azure.search.documents.indexes.models.VectorSearchProfile;
1111
import com.azure.search.documents.models.IndexDocumentsResult;
1212
import com.azure.search.documents.models.IndexingResult;
13+
import com.azure.search.documents.models.ScoringParameter;
1314
import com.azure.search.documents.models.SearchOptions;
1415
import com.azure.search.documents.models.VectorQuery;
1516
import com.azure.search.documents.models.VectorizableTextQuery;
@@ -39,6 +40,7 @@
3940
import java.util.HashSet;
4041
import java.util.Iterator;
4142
import java.util.List;
43+
import java.util.Vector;
4244
import java.util.stream.Collectors;
4345
import javax.annotation.Nonnull;
4446
import reactor.core.publisher.Flux;
@@ -288,37 +290,40 @@ public Mono<Void> deleteBatchAsync(List<String> keys, DeleteRecordOptions option
288290
}).collect(Collectors.toList())).then();
289291
}
290292

291-
private Mono<VectorSearchResults<Record>> searchAndMapAsync(
292-
List<VectorQuery> vectorQueries, VectorSearchOptions options,
293-
GetRecordOptions getRecordOptions) {
294-
293+
private SearchOptions configureVectorSearchOptions(
294+
List<VectorQuery> vectorQueries, VectorSearchOptions options) {
295295
String filter = AzureAISearchVectorStoreCollectionSearchMapping.getInstance()
296296
.getFilter(options.getVectorSearchFilter(), recordDefinition);
297297

298298
SearchOptions searchOptions = new SearchOptions()
299299
.setFilter(filter)
300300
.setTop(options.getTop())
301301
.setSkip(options.getSkip())
302-
.setScoringParameters()
303302
.setVectorSearchOptions(new com.azure.search.documents.models.VectorSearchOptions()
304303
.setQueries(vectorQueries));
305304

306305
if (!options.isIncludeVectors()) {
307306
searchOptions.setSelect(nonVectorFields.toArray(new String[0]));
308307
}
309308

309+
return searchOptions;
310+
}
311+
312+
private Mono<VectorSearchResults<Record>> searchAndMapAsync(String query,
313+
SearchOptions searchOptions,
314+
boolean includeVectors) {
310315
VectorStoreRecordMapper<Record, SearchDocument> mapper = this.options
311316
.getVectorStoreRecordMapper();
312317

313-
return this.searchAsyncClient.search(null, searchOptions)
318+
return this.searchAsyncClient.search(query, searchOptions)
314319
.flatMap(response -> {
315320
Record record;
316321

317322
// Use custom mapper if available
318323
if (mapper != null && mapper.getStorageModelToRecordMapper() != null) {
319324
record = mapper
320325
.mapStorageModelToRecord(response.getDocument(SearchDocument.class),
321-
getRecordOptions);
326+
new GetRecordOptions(includeVectors));
322327
} else {
323328
record = response.getDocument(this.options.getRecordClass());
324329
}
@@ -329,7 +334,9 @@ record = response.getDocument(this.options.getRecordClass());
329334
}
330335

331336
/**
332-
* Vectorizable text search. This method searches for records that are similar to the given text.
337+
* Vectorizable text search. This method searches for records that are similar to the given text after vectorization.
338+
* <p>
339+
* Vectorizer configuration must be set up in the Azure AI Search index.
333340
*
334341
* @param searchText The text to search with.
335342
* @param options The options to use for the search.
@@ -353,8 +360,9 @@ public Mono<VectorSearchResults<Record>> searchAsync(String searchText,
353360
: firstVectorFieldName).getEffectiveStorageName())
354361
.setKNearestNeighborsCount(options.getTop()));
355362

356-
return searchAndMapAsync(vectorQueries, options,
357-
new GetRecordOptions(options.isIncludeVectors()));
363+
return searchAndMapAsync(null,
364+
configureVectorSearchOptions(vectorQueries, options),
365+
options.isIncludeVectors());
358366
}
359367

360368
/**
@@ -367,22 +375,97 @@ public Mono<VectorSearchResults<Record>> searchAsync(String searchText,
367375
@Override
368376
public Mono<VectorSearchResults<Record>> searchAsync(List<Float> vector,
369377
VectorSearchOptions options) {
370-
if (firstVectorFieldName == null) {
371-
throw new SKException("No vector fields defined. Cannot perform vector search");
372-
}
378+
return hybridSearchAsync(null, vector, options, null);
379+
}
373380

374-
if (options == null) {
375-
options = VectorSearchOptions.createDefault(firstVectorFieldName);
381+
/**
382+
* Hybrid search. This method combines a full text search on the given text with a vector search on the given vector.
383+
*
384+
* @param searchText The text to search with.
385+
* If null, only vector search is performed.
386+
* @param vector The vector to search with.
387+
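If null, only full text search is performed and the filter, top and skip values of {@code options} are not applied; supply them through {@code additionalSearchOptions} instead.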
388+
* @param options The vector search options used for the search.
389+
* @param additionalSearchOptions Additional Azure AI Search options.
390+
If Filter, Top, Skip, Select or VectorSearchOptions are set (non-null), they override the values derived from {@code options}.
391+
* <p>
392+
* If null, default search options are used.
393+
*/
394+
public Mono<VectorSearchResults<Record>> hybridSearchAsync(String searchText,
395+
List<Float> vector, VectorSearchOptions options, SearchOptions additionalSearchOptions) {
396+
SearchOptions searchOptions = new SearchOptions();
397+
398+
if (vector != null) {
399+
if (firstVectorFieldName == null) {
400+
throw new SKException("No vector fields defined. Cannot perform vector search");
401+
}
402+
403+
if (options == null) {
404+
options = VectorSearchOptions.createDefault(firstVectorFieldName);
405+
}
406+
407+
List<VectorQuery> vectorQueries = new ArrayList<>();
408+
vectorQueries.add(new VectorizedQuery(vector)
409+
.setFields(recordDefinition.getField(options.getVectorFieldName() != null
410+
? options.getVectorFieldName()
411+
: firstVectorFieldName).getEffectiveStorageName())
412+
.setKNearestNeighborsCount(options.getTop()));
413+
414+
// Configure default vector search options
415+
searchOptions = configureVectorSearchOptions(vectorQueries, options);
376416
}
377417

378-
List<VectorQuery> vectorQueries = new ArrayList<>();
379-
vectorQueries.add(new VectorizedQuery(vector)
380-
.setFields(recordDefinition.getField(options.getVectorFieldName() != null
381-
? options.getVectorFieldName()
382-
: firstVectorFieldName).getEffectiveStorageName())
383-
.setKNearestNeighborsCount(options.getTop()));
418+
// Configure additional search options
419+
if (additionalSearchOptions != null) {
420+
searchOptions
421+
.setQueryType(additionalSearchOptions.getQueryType())
422+
.setSemanticSearchOptions(additionalSearchOptions.getSemanticSearchOptions())
423+
.setFacets(additionalSearchOptions.getFacets() != null
424+
? additionalSearchOptions.getFacets().toArray(new String[0])
425+
: null)
426+
.setHighlightFields(additionalSearchOptions.getHighlightFields() != null
427+
? additionalSearchOptions.getHighlightFields().toArray(new String[0])
428+
: null)
429+
.setHighlightPreTag(additionalSearchOptions.getHighlightPreTag())
430+
.setHighlightPostTag(additionalSearchOptions.getHighlightPostTag())
431+
.setMinimumCoverage(additionalSearchOptions.getMinimumCoverage())
432+
.setOrderBy(additionalSearchOptions.getOrderBy() != null
433+
? additionalSearchOptions.getOrderBy().toArray(new String[0])
434+
: null)
435+
.setScoringParameters(additionalSearchOptions.getScoringParameters() != null
436+
? additionalSearchOptions.getScoringParameters().stream()
437+
.map(s -> new ScoringParameter(s.getName(), s.getValues()))
438+
.toArray(ScoringParameter[]::new)
439+
: null)
440+
.setScoringProfile(additionalSearchOptions.getScoringProfile())
441+
.setSearchFields(additionalSearchOptions.getSearchFields() != null
442+
? additionalSearchOptions.getSearchFields().toArray(new String[0])
443+
: null)
444+
.setIncludeTotalCount(additionalSearchOptions.isTotalCountIncluded())
445+
.setSearchMode(additionalSearchOptions.getSearchMode())
446+
.setScoringStatistics(additionalSearchOptions.getScoringStatistics())
447+
.setSessionId(additionalSearchOptions.getSessionId());
448+
449+
// Override default vector options if provided
450+
if (additionalSearchOptions.getFilter() != null) {
451+
searchOptions.setFilter(additionalSearchOptions.getFilter());
452+
}
453+
if (additionalSearchOptions.getTop() != null) {
454+
searchOptions.setTop(additionalSearchOptions.getTop());
455+
}
456+
if (additionalSearchOptions.getSkip() != null) {
457+
searchOptions.setSkip(additionalSearchOptions.getSkip());
458+
}
459+
if (additionalSearchOptions.getVectorSearchOptions() != null) {
460+
searchOptions
461+
.setVectorSearchOptions(additionalSearchOptions.getVectorSearchOptions());
462+
}
463+
if (additionalSearchOptions.getSelect() != null) {
464+
searchOptions.setSelect(additionalSearchOptions.getSelect().toArray(new String[0]));
465+
}
466+
}
384467

385-
return searchAndMapAsync(vectorQueries, options,
386-
new GetRecordOptions(options.isIncludeVectors()));
468+
return searchAndMapAsync(searchText, searchOptions,
469+
options != null && options.isIncludeVectors());
387470
}
388471
}

0 commit comments

Comments
 (0)