Skip to content

Commit 946ef03

Browse files
author
Milder Hernandez Cagua
committed
Add Text Search API and VectorStoreTextSearch implementation
1 parent c4bc8c6 commit 946ef03

File tree

39 files changed

+1070
-184
lines changed

39 files changed

+1070
-184
lines changed

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -416,25 +416,25 @@ public void exactSearch(QueryProvider provider, String embeddingName) {
416416

417417
VectorSearchOptions options = VectorSearchOptions.builder()
418418
.withVectorFieldName(embeddingName)
419-
.withLimit(3)
419+
.withTop(3)
420420
.build();
421421

422422
// Embeddings similar to the third hotel
423423
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
424-
options).block();
424+
options).block().getResults();
425425
assertNotNull(results);
426426
assertEquals(3, results.size());
427427
// The third hotel should be the most similar
428428
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId());
429429

430430
options = VectorSearchOptions.builder()
431431
.withVectorFieldName(embeddingName)
432-
.withOffset(1)
433-
.withLimit(-100)
432+
.withSkip(1)
433+
.withTop(-100)
434434
.build();
435435

436436
// Skip the first result
437-
results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block();
437+
results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block().getResults();
438438
assertNotNull(results);
439439
assertEquals(1, results.size());
440440
// The first hotel should be the most similar
@@ -453,12 +453,12 @@ public void approximateSearch(QueryProvider provider) {
453453

454454
VectorSearchOptions options = VectorSearchOptions.builder()
455455
.withVectorFieldName("indexedEuclidean")
456-
.withLimit(5)
456+
.withTop(5)
457457
.build();
458458

459459
// Embeddings similar to the third hotel
460460
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
461-
options).block();
461+
options).block().getResults();
462462
assertNotNull(results);
463463
assertEquals(5, results.size());
464464
// The third hotel should be the most similar
@@ -477,15 +477,15 @@ public void searchWithFilterEqualToFilter(QueryProvider provider, String embeddi
477477

478478
VectorSearchOptions options = VectorSearchOptions.builder()
479479
.withVectorFieldName(embeddingName)
480-
.withLimit(3)
480+
.withTop(3)
481481
.withVectorSearchFilter(
482482
VectorSearchFilter.builder()
483483
.equalTo("rating", 4.0).build())
484484
.build();
485485

486486
// Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned
487487
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
488-
options).block();
488+
options).block().getResults();
489489
assertNotNull(results);
490490
assertEquals(3, results.size());
491491
// The first hotel should be the most similar
@@ -504,15 +504,15 @@ public void searchWithAnyTagEqualToFilter(QueryProvider provider, String embeddi
504504

505505
VectorSearchOptions options = VectorSearchOptions.builder()
506506
.withVectorFieldName(embeddingName)
507-
.withLimit(3)
507+
.withTop(3)
508508
.withVectorSearchFilter(
509509
VectorSearchFilter.builder()
510510
.anyTagEqualTo("tags", "city").build())
511511
.build();
512512

513513
// Embeddings similar to the third hotel, but as the filter only matches hotels tagged "city", the third hotel should not be returned
514514
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
515-
options).block();
515+
options).block().getResults();
516516
assertNotNull(results);
517517
assertEquals(3, results.size());
518518
// The first hotel should be the most similar
@@ -530,7 +530,7 @@ public void postgresSearchIncludeAndNotIncludeVectors() {
530530
recordCollection.upsertBatchAsync(hotels, null).block();
531531

532532
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
533-
null).block();
533+
null).block().getResults();
534534
assertNotNull(results);
535535
assertEquals(3, results.size());
536536
// The third hotel should be the most similar
@@ -541,7 +541,7 @@ public void postgresSearchIncludeAndNotIncludeVectors() {
541541
.withIncludeVectors(true)
542542
.build();
543543

544-
results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block();
544+
results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block().getResults();
545545
assertNotNull(results);
546546
assertEquals(3, results.size());
547547
// The third hotel should be the most similar

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisHashSetVectorStoreRecordCollectionTest.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,9 @@ public void search(RecordCollectionOptions options, String embeddingName) {
387387
.build();
388388

389389
// Embeddings similar to the third hotel
390-
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block();
390+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults();
391391
assertNotNull(results);
392-
assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage);
392+
assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), indexingFailureMessage);
393393
// The third hotel should be the most similar
394394
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage);
395395
// Score should be different than zero
@@ -412,9 +412,9 @@ public void searchWithVectors(RecordCollectionOptions options, String embeddingN
412412
.build();
413413

414414
// Embeddings similar to the third hotel
415-
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block();
415+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults();
416416
assertNotNull(results);
417-
assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage);
417+
assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), indexingFailureMessage);
418418
// The third hotel should be the most similar
419419
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage);
420420
assertNotNull(results.get(0).getRecord().getEuclidean());
@@ -431,12 +431,12 @@ public void searchWithOffSet(RecordCollectionOptions options, String embeddingNa
431431

432432
VectorSearchOptions searchOptions = VectorSearchOptions.builder()
433433
.withVectorFieldName(embeddingName)
434-
.withOffset(1)
435-
.withLimit(4)
434+
.withSkip(1)
435+
.withTop(4)
436436
.build();
437437

438438
// Embeddings similar to the third hotel
439-
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block();
439+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults();
440440
assertNotNull(results);
441441
assertEquals(4, results.size(), indexingFailureMessage);
442442
// The first hotel should be the most similar
@@ -454,15 +454,15 @@ public void searchWithFilterEqualToFilter(RecordCollectionOptions recordCollecti
454454

455455
VectorSearchOptions options = VectorSearchOptions.builder()
456456
.withVectorFieldName(embeddingName)
457-
.withLimit(3)
457+
.withTop(3)
458458
.withVectorSearchFilter(
459459
VectorSearchFilter.builder()
460460
.equalTo("rating", 4.0).build())
461461
.build();
462462

463463
// Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned
464464
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
465-
options).block();
465+
options).block().getResults();
466466
assertNotNull(results);
467467
assertEquals(3, results.size());
468468
// The first hotel should be the most similar

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisJsonVectorStoreRecordCollectionTest.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,9 @@ public void search(RecordCollectionOptions options, String embeddingName) {
387387
.build();
388388

389389
// Embeddings similar to the third hotel
390-
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block();
390+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults();
391391
assertNotNull(results);
392-
assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage);
392+
assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), indexingFailureMessage);
393393
// The third hotel should be the most similar
394394
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage);
395395
// Score should be different than zero
@@ -412,9 +412,9 @@ public void searchWithVectors(RecordCollectionOptions options, String embeddingN
412412
.build();
413413

414414
// Embeddings similar to the third hotel
415-
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block();
415+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults();
416416
assertNotNull(results);
417-
assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage);
417+
assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), indexingFailureMessage);
418418
// The third hotel should be the most similar
419419
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage);
420420
assertNotNull(results.get(0).getRecord().getEuclidean());
@@ -431,12 +431,12 @@ public void searchWithOffSet(RecordCollectionOptions options, String embeddingNa
431431

432432
VectorSearchOptions searchOptions = VectorSearchOptions.builder()
433433
.withVectorFieldName(embeddingName)
434-
.withOffset(1)
435-
.withLimit(4)
434+
.withSkip(1)
435+
.withTop(4)
436436
.build();
437437

438438
// Embeddings similar to the third hotel
439-
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block();
439+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults();
440440
assertNotNull(results);
441441
assertEquals(4, results.size(), indexingFailureMessage);
442442
// The first hotel should be the most similar
@@ -454,15 +454,15 @@ public void searchWithFilterEqualToFilter(RecordCollectionOptions recordCollecti
454454

455455
VectorSearchOptions options = VectorSearchOptions.builder()
456456
.withVectorFieldName(embeddingName)
457-
.withLimit(3)
457+
.withTop(3)
458458
.withVectorSearchFilter(
459459
VectorSearchFilter.builder()
460460
.equalTo("rating", 4.0).build())
461461
.build();
462462

463463
// Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned
464464
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
465-
options).block();
465+
options).block().getResults();
466466
assertNotNull(results);
467467
assertEquals(3, results.size());
468468
// The first hotel should be the most similar

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemoryVolatileVectorStore.java

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77
import com.azure.core.credential.KeyCredential;
88
import com.fasterxml.jackson.annotation.JsonProperty;
99
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
10+
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
1011
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
1112
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
1213
import com.microsoft.semantickernel.data.VolatileVectorStore;
1314
import com.microsoft.semantickernel.data.VolatileVectorStoreRecordCollectionOptions;
15+
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearch;
16+
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearchOptions;
1417
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
1518
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
1619
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
@@ -38,12 +41,12 @@ public class InMemoryVolatileVectorStore {
3841
private static final int EMBEDDING_DIMENSIONS = 1536;
3942

4043
static class GitHubFile {
41-
@JsonProperty("fileId") // Set a different name for the storage field if needed
42-
@VectorStoreRecordKey()
44+
@VectorStoreRecordKey
4345
private final String id;
44-
@VectorStoreRecordData()
46+
@VectorStoreRecordData
4547
private final String description;
4648
@VectorStoreRecordData
49+
@TextSearchResultValue
4750
private final String link;
4851
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
4952
private final List<Float> embedding;
@@ -125,27 +128,24 @@ public static void inMemoryStoreAndSearch(
125128
.then(storeData(collection, embeddingGeneration, sampleData()))
126129
.block();
127130

131+
// Build a text search on top of the collection's vectorized search, using the embedding service to vectorize the query text
132+
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
133+
.withVectorizedSearch(collection)
134+
.withTextEmbeddingGenerationService(embeddingGeneration)
135+
.build();
136+
128137
// Search for results
129138
// Volatile store executes an exhaustive search; for approximate search, use Azure AI Search, Redis or JDBC with PostgreSQL
130-
var results = search("How to get started", collection, embeddingGeneration).block();
139+
String query = "How to get started?";
140+
var results = vectorStoreTextSearch.searchAsync(query, null)
141+
.block();
131142

132-
if (results == null || results.isEmpty()) {
143+
if (results == null || results.getTotalCount() == 0) {
133144
System.out.println("No search results found.");
134145
return;
135146
}
136-
var searchResult = results.get(0);
137-
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
138-
searchResult.getScore(), searchResult.getRecord().link,
139-
searchResult.getRecord().description);
140-
}
141147

142-
private static Mono<List<VectorSearchResult<GitHubFile>>> search(
143-
String searchText,
144-
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
145-
OpenAITextEmbeddingGenerationService embeddingGeneration) {
146-
// Generate embeddings for the search text and search for the closest records
147-
return embeddingGeneration.generateEmbeddingsAsync(Collections.singletonList(searchText))
148-
.flatMap(r -> recordCollection.searchAsync(r.get(0).getVector(), null));
148+
System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
149149
}
150150

151151
private static Mono<List<String>> storeData(

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithAzureAISearch.java

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStore;
1616
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreOptions;
1717
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreRecordCollectionOptions;
18+
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
1819
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
20+
import com.microsoft.semantickernel.data.vectorsearch.VectorizedSearch;
1921
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
22+
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearch;
2023
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
2124
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
2225
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
@@ -51,13 +54,12 @@ public class VectorStoreWithAzureAISearch {
5154
private static final int EMBEDDING_DIMENSIONS = 1536;
5255

5356
static class GitHubFile {
54-
55-
@JsonProperty("fileId") // Set a different name for the storage field if needed
5657
@VectorStoreRecordKey()
5758
private final String id;
5859
@VectorStoreRecordData()
5960
private final String description;
6061
@VectorStoreRecordData
62+
@TextSearchResultValue
6163
private final String link;
6264
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_SIMILARITY)
6365
private final List<Float> embedding;
@@ -111,7 +113,6 @@ public static void main(String[] args) {
111113
var searchClient = new SearchIndexClientBuilder()
112114
.endpoint(AZURE_AI_SEARCH_ENDPOINT)
113115
.credential(new AzureKeyCredential(AZURE_AISEARCH_KEY))
114-
.clientOptions(clientOptions())
115116
.buildAsyncClient();
116117

117118
storeAndSearch(searchClient, embeddingGeneration);
@@ -141,27 +142,24 @@ public static void storeAndSearch(
141142
.then(storeData(collection, embeddingGeneration, sampleData()))
142143
.block();
143144

145+
// Build a text search on top of the collection's vectorized search, using the embedding service to vectorize the query text
146+
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
147+
.withVectorizedSearch(collection)
148+
.withTextEmbeddingGenerationService(embeddingGeneration)
149+
.build();
150+
144151
// Search for results
145152
// Might need to wait for the data to be indexed
146-
var results = search("How to get started", collection, embeddingGeneration).block();
153+
String query = "How to get started?";
154+
var results = vectorStoreTextSearch.searchAsync(query, null)
155+
.block();
147156

148-
if (results == null || results.isEmpty()) {
157+
if (results == null || results.getTotalCount() == 0) {
149158
System.out.println("No search results found.");
150159
return;
151160
}
152-
var searchResult = results.get(0);
153-
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
154-
searchResult.getScore(), searchResult.getRecord().link,
155-
searchResult.getRecord().description);
156-
}
157161

158-
private static Mono<List<VectorSearchResult<GitHubFile>>> search(
159-
String searchText,
160-
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
161-
OpenAITextEmbeddingGenerationService embeddingGeneration) {
162-
// Generate embeddings for the search text and search for the closest records
163-
return embeddingGeneration.generateEmbeddingAsync(searchText)
164-
.flatMap(r -> recordCollection.searchAsync(r.getVector(), null));
162+
System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
165163
}
166164

167165
private static Mono<List<String>> storeData(
@@ -204,11 +202,4 @@ private static Map<String, String> sampleData() {
204202
"README: README associated with a sample chat summary react-based webapp" },
205203
}).collect(Collectors.toMap(element -> element[0], element -> element[1]));
206204
}
207-
208-
private static ClientOptions clientOptions() {
209-
return new ClientOptions()
210-
.setTracingOptions(new TracingOptions())
211-
.setMetricsOptions(new MetricsOptions())
212-
.setApplicationId("Semantic-Kernel");
213-
}
214205
}

0 commit comments

Comments
 (0)