Skip to content

Commit db814b1

Browse files
author
Milder Hernandez Cagua
committed
Update vector store examples and add some bug fixes
1 parent 205fc38 commit db814b1

File tree

11 files changed

+158
-75
lines changed

11 files changed

+158
-75
lines changed

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/Hotel.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import java.util.List;
1010

1111
public class Hotel {
12+
@JsonProperty("hotelId")
1213
@VectorStoreRecordKeyAttribute
1314
private final String id;
1415

@@ -46,7 +47,7 @@ public Hotel() {
4647

4748
@JsonCreator
4849
public Hotel(
49-
@JsonProperty("id") String id,
50+
@JsonProperty("hotelId") String id,
5051
@JsonProperty("name") String name,
5152
@JsonProperty("code") int code,
5253
@JsonProperty("summary") String description,

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisHashSetVectorStoreRecordCollectionTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.util.stream.Stream;
3434

3535
import static org.junit.jupiter.api.Assertions.assertEquals;
36+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
3637
import static org.junit.jupiter.api.Assertions.assertNotNull;
3738
import static org.junit.jupiter.api.Assertions.assertNull;
3839

@@ -387,6 +388,8 @@ public void search(RecordCollectionOptions options, String embeddingName) {
387388
assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size());
388389
// The third hotel should be the most similar
389390
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId());
391+
// The score of the best match should be non-zero
392+
assertNotEquals(0.0, results.get(0).getScore());
390393
assertNull(results.get(0).getRecord().getEuclidean());
391394
}
392395

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisJsonVectorStoreRecordCollectionTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.util.stream.Stream;
3434

3535
import static org.junit.jupiter.api.Assertions.assertEquals;
36+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
3637
import static org.junit.jupiter.api.Assertions.assertNotNull;
3738
import static org.junit.jupiter.api.Assertions.assertNull;
3839

@@ -387,6 +388,8 @@ public void search(RecordCollectionOptions options, String embeddingName) {
387388
assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size());
388389
// The third hotel should be the most similar
389390
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId());
391+
// The score of the best match should be non-zero
392+
assertNotEquals(0.0, results.get(0).getScore());
390393
assertNull(results.get(0).getRecord().getEuclidean());
391394
}
392395

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/RunAll.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import com.microsoft.semantickernel.samples.syntaxexamples.functions.Example59_OpenAIFunctionCalling;
1818
import com.microsoft.semantickernel.samples.syntaxexamples.functions.Example60_AdvancedMethodFunctions;
1919
import com.microsoft.semantickernel.samples.syntaxexamples.java.KernelFunctionYaml_Example;
20-
import com.microsoft.semantickernel.samples.syntaxexamples.memory.AzureAISearchVectorStore;
20+
import com.microsoft.semantickernel.samples.syntaxexamples.memory.VectorStoreWithAzureAISearch;
2121
import com.microsoft.semantickernel.samples.syntaxexamples.plugins.Example10_DescribeAllPluginsAndFunctions;
2222
import com.microsoft.semantickernel.samples.syntaxexamples.plugins.Example13_ConversationSummaryPlugin;
2323
import com.microsoft.semantickernel.samples.syntaxexamples.template.Example06_TemplateLanguage;
@@ -38,7 +38,7 @@ public class RunAll {
3838

3939
public static void main(String[] args) {
4040
List<MainMethod> mains = Arrays.asList(
41-
AzureAISearchVectorStore::main,
41+
VectorStoreWithAzureAISearch::main,
4242
Example01_NativeFunctions::main,
4343
Example03_Arguments::main,
4444
Example05_InlineFunctionDefinition::main,

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemory_DataStorage.java renamed to samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemoryVolatileVectorStore.java

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import com.azure.ai.openai.OpenAIClientBuilder;
66
import com.azure.core.credential.AzureKeyCredential;
77
import com.azure.core.credential.KeyCredential;
8+
import com.fasterxml.jackson.annotation.JsonProperty;
89
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
10+
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
911
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
1012
import com.microsoft.semantickernel.data.VolatileVectorStore;
1113
import com.microsoft.semantickernel.data.VolatileVectorStoreRecordCollectionOptions;
@@ -20,7 +22,7 @@
2022
import reactor.core.publisher.Flux;
2123
import reactor.core.publisher.Mono;
2224

23-
public class InMemory_DataStorage {
25+
public class InMemoryVolatileVectorStore {
2426

2527
private static final String CLIENT_KEY = System.getenv("CLIENT_KEY");
2628
private static final String AZURE_CLIENT_KEY = System.getenv("AZURE_CLIENT_KEY");
@@ -34,14 +36,14 @@ public class InMemory_DataStorage {
3436
private static final int EMBEDDING_DIMENSIONS = 1536;
3537

3638
static class GitHubFile {
37-
39+
@JsonProperty("fileId") // Set a different name for the storage field if needed
3840
@VectorStoreRecordKeyAttribute()
3941
private final String id;
4042
@VectorStoreRecordDataAttribute()
4143
private final String description;
4244
@VectorStoreRecordDataAttribute
4345
private final String link;
44-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw")
46+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = "cosineDistance")
4547
private final List<Float> embedding;
4648

4749
public GitHubFile(
@@ -58,13 +60,18 @@ public GitHubFile(
5860
public String getId() {
5961
return id;
6062
}
61-
6263
public String getDescription() {
6364
return description;
6465
}
66+
public String getLink() {
67+
return link;
68+
}
69+
public List<Float> getEmbedding() {
70+
return embedding;
71+
}
6572

6673
static String encodeId(String realId) {
67-
return AzureAISearchVectorStore.GitHubFile.encodeId(realId);
74+
return VectorStoreWithAzureAISearch.GitHubFile.encodeId(realId);
6875
}
6976
}
7077

@@ -93,10 +100,10 @@ public static void main(String[] args) {
93100
.withDimensions(EMBEDDING_DIMENSIONS)
94101
.build();
95102

96-
inMemoryDataStorage(embeddingGeneration);
103+
inMemoryStoreAndSearch(embeddingGeneration);
97104
}
98105

99-
public static void inMemoryDataStorage(
106+
public static void inMemoryStoreAndSearch(
100107
OpenAITextEmbeddingGenerationService embeddingGeneration) {
101108
// Create a new Volatile vector store
102109
var volatileVectorStore = new VolatileVectorStore();
@@ -108,15 +115,32 @@ public static void inMemoryDataStorage(
108115
.build());
109116

110117
// Create collection if it does not exist and store data
111-
List<String> ids = collection
118+
collection
112119
.createCollectionIfNotExistsAsync()
113120
.then(storeData(collection, embeddingGeneration, sampleData()))
114121
.block();
115122

116-
// Retrieve all records from the collection
117-
List<GitHubFile> data = collection.getBatchAsync(ids, null).block();
123+
// Search for results
124+
// The volatile store performs an exhaustive search; for approximate search, use Azure AI Search, Redis, or JDBC with PostgreSQL
125+
var results = search("How to get started", collection, embeddingGeneration).block();
126+
127+
if (results == null || results.isEmpty()) {
128+
System.out.println("No search results found.");
129+
return;
130+
}
131+
var searchResult = results.get(0);
132+
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
133+
searchResult.getScore(), searchResult.getRecord().link,
134+
searchResult.getRecord().description);
135+
}
118136

119-
data.forEach(gitHubFile -> System.out.println("Retrieved: " + gitHubFile.getDescription()));
137+
private static Mono<List<VectorSearchResult<GitHubFile>>> search(
138+
String searchText,
139+
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
140+
OpenAITextEmbeddingGenerationService embeddingGeneration) {
141+
// Generate embeddings for the search text and search for the closest records
142+
return embeddingGeneration.generateEmbeddingsAsync(Collections.singletonList(searchText))
143+
.flatMap(r -> recordCollection.searchAsync(r.get(0).getVector(), null));
120144
}
121145

122146
private static Mono<List<String>> storeData(
@@ -128,6 +152,7 @@ private static Mono<List<String>> storeData(
128152
.flatMap(entry -> {
129153
System.out.println("Save '" + entry.getKey() + "' to memory.");
130154

155+
// Generate embeddings for the data and store it
131156
return embeddingGeneration
132157
.generateEmbeddingsAsync(Collections.singletonList(entry.getValue()))
133158
.flatMap(embeddings -> {

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/AzureAISearchVectorStore.java renamed to samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithAzureAISearch.java

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
1313
import com.fasterxml.jackson.annotation.JsonProperty;
1414
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
15+
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStore;
1516
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreOptions;
16-
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreRecordCollection;
1717
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreRecordCollectionOptions;
1818
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
19+
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
1920
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
2021
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
2122
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
@@ -30,7 +31,7 @@
3031
import reactor.core.publisher.Flux;
3132
import reactor.core.publisher.Mono;
3233

33-
public class AzureAISearchVectorStore {
34+
public class VectorStoreWithAzureAISearch {
3435

3536
private static final String CLIENT_KEY = System.getenv("CLIENT_KEY");
3637
private static final String AZURE_CLIENT_KEY = System.getenv("AZURE_CLIENT_KEY");
@@ -110,22 +111,21 @@ public static void main(String[] args) {
110111
.clientOptions(clientOptions())
111112
.buildAsyncClient();
112113

113-
dataStorageWithAzureAISearch(searchClient, embeddingGeneration);
114+
storeAndSearch(searchClient, embeddingGeneration);
114115
}
115116

116-
public static void dataStorageWithAzureAISearch(
117+
public static void storeAndSearch(
117118
SearchIndexAsyncClient searchClient,
118119
OpenAITextEmbeddingGenerationService embeddingGeneration) {
119120

120-
// Create a new Azure AI Search vector store
121-
var azureAISearchVectorStore = com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStore
122-
.builder()
121+
// Build an Azure AI Search Vector Store
122+
var azureAISearchVectorStore = AzureAISearchVectorStore.builder()
123123
.withSearchIndexAsyncClient(searchClient)
124124
.withOptions(new AzureAISearchVectorStoreOptions())
125125
.build();
126126

127127
String collectionName = "skgithubfiles";
128-
var collection = (AzureAISearchVectorStoreRecordCollection<GitHubFile>) azureAISearchVectorStore
128+
var collection = azureAISearchVectorStore
129129
.getCollection(
130130
collectionName,
131131
AzureAISearchVectorStoreRecordCollectionOptions.<GitHubFile>builder()
@@ -154,22 +154,23 @@ public static void dataStorageWithAzureAISearch(
154154

155155
private static Mono<List<VectorSearchResult<GitHubFile>>> search(
156156
String searchText,
157-
AzureAISearchVectorStoreRecordCollection<GitHubFile> recordCollection,
157+
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
158158
OpenAITextEmbeddingGenerationService embeddingGeneration) {
159-
159+
// Generate embeddings for the search text and search for the closest records
160160
return embeddingGeneration.generateEmbeddingsAsync(Collections.singletonList(searchText))
161161
.flatMap(r -> recordCollection.searchAsync(r.get(0).getVector(), null));
162162
}
163163

164164
private static Mono<List<String>> storeData(
165-
AzureAISearchVectorStoreRecordCollection<GitHubFile> recordCollection,
165+
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
166166
OpenAITextEmbeddingGenerationService embeddingGeneration,
167167
Map<String, String> data) {
168168

169169
return Flux.fromIterable(data.entrySet())
170170
.flatMap(entry -> {
171171
System.out.println("Save '" + entry.getKey() + "' to memory.");
172172

173+
// Generate embeddings for the data and store it
173174
return embeddingGeneration
174175
.generateEmbeddingsAsync(Collections.singletonList(entry.getValue()))
175176
.flatMap(embeddings -> {

0 commit comments

Comments
 (0)