Skip to content

Commit 5f973ea

Browse files
Milder Hernandez and johnoliver authored
Add VolatileVectorStore (#98)
* Rename memory to data; add RedisVectorStoreRecordCollection builder() method
* add redis example
* Fix name
* Updates
* Updates
* Fix null away warnings
* Fix null away warnings
* Remove type from VectorStores
* Add VolatileVectorStore and RecordCollection
* Add Exceptions when getter for Id is not found

---------

Co-authored-by: John Oliver <[email protected]>
1 parent 6d8f8a7 commit 5f973ea

File tree

10 files changed

+822
-53
lines changed

10 files changed

+822
-53
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
package com.microsoft.semantickernel.samples.syntaxexamples.memory;
3+
4+
import com.azure.ai.openai.OpenAIAsyncClient;
5+
import com.azure.ai.openai.OpenAIClientBuilder;
6+
import com.azure.core.credential.AzureKeyCredential;
7+
import com.azure.core.credential.KeyCredential;
8+
import com.azure.core.util.ClientOptions;
9+
import com.azure.core.util.MetricsOptions;
10+
import com.azure.core.util.TracingOptions;
11+
import com.azure.search.documents.indexes.SearchIndexAsyncClient;
12+
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
13+
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
14+
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStore;
15+
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreOptions;
16+
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreRecordCollection;
17+
import com.microsoft.semantickernel.data.VolatileVectorStore;
18+
import com.microsoft.semantickernel.data.VolatileVectorStoreRecordCollection;
19+
import com.microsoft.semantickernel.data.recordattributes.VectorStoreRecordDataAttribute;
20+
import com.microsoft.semantickernel.data.recordattributes.VectorStoreRecordKeyAttribute;
21+
import com.microsoft.semantickernel.data.recordattributes.VectorStoreRecordVectorAttribute;
22+
import reactor.core.publisher.Flux;
23+
import reactor.core.publisher.Mono;
24+
25+
import java.nio.charset.StandardCharsets;
26+
import java.util.Arrays;
27+
import java.util.Base64;
28+
import java.util.Collections;
29+
import java.util.List;
30+
import java.util.Map;
31+
import java.util.stream.Collectors;
32+
33+
public class InMemory_DataStorage {
34+
private static final String CLIENT_KEY = System.getenv("CLIENT_KEY");
35+
private static final String AZURE_CLIENT_KEY = System.getenv("AZURE_CLIENT_KEY");
36+
37+
// Only required if AZURE_CLIENT_KEY is set
38+
private static final String CLIENT_ENDPOINT = System.getenv("CLIENT_ENDPOINT");
39+
40+
// Embedding model configuration
41+
private static final String MODEL_ID = System.getenv()
42+
.getOrDefault("EMBEDDING_MODEL_ID", "text-embedding-3-large");
43+
private static final int EMBEDDING_DIMENSIONS = 1536;
44+
45+
static class GitHubFile {
46+
@VectorStoreRecordKeyAttribute()
47+
private final String id;
48+
@VectorStoreRecordDataAttribute(hasEmbedding = true, embeddingFieldName = "embedding")
49+
private final String description;
50+
@VectorStoreRecordDataAttribute
51+
private final String link;
52+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw")
53+
private final List<Float> embedding;
54+
55+
public GitHubFile(
56+
String id,
57+
String description,
58+
String link,
59+
List<Float> embedding) {
60+
this.id = id;
61+
this.description = description;
62+
this.link = link;
63+
this.embedding = embedding;
64+
}
65+
66+
public String getId() {
67+
return id;
68+
}
69+
70+
public String getDescription() {
71+
return description;
72+
}
73+
74+
static String encodeId(String realId) {
75+
byte[] bytes = Base64.getUrlEncoder().encode(realId.getBytes(StandardCharsets.UTF_8));
76+
return new String(bytes, StandardCharsets.UTF_8);
77+
}
78+
}
79+
80+
public static void main(String[] args) {
81+
System.out.println("===================================================================");
82+
System.out.println("========== Volatile (In memory) Vector Store Example ==============");
83+
System.out.println("===================================================================");
84+
85+
OpenAIAsyncClient client;
86+
87+
if (AZURE_CLIENT_KEY != null) {
88+
client = new OpenAIClientBuilder()
89+
.credential(new AzureKeyCredential(AZURE_CLIENT_KEY))
90+
.endpoint(CLIENT_ENDPOINT)
91+
.buildAsyncClient();
92+
93+
} else {
94+
client = new OpenAIClientBuilder()
95+
.credential(new KeyCredential(CLIENT_KEY))
96+
.buildAsyncClient();
97+
}
98+
99+
var embeddingGeneration = OpenAITextEmbeddingGenerationService.builder()
100+
.withOpenAIAsyncClient(client)
101+
.withModelId(MODEL_ID)
102+
.withDimensions(EMBEDDING_DIMENSIONS)
103+
.build();
104+
105+
inMemoryDataStorage(embeddingGeneration);
106+
}
107+
108+
public static void inMemoryDataStorage(OpenAITextEmbeddingGenerationService embeddingGeneration) {
109+
// Create a new Volatile vector store
110+
var volatileVectorStore = new VolatileVectorStore();
111+
112+
String collectionName = "skgithubfiles";
113+
var collection = volatileVectorStore.getCollection(collectionName, GitHubFile.class, null);
114+
115+
// Create collection if it does not exist and store data
116+
List<String> ids = collection
117+
.createCollectionIfNotExistsAsync()
118+
.then(storeData(collection, embeddingGeneration, sampleData()))
119+
.block();
120+
121+
// Retrieve all records from the collection
122+
List<GitHubFile> data = collection.getBatchAsync(ids, null).block();
123+
124+
data.forEach(gitHubFile -> System.out.println("Retrieved: " + gitHubFile.getDescription()));
125+
}
126+
127+
private static Mono<List<String>> storeData(
128+
VolatileVectorStoreRecordCollection<GitHubFile> recordCollection,
129+
OpenAITextEmbeddingGenerationService embeddingGeneration,
130+
Map<String, String> data) {
131+
132+
return Flux.fromIterable(data.entrySet())
133+
.flatMap(entry -> {
134+
System.out.println("Save '" + entry.getKey() + "' to memory.");
135+
136+
return embeddingGeneration
137+
.generateEmbeddingsAsync(Collections.singletonList(entry.getValue()))
138+
.flatMap(embeddings -> {
139+
GitHubFile gitHubFile = new GitHubFile(
140+
GitHubFile.encodeId(entry.getKey()),
141+
entry.getValue(),
142+
entry.getKey(),
143+
embeddings.get(0).getVector());
144+
return recordCollection.upsertAsync(gitHubFile, null);
145+
});
146+
})
147+
.collectList();
148+
}
149+
150+
private static Map<String, String> sampleData() {
151+
return Arrays.stream(new String[][] {
152+
{ "https://github.com/microsoft/semantic-kernel/blob/main/README.md",
153+
"README: Installation, getting started with Semantic Kernel, and how to contribute" },
154+
{ "https://github.com/microsoft/semantic-kernel/blob/main/samples/notebooks/dotnet/02-running-prompts-from-file.ipynb",
155+
"Jupyter notebook describing how to pass prompts from a file to a semantic skill or function" },
156+
{ "https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT",
157+
"Sample demonstrating how to create a chat skill interfacing with ChatGPT" },
158+
{ "https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs",
159+
"C# class that defines a volatile embedding store" },
160+
{ "https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet/KernelHttpServer/README.md",
161+
"README: How to set up a Semantic Kernel Service API using Azure Function Runtime v4" },
162+
{ "https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md",
163+
"README: README associated with a sample chat summary react-based webapp" },
164+
}).collect(Collectors.toMap(element -> element[0], element -> element[1]));
165+
}
166+
}

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/azureaisearch/AzureAISearchVectorStoreRecordCollection.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,12 @@ public AzureAISearchVectorStoreRecordCollection(
9090
: options.getRecordDefinition();
9191

9292
// Validate supported types
93-
VectorStoreRecordDefinition.validateSupportedTypes(
94-
this.options.getRecordClass(), this.recordDefinition, supportedKeyTypes,
95-
supportedVectorTypes, supportedDataTypes);
93+
VectorStoreRecordDefinition.validateSupportedKeyTypes(this.options.getRecordClass(),
94+
this.recordDefinition, supportedKeyTypes);
95+
VectorStoreRecordDefinition.validateSupportedDataTypes(this.options.getRecordClass(),
96+
this.recordDefinition, supportedDataTypes);
97+
VectorStoreRecordDefinition.validateSupportedVectorTypes(this.options.getRecordClass(),
98+
this.recordDefinition, supportedVectorTypes);
9699

97100
// Add non-vector fields to the list
98101
nonVectorFields.add(this.recordDefinition.getKeyField().getName());

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisVectorStoreRecordCollection.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ public RedisVectorStoreRecordCollection(
8181
}
8282

8383
// Validate supported types
84-
VectorStoreRecordDefinition.validateSupportedTypes(options.getRecordClass(),
85-
recordDefinition,
86-
supportedKeyTypes, supportedVectorTypes, null);
84+
VectorStoreRecordDefinition.validateSupportedKeyTypes(options.getRecordClass(),
85+
recordDefinition, supportedKeyTypes);
86+
VectorStoreRecordDefinition.validateSupportedVectorTypes(options.getRecordClass(),
87+
recordDefinition, supportedVectorTypes);
8788

8889
// If mapper is not provided, set a default one
8990
if (options.getVectorStoreRecordMapper() == null) {
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
package com.microsoft.semantickernel.data;
3+
4+
import com.microsoft.semantickernel.data.recorddefinition.VectorStoreRecordDefinition;
5+
import reactor.core.publisher.Mono;
6+
7+
import javax.annotation.Nonnull;
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import java.util.Map;
11+
import java.util.concurrent.ConcurrentHashMap;
12+
13+
public class VolatileVectorStore implements VectorStore<VolatileVectorStoreRecordCollection<?>> {
14+
private final Map<String, Map<String, ?>> collections;
15+
16+
public VolatileVectorStore() {
17+
this.collections = new ConcurrentHashMap<>();
18+
}
19+
20+
/**
21+
* Gets a collection from the vector store.
22+
*
23+
* @param collectionName The name of the collection.
24+
* @param recordDefinition The record definition.
25+
* @return The collection.
26+
*/
27+
@Override
28+
public <Key, Record> VolatileVectorStoreRecordCollection<Record> getCollection(
29+
@Nonnull String collectionName, @Nonnull Class<Record> recordClass,
30+
VectorStoreRecordDefinition recordDefinition) {
31+
return new VolatileVectorStoreRecordCollection<>(
32+
collectionName,
33+
collections,
34+
VolatileVectorStoreRecordCollectionOptions.<Record>builder()
35+
.withRecordClass(recordClass)
36+
.withRecordDefinition(recordDefinition)
37+
.build());
38+
}
39+
40+
/**
41+
* Gets the names of all collections in the vector store.
42+
*
43+
* @return A list of collection names.
44+
*/
45+
@Override
46+
public Mono<List<String>> getCollectionNamesAsync() {
47+
return Mono.just(new ArrayList<>(collections.keySet()));
48+
}
49+
}

0 commit comments

Comments
 (0)