-
Notifications
You must be signed in to change notification settings - Fork 397
feat: add embedding generators and cache #1111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 2 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
df9091c
feat: add embedding generators and cache (#00007)
dluc b8fe07c
refactor: use composite primary key in embedding cache
dluc 12eb472
fix: address PR review comments for embedding cache
dluc 06bdcbc
refactor: remove timestamp from embedding cache
dluc b6449e7
refactor: simplify embedding cache - remove TokenCount
dluc adfdc0f
fix: dispose CancellationTokenSource in tests
dluc File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
| using System.Text.Json.Serialization; | ||
| using KernelMemory.Core.Config.Enums; | ||
| using KernelMemory.Core.Config.Validation; | ||
| using KernelMemory.Core.Embeddings; | ||
|
|
||
| namespace KernelMemory.Core.Config.Embeddings; | ||
|
|
||
/// <summary>
/// Configuration for the HuggingFace Inference API embeddings provider.
/// Targets the serverless Inference API for embedding models; <see cref="BaseUrl"/>
/// may be pointed at a custom inference endpoint instead.
/// </summary>
public sealed class HuggingFaceEmbeddingsConfig : EmbeddingsConfig
{
    /// <inheritdoc />
    [JsonIgnore]
    public override EmbeddingsTypes Type => EmbeddingsTypes.HuggingFace;

    /// <summary>
    /// HuggingFace model name (e.g., "sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-base-en-v1.5").
    /// Initialized from <c>EmbeddingConstants.DefaultHuggingFaceModel</c>.
    /// </summary>
    [JsonPropertyName("model")]
    public string Model { get; set; } = EmbeddingConstants.DefaultHuggingFaceModel;

    /// <summary>
    /// HuggingFace API key (token).
    /// Can also be set via HF_TOKEN environment variable.
    /// </summary>
    [JsonPropertyName("apiKey")]
    public string? ApiKey { get; set; }

    /// <summary>
    /// HuggingFace Inference API base URL.
    /// Default: https://api-inference.huggingface.co
    /// Can be changed for custom inference endpoints.
    /// Must be an absolute http or https URL to pass <see cref="Validate"/>.
    /// </summary>
    [JsonPropertyName("baseUrl")]
    public string BaseUrl { get; set; } = EmbeddingConstants.DefaultHuggingFaceBaseUrl;

    /// <summary>
    /// Verifies that the model name, API key and base URL are present and that the
    /// base URL is an absolute http/https URL.
    /// </summary>
    /// <param name="path">Configuration path of this section, used as the prefix of the reported setting name.</param>
    /// <exception cref="ConfigException">
    /// When <see cref="Model"/>, <see cref="ApiKey"/> or <see cref="BaseUrl"/> is null, empty or whitespace,
    /// or when <see cref="BaseUrl"/> is not an absolute http/https URL.
    /// </exception>
    public override void Validate(string path)
    {
        if (string.IsNullOrWhiteSpace(this.Model))
        {
            throw new ConfigException($"{path}.Model", "HuggingFace model name is required");
        }

        // NOTE(review): the ApiKey docs mention the HF_TOKEN environment variable, but this
        // method only inspects the property - presumably the variable is applied before
        // validation runs; confirm against the configuration loader.
        if (string.IsNullOrWhiteSpace(this.ApiKey))
        {
            throw new ConfigException($"{path}.ApiKey", "HuggingFace API key is required");
        }

        if (string.IsNullOrWhiteSpace(this.BaseUrl))
        {
            throw new ConfigException($"{path}.BaseUrl", "HuggingFace base URL is required");
        }

        // UriKind.Absolute alone also accepts file://, ftp:// and (on Unix) rooted file paths
        // such as "/tmp/x", none of which can serve an HTTP API, so the scheme is checked too.
        bool isHttpUrl =
            Uri.TryCreate(this.BaseUrl, UriKind.Absolute, out Uri? parsed)
            && (string.Equals(parsed.Scheme, Uri.UriSchemeHttp, StringComparison.Ordinal)
                || string.Equals(parsed.Scheme, Uri.UriSchemeHttps, StringComparison.Ordinal));

        if (!isHttpUrl)
        {
            throw new ConfigException($"{path}.BaseUrl",
                $"Invalid HuggingFace base URL: {this.BaseUrl}");
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
| using System.Text.Json.Serialization; | ||
|
|
||
| namespace KernelMemory.Core.Config.Enums; | ||
|
|
||
/// <summary>
/// Selects which embedding cache operations are enabled: reads, writes, or both.
/// Values are converted to and from JSON as strings via <see cref="JsonStringEnumConverter"/>.
/// </summary>
[JsonConverter(typeof(JsonStringEnumConverter))]
public enum CacheModes
{
    /// <summary>
    /// Read from and write to the cache (default).
    /// Hits return the stored embedding; misses are generated and then stored.
    /// </summary>
    ReadWrite = 0,

    /// <summary>
    /// Read from the cache but never write to it.
    /// Suited to read-only deployments or a pre-populated cache.
    /// </summary>
    ReadOnly = 1,

    /// <summary>
    /// Write to the cache but never read from it.
    /// Suited to warming up a cache without affecting current behavior.
    /// </summary>
    WriteOnly = 2
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
| using System.Diagnostics.CodeAnalysis; | ||
|
|
||
| namespace KernelMemory.Core.Embeddings.Cache; | ||
|
|
||
/// <summary>
/// A single embedding cache entry: the vector together with the metadata kept alongside it
/// (an optional token count and the time the entry was cached).
/// </summary>
public sealed class CachedEmbedding
{
    /// <summary>
    /// The embedding vector, exposed as a raw float array.
    /// An array is used deliberately for performance; the vector is treated as read-only once created.
    /// </summary>
    [SuppressMessage(
        "Performance",
        "CA1819:Properties should not return arrays",
        Justification = "Embedding vectors are performance-critical and read-only after creation")]
    public required float[] Vector { get; init; }

    /// <summary>
    /// Token count reported by the provider, if any.
    /// Null when the provider response carried no token count.
    /// </summary>
    public int? TokenCount { get; init; }

    /// <summary>
    /// When this embedding was stored in the cache.
    /// </summary>
    public required DateTimeOffset Timestamp { get; init; }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,101 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
| using System.Security.Cryptography; | ||
| using System.Text; | ||
|
|
||
| namespace KernelMemory.Core.Embeddings.Cache; | ||
|
|
||
/// <summary>
/// Cache key for embeddings. Identifies an embedding by provider, model, vector dimensions,
/// normalization state, text length and a hash of the text.
/// The input text is NOT stored - only its SHA256 hash is kept, for security.
/// </summary>
public sealed class EmbeddingCacheKey
{
    /// <summary>
    /// Provider type name (e.g., "OpenAI", "Ollama", "AzureOpenAI", "HuggingFace").
    /// </summary>
    public required string Provider { get; init; }

    /// <summary>
    /// Model name (e.g., "text-embedding-ada-002", "qwen3-embedding").
    /// </summary>
    public required string Model { get; init; }

    /// <summary>
    /// Vector dimensions produced by this model.
    /// </summary>
    public required int VectorDimensions { get; init; }

    /// <summary>
    /// Whether the vectors are normalized.
    /// </summary>
    public required bool IsNormalized { get; init; }

    /// <summary>
    /// Length of the original text as reported by <see cref="string.Length"/> (UTF-16 code units).
    /// Used as an additional collision prevention measure.
    /// </summary>
    public required int TextLength { get; init; }

    /// <summary>
    /// SHA256 hash of the original text (hex string).
    /// The text itself is never stored for security/privacy.
    /// </summary>
    public required string TextHash { get; init; }

    /// <summary>
    /// Creates a cache key from the given parameters.
    /// The text is hashed with SHA256 and only its length and hash are kept.
    /// </summary>
    /// <param name="provider">Provider type name.</param>
    /// <param name="model">Model name.</param>
    /// <param name="vectorDimensions">Vector dimensions; must be at least 1.</param>
    /// <param name="isNormalized">Whether vectors are normalized.</param>
    /// <param name="text">The text to hash.</param>
    /// <returns>A new EmbeddingCacheKey instance.</returns>
    /// <exception cref="ArgumentNullException">When provider, model, or text is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">When vectorDimensions is less than 1.</exception>
    public static EmbeddingCacheKey Create(
        string provider,
        string model,
        int vectorDimensions,
        bool isNormalized,
        string text)
    {
        // The throw helpers capture the argument expression themselves, so the
        // reported parameter names match the ones above without passing nameof(...).
        ArgumentNullException.ThrowIfNull(provider);
        ArgumentNullException.ThrowIfNull(model);
        ArgumentNullException.ThrowIfNull(text);
        ArgumentOutOfRangeException.ThrowIfLessThan(vectorDimensions, 1);

        return new EmbeddingCacheKey
        {
            Provider = provider,
            Model = model,
            VectorDimensions = vectorDimensions,
            IsNormalized = isNormalized,
            TextLength = text.Length,
            TextHash = ComputeSha256Hash(text)
        };
    }

    /// <summary>
    /// Generates a composite key string for use as a database primary key.
    /// Format: Provider|Model|Dimensions|IsNormalized|TextLength|TextHash
    /// </summary>
    /// <remarks>
    /// Numbers are formatted with the invariant culture so the same key value always produces
    /// the same string, whatever the current thread culture is.
    /// Provider and Model are emitted verbatim; a '|' inside either one is not escaped.
    /// </remarks>
    /// <returns>A string suitable for use as a cache key.</returns>
    public string ToCompositeKey()
    {
        return string.Create(
            System.Globalization.CultureInfo.InvariantCulture,
            $"{this.Provider}|{this.Model}|{this.VectorDimensions}|{this.IsNormalized}|{this.TextLength}|{this.TextHash}");
    }

    /// <summary>
    /// Computes the SHA256 hash of the UTF-8 encoding of the input text
    /// and returns it as a lowercase hex string.
    /// </summary>
    /// <param name="text">The text to hash.</param>
    /// <returns>64-character lowercase hex string.</returns>
    private static string ComputeSha256Hash(string text)
    {
        byte[] utf8 = Encoding.UTF8.GetBytes(text);
        byte[] digest = SHA256.HashData(utf8);
        return Convert.ToHexStringLower(digest);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
| using KernelMemory.Core.Config.Enums; | ||
|
|
||
| namespace KernelMemory.Core.Embeddings.Cache; | ||
|
|
||
/// <summary>
/// Contract for embedding cache implementations.
/// Allows the cache to be supplied through dependency injection and backed by
/// different stores (SQLite, etc.).
/// </summary>
public interface IEmbeddingCache
{
    /// <summary>
    /// The mode this cache operates in (read-write, read-only, write-only).
    /// Determines whether read and write operations are allowed.
    /// </summary>
    CacheModes Mode { get; }

    /// <summary>
    /// Looks up a cached embedding by key.
    /// Yields null when there is no entry for the key or when the mode is WriteOnly.
    /// </summary>
    /// <param name="key">The cache key to look up.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The cached embedding if found, null otherwise.</returns>
    Task<CachedEmbedding?> TryGetAsync(
        EmbeddingCacheKey key,
        CancellationToken ct = default);

    /// <summary>
    /// Stores an embedding in the cache under the given key.
    /// Is a no-op when the mode is ReadOnly.
    /// </summary>
    /// <param name="key">The cache key.</param>
    /// <param name="vector">The embedding vector to store.</param>
    /// <param name="tokenCount">Optional token count from the provider.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A task that represents the store operation.</returns>
    Task StoreAsync(
        EmbeddingCacheKey key,
        float[] vector,
        int? tokenCount,
        CancellationToken ct = default);
}
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.