microsoft · dluc · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025
diff --git a/src/Core/Config/Embeddings/EmbeddingsConfig.cs b/src/Core/Config/Embeddings/EmbeddingsConfig.cs
@@ -12,6 +12,7 @@ namespace KernelMemory.Core.Config.Embeddings;
 [JsonDerivedType(typeof(OllamaEmbeddingsConfig), typeDiscriminator: "ollama")]
 [JsonDerivedType(typeof(OpenAIEmbeddingsConfig), typeDiscriminator: "openai")]
 [JsonDerivedType(typeof(AzureOpenAIEmbeddingsConfig), typeDiscriminator: "azureOpenAI")]
+[JsonDerivedType(typeof(HuggingFaceEmbeddingsConfig), typeDiscriminator: "huggingFace")]
 public abstract class EmbeddingsConfig : IValidatable
 {
     /// <summary>

diff --git a/src/Core/Config/Embeddings/HuggingFaceEmbeddingsConfig.cs b/src/Core/Config/Embeddings/HuggingFaceEmbeddingsConfig.cs
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft. All rights reserved.
+using System.Text.Json.Serialization;
+using KernelMemory.Core.Config.Enums;
+using KernelMemory.Core.Config.Validation;
+using KernelMemory.Core.Embeddings;
+
+namespace KernelMemory.Core.Config.Embeddings;
+
+/// <summary>
+/// HuggingFace Inference API embeddings provider configuration.
+/// Supports the serverless Inference API for embedding models.
+/// </summary>
+public sealed class HuggingFaceEmbeddingsConfig : EmbeddingsConfig
+{
+    /// <inheritdoc />
+    [JsonIgnore]
+    public override EmbeddingsTypes Type => EmbeddingsTypes.HuggingFace;
+
+    /// <summary>
+    /// HuggingFace model name (e.g., "sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-base-en-v1.5").
+    /// </summary>
+    [JsonPropertyName("model")]
+    public string Model { get; set; } = EmbeddingConstants.DefaultHuggingFaceModel;
+
+    /// <summary>
+    /// HuggingFace API key (token). Must be non-blank for <see cref="Validate"/> to succeed.
+    /// Can also be set via HF_TOKEN environment variable.
+    /// NOTE(review): this class never reads HF_TOKEN itself, so that fallback has to be resolved
+    /// into this property before <see cref="Validate"/> runs - confirm against the config loader.
+    /// </summary>
+    [JsonPropertyName("apiKey")]
+    public string? ApiKey { get; set; }
+
+    /// <summary>
+    /// HuggingFace Inference API base URL.
+    /// Default: https://api-inference.huggingface.co
+    /// Can be changed for custom inference endpoints; must be an absolute http or https URL.
+    /// </summary>
+    [JsonPropertyName("baseUrl")]
+    public string BaseUrl { get; set; } = EmbeddingConstants.DefaultHuggingFaceBaseUrl;
+
+    /// <inheritdoc />
+    /// <exception cref="ConfigException">
+    /// When <see cref="Model"/>, <see cref="ApiKey"/> or <see cref="BaseUrl"/> is null/blank,
+    /// or when <see cref="BaseUrl"/> is not an absolute http/https URL.
+    /// The failing property is reported as "{path}.PropertyName".
+    /// </exception>
+    public override void Validate(string path)
+    {
+        if (string.IsNullOrWhiteSpace(this.Model))
+        {
+            throw new ConfigException($"{path}.Model", "HuggingFace model name is required");
+        }
+
+        if (string.IsNullOrWhiteSpace(this.ApiKey))
+        {
+            throw new ConfigException($"{path}.ApiKey", "HuggingFace API key is required");
+        }
+
+        if (string.IsNullOrWhiteSpace(this.BaseUrl))
+        {
+            throw new ConfigException($"{path}.BaseUrl", "HuggingFace base URL is required");
+        }
+
+        // UriKind.Absolute on its own also accepts file:, ftp:, mailto: and, on Unix, rooted paths
+        // such as "/tmp/x" (parsed as file URIs). None of those can reach the HTTP inference API,
+        // so reject everything except http/https here instead of failing later at request time.
+        if (!Uri.TryCreate(this.BaseUrl, UriKind.Absolute, out Uri? baseUri)
+            || (baseUri.Scheme != Uri.UriSchemeHttp && baseUri.Scheme != Uri.UriSchemeHttps))
+        {
+            throw new ConfigException($"{path}.BaseUrl",
+                $"Invalid HuggingFace base URL: {this.BaseUrl}");
+        }
+    }
+}
diff --git a/src/Core/Config/Enums/CacheModes.cs b/src/Core/Config/Enums/CacheModes.cs
@@ -0,0 +1,30 @@
+// Copyright (c) Microsoft. All rights reserved.
+using System.Text.Json.Serialization;
+
+namespace KernelMemory.Core.Config.Enums;
+
+/// <summary>
+/// Operating modes of the embedding cache.
+/// Selects whether lookups, stores, or both are performed against the cache.
+/// </summary>
+[JsonConverter(typeof(JsonStringEnumConverter))]
+public enum CacheModes
+{
+    /// <summary>
+    /// Read and write (the default, being the zero value).
+    /// Hits return the stored embedding; misses are generated and then stored.
+    /// </summary>
+    ReadWrite = 0,
+
+    /// <summary>
+    /// Read only: existing entries are served, nothing new is stored.
+    /// Suited to read-only deployments or a pre-populated cache.
+    /// </summary>
+    ReadOnly = 1,
+
+    /// <summary>
+    /// Write only: entries are stored but never served.
+    /// Suited to warming up a cache without affecting current behavior.
+    /// </summary>
+    WriteOnly = 2
+}
diff --git a/src/Core/Config/Enums/EmbeddingsTypes.cs b/src/Core/Config/Enums/EmbeddingsTypes.cs
@@ -16,5 +16,8 @@ public enum EmbeddingsTypes
     OpenAI,
 
     /// <summary>Azure OpenAI Service</summary>
-    AzureOpenAI
+    AzureOpenAI,
+
+    /// <summary>Hugging Face Inference API</summary>
+    HuggingFace
 }
diff --git a/src/Core/Embeddings/Cache/CachedEmbedding.cs b/src/Core/Embeddings/Cache/CachedEmbedding.cs
@@ -0,0 +1,30 @@
+// Copyright (c) Microsoft. All rights reserved.
+using System.Diagnostics.CodeAnalysis;
+
+namespace KernelMemory.Core.Embeddings.Cache;
+
+/// <summary>
+/// A cached embedding vector together with its metadata:
+/// the vector itself, an optional token count, and the time it was cached.
+/// </summary>
+public sealed class CachedEmbedding
+{
+    /// <summary>
+    /// The embedding vector, exposed directly as a float array for performance (hence the CA1819 suppression).
+    /// <c>init</c> only prevents reassigning the property; the elements stay mutable, so treat the array as read-only.
+    /// </summary>
+    [SuppressMessage("Performance", "CA1819:Properties should not return arrays",
+        Justification = "Embedding vectors are performance-critical and read-only after creation")]
+    public required float[] Vector { get; init; }
+
+    /// <summary>
+    /// Optional token count from the provider response.
+    /// Null if the provider did not return token count.
+    /// </summary>
+    public int? TokenCount { get; init; }
+
+    /// <summary>
+    /// Timestamp when this embedding was stored in the cache. Must be supplied at construction.
+    /// </summary>
+    public required DateTimeOffset Timestamp { get; init; }
+}
diff --git a/src/Core/Embeddings/Cache/EmbeddingCacheKey.cs b/src/Core/Embeddings/Cache/EmbeddingCacheKey.cs
@@ -0,0 +1,101 @@
+// Copyright (c) Microsoft. All rights reserved.
+using System.Security.Cryptography;
+using System.Text;
+
+namespace KernelMemory.Core.Embeddings.Cache;
+
+/// <summary>
+/// Identity of a cached embedding: provider, model, vector dimensions, normalization flag,
+/// plus the length and SHA256 digest of the input text.
+/// Only the digest is kept - the input text itself is NOT stored, for security.
+/// </summary>
+public sealed class EmbeddingCacheKey
+{
+    /// <summary>
+    /// Name of the provider type (e.g., "OpenAI", "Ollama", "AzureOpenAI", "HuggingFace").
+    /// </summary>
+    public required string Provider { get; init; }
+
+    /// <summary>
+    /// Name of the model (e.g., "text-embedding-ada-002", "qwen3-embedding").
+    /// </summary>
+    public required string Model { get; init; }
+
+    /// <summary>
+    /// Number of dimensions of the vectors produced by the model.
+    /// </summary>
+    public required int VectorDimensions { get; init; }
+
+    /// <summary>
+    /// True when the vectors are normalized.
+    /// </summary>
+    public required bool IsNormalized { get; init; }
+
+    /// <summary>
+    /// Length of the original text as reported by <see cref="string.Length"/>.
+    /// Kept next to the hash as an extra guard against collisions.
+    /// </summary>
+    public required int TextLength { get; init; }
+
+    /// <summary>
+    /// SHA256 digest of the original text, as a hex string.
+    /// The text itself is never stored, for security/privacy.
+    /// </summary>
+    public required string TextHash { get; init; }
+
+    /// <summary>
+    /// Builds a cache key from the given values.
+    /// The text contributes only its length and its SHA256 digest; it is not retained.
+    /// </summary>
+    /// <param name="provider">Provider type name.</param>
+    /// <param name="model">Model name.</param>
+    /// <param name="vectorDimensions">Vector dimensions; must be at least 1.</param>
+    /// <param name="isNormalized">Whether vectors are normalized.</param>
+    /// <param name="text">The text to hash.</param>
+    /// <returns>A new EmbeddingCacheKey instance.</returns>
+    /// <exception cref="ArgumentNullException">When provider, model, or text is null.</exception>
+    /// <exception cref="ArgumentOutOfRangeException">When vectorDimensions is less than 1.</exception>
+    public static EmbeddingCacheKey Create(
+        string provider,
+        string model,
+        int vectorDimensions,
+        bool isNormalized,
+        string text)
+    {
+        // The throw helpers capture the argument expression, so the reported
+        // parameter name matches the argument without spelling it out.
+        ArgumentNullException.ThrowIfNull(provider);
+        ArgumentNullException.ThrowIfNull(model);
+        ArgumentNullException.ThrowIfNull(text);
+        ArgumentOutOfRangeException.ThrowIfLessThan(vectorDimensions, 1);
+
+        int length = text.Length;
+        string digest = ComputeSha256Hash(text);
+
+        return new EmbeddingCacheKey
+        {
+            Provider = provider,
+            Model = model,
+            VectorDimensions = vectorDimensions,
+            IsNormalized = isNormalized,
+            TextLength = length,
+            TextHash = digest
+        };
+    }
+
+    /// <summary>
+    /// Renders the key as a single pipe-delimited string, usable as a database primary key.
+    /// Format: Provider|Model|Dimensions|IsNormalized|TextLength|TextHash
+    /// </summary>
+    /// <returns>A string suitable for use as a cache key.</returns>
+    public string ToCompositeKey() =>
+        $"{this.Provider}|{this.Model}|{this.VectorDimensions}|{this.IsNormalized}|{this.TextLength}|{this.TextHash}";
+
+    /// <summary>
+    /// Hashes the UTF-8 encoding of the text with SHA256 and renders the digest as lowercase hex.
+    /// </summary>
+    /// <param name="text">The text to hash.</param>
+    /// <returns>64-character lowercase hex string.</returns>
+    private static string ComputeSha256Hash(string text)
+    {
+        byte[] utf8 = Encoding.UTF8.GetBytes(text);
+        byte[] digest = SHA256.HashData(utf8);
+        return Convert.ToHexStringLower(digest);
+    }
+}
diff --git a/src/Core/Embeddings/Cache/IEmbeddingCache.cs b/src/Core/Embeddings/Cache/IEmbeddingCache.cs
@@ -0,0 +1,36 @@
+// Copyright (c) Microsoft. All rights reserved.
+using KernelMemory.Core.Config.Enums;
+
+namespace KernelMemory.Core.Embeddings.Cache;
+
+/// <summary>
+/// Contract for embedding cache implementations.
+/// Supports dependency injection and multiple cache implementations (SQLite, etc.).
+/// </summary>
+public interface IEmbeddingCache
+{
+    /// <summary>
+    /// Cache mode (read-write, read-only, write-only), see <see cref="CacheModes"/>.
+    /// Controls whether read and write operations are allowed.
+    /// </summary>
+    CacheModes Mode { get; }
+
+    /// <summary>
+    /// Try to retrieve a cached embedding by key.
+    /// Returns null if not found or if <see cref="Mode"/> is <see cref="CacheModes.WriteOnly"/>.
+    /// </summary>
+    /// <param name="key">The cache key to look up.</param>
+    /// <param name="ct">Cancellation token.</param>
+    /// <returns>The cached embedding if found, null otherwise.</returns>
+    Task<CachedEmbedding?> TryGetAsync(EmbeddingCacheKey key, CancellationToken ct = default);
+
+    /// <summary>
+    /// Store an embedding in the cache.
+    /// Does nothing if <see cref="Mode"/> is <see cref="CacheModes.ReadOnly"/>.
+    /// NOTE(review): behavior when <paramref name="key"/> already exists (overwrite vs. keep) is not
+    /// specified by this interface - confirm against the implementations.
+    /// </summary>
+    /// <param name="key">The cache key.</param>
+    /// <param name="vector">The embedding vector to store.</param>
+    /// <param name="tokenCount">Optional token count from the provider; null when unknown.</param>
+    /// <param name="ct">Cancellation token.</param>
+    /// <returns>A task representing the asynchronous store operation.</returns>
+    Task StoreAsync(EmbeddingCacheKey key, float[] vector, int? tokenCount, CancellationToken ct = default);
+}