Skip to content

.Net: Bug: InMemoryVectorStore DeserializeCollectionFromJsonAsync error #12826

@williamlzw

Description

@williamlzw

net9.0
semantic-kernel 1.61.0

System.ArgumentException: "Input span arguments must all have the same length."

#pragma warning disable SKEXP0001

using Microsoft.Extensions.AI;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.InMemory;

/// <summary>
/// Console host for the repro: creates a <see cref="Program4"/> and runs its scenario to completion.
/// </summary>
class Program1
{
    /// <summary>
    /// Runs <see cref="Program4.Test"/> and blocks the calling thread until the returned task finishes.
    /// </summary>
    public static void Test() => new Program4().Test().GetAwaiter().GetResult();

    /// <summary>
    /// Process entry point; forwards to <see cref="Test"/>.
    /// </summary>
    public static void Main() => Test();
}

/// <summary>
/// Repro scenario: embeds every text file under <c>Issues</c>, stores the records in an
/// <see cref="InMemoryVectorStore"/> collection, serializes that collection to a JSON file,
/// deserializes it again and runs a vector search against the deserialized collection.
/// </summary>
public class Program4
{
    /// <summary>
    /// Factory that turns a piece of source text and its embedding vector into a record instance.
    /// </summary>
    /// <param name="text">The text the embedding was generated from.</param>
    /// <param name="vector">The embedding generated for <paramref name="text"/>.</param>
    /// <returns>The record to upsert into the collection.</returns>
    public delegate TRecord CreateRecordFromString<TKey, TRecord>(string text, ReadOnlyMemory<float> vector) where TKey : notnull;

    /// <summary>
    /// Gets the named collection from <paramref name="vectorStore"/>, ensures it exists, and upserts
    /// one record per entry in <paramref name="entries"/>.
    /// </summary>
    /// <param name="vectorStore">Store that owns the collection.</param>
    /// <param name="collectionName">Name of the collection to get or create.</param>
    /// <param name="entries">Texts to embed and store; an empty list leaves the collection unchanged.</param>
    /// <param name="embeddingGenerator">Generator used to produce one embedding per entry.</param>
    /// <param name="createRecord">Factory that builds the record from the text and its embedding.</param>
    /// <returns>The collection the records were upserted into.</returns>
    internal static async Task<VectorStoreCollection<TKey, TRecord>> CreateCollectionFromListAsync<TKey, TRecord>(
        VectorStore vectorStore,
        string collectionName,
        List<string> entries,
        IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator,
        CreateRecordFromString<TKey, TRecord> createRecord)
        where TKey : notnull
        where TRecord : class
    {
        // Get and create collection if it doesn't exist.
        var collection = vectorStore.GetCollection<TKey, TRecord>(collectionName);
        await collection.EnsureCollectionExistsAsync().ConfigureAwait(false);

        // Embedding generation and upsert are already asynchronous, so each entry is started as a plain
        // async operation instead of being wrapped in Task.Run; Task.WhenAll still awaits them together.
        var upserts = entries.Select(async entry =>
        {
            var embedding = await embeddingGenerator.GenerateAsync(entry).ConfigureAwait(false);
            await collection.UpsertAsync(createRecord(entry, embedding.Vector)).ConfigureAwait(false);
        });
        await Task.WhenAll(upserts).ConfigureAwait(false);

        return collection;
    }

    /// <summary>
    /// Runs the full round trip: build the collection, write it to disk as JSON, read it back,
    /// and print the top search hit for a fixed query.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when deserialization yields no collection.
    /// </exception>
    public async Task Test()
    {
        var builder = Kernel.CreateBuilder();
        var embeddingModelId = "granite-embedding:278m";
        var endpoint = new Uri("http://localhost:11434");
        builder.Services.AddOllamaEmbeddingGenerator(embeddingModelId, endpoint);

        var kernel = builder.Build();
        var embeddingService = kernel.GetRequiredService<IEmbeddingGenerator<string, Embedding<float>>>();
        var vectorStore = new InMemoryVectorStore();
        var collectionName = "Records";

        static DataModel CreateRecord(string text, ReadOnlyMemory<float> embedding) => new()
        {
            Id = Guid.NewGuid(),
            Text = text,
            Embedding = embedding
        };

        // One entry per *.txt file in the "Issues" directory (relative to the working directory).
        List<string> lines = Directory.GetFiles("Issues", "*.txt")
            .Select(file => File.ReadAllText(file))
            .ToList();
        Console.WriteLine(lines.Count);

        await CreateCollectionFromListAsync<Guid, DataModel>(vectorStore,
            collectionName, lines, embeddingService, CreateRecord);

        string filePath = "D:\\ExampleCollection1.json";
        var options = new System.Text.Json.JsonSerializerOptions
        {
            Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping, // Allow non-ASCII characters (e.g. Chinese) to be written unescaped.
            WriteIndented = true // Optional: pretty-print the output (line breaks and indentation).
        };

        // FileMode.Create truncates an existing file. OpenOrCreate would keep the old length, so a
        // shorter payload written over a longer file from a previous run would leave stale trailing bytes.
        await using (FileStream fileStream = new(filePath, FileMode.Create))
        {
            await vectorStore.SerializeCollectionAsJsonAsync<Guid, DataModel>(collectionName, fileStream, options);
        }

        await using (FileStream fileStream = new(filePath, FileMode.Open))
        {
            var vectorSearch = await vectorStore.DeserializeCollectionFromJsonAsync<Guid, DataModel>(fileStream)
                ?? throw new InvalidOperationException($"No collection could be deserialized from '{filePath}'.");
            var searchString = "How many records are there?";
            var searchVector = (await embeddingService.GenerateAsync(searchString)).Vector;
            var result = vectorSearch.SearchAsync(searchVector, top: 1);
            var resultRecords = new List<VectorSearchResult<DataModel>>();

            await foreach (var item in result.WithCancellation(default).ConfigureAwait(false))
            {
                resultRecords.Add(item);
            }

            Console.WriteLine("Search string: " + searchString);
            // The search yields nothing when the collection is empty (e.g. no input files were found),
            // so report that instead of letting First() throw.
            Console.WriteLine(resultRecords.Count > 0
                ? "Result: " + resultRecords[0].Record.Text
                : "Result: <no matching records>");
            Console.WriteLine();
        }
    }
}

/// <summary>
/// Record type used by the repro: a GUID key, the source text, and the embedding generated for that text.
/// </summary>
class DataModel
{
    /// <summary>Record key; assigned a fresh <see cref="Guid"/> when the record is created.</summary>
    [VectorStoreKey]
    public Guid Id { get; init; }

    /// <summary>The text the embedding was generated from.</summary>
    [VectorStoreData]
    public string Text { get; init; }

    /// <summary>Embedding vector for <see cref="Text"/>.</summary>
    // NOTE(review): the attribute declares 1536 dimensions; confirm this matches the length of the
    // vectors actually produced by the configured embedding model ("granite-embedding:278m").
    [VectorStoreVector(1536)]
    public ReadOnlyMemory<float> Embedding { get; init; }
}

Metadata

Metadata

Assignees

Labels

.NET (Issue or Pull requests regarding .NET code) · bug (Something isn't working) · msft.ext.vectordata (Related to Microsoft.Extensions.VectorData)

Type

Projects

Status

Sprint: In Review

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions