Skip to content

Commit 9c6ba4f

Browse files
authored
Add vector search index feature and end-to-end logging (#1113)
# Summary

- Implement SQLite-based vector search index with math utilities and match models, plus factory wiring for embedding providers.
- Centralize constants into src/Core/Constants.cs and update Core/Main code to consume the new structure.
- Add `doctor` command and embedding generator factory integration.
- Expand test suite: new vector index unit/integration tests, updated core/main tests, and **new e2e harness/tests** with per-test C# log files.
- Update format.sh and add e2e-tests.sh runner; e2e tests default to Release build and use KM_BIN.

# Details

- New vector search components: IVectorIndex, SqliteVectorIndex, VectorMath, VectorMatch, plus tests covering persistence, error handling, and math.
- Constants consolidation removed module-specific constant files and unified them in Constants.cs (search, embeddings, logging, config/app).
- CLI/Services: added DoctorCommand, EmbeddingGeneratorFactory, enhanced SearchIndexFactory; CLI builder now emits bootstrap logs and respects --log-file.
- E2E: added framework helpers (cli, db, logging), five e2e scenarios with per-test log files; runner script e2e-tests.sh builds if needed and sets KM_BIN.
1 parent 1bf7b42 commit 9c6ba4f

File tree

120 files changed

+6953
-855
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

120 files changed

+6953
-855
lines changed

.github/workflows/coverage.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ jobs:
2525
chmod +x ./coverage.sh
2626
./coverage.sh 80
2727
env:
28+
OLLAMA_AVAILABLE: "false"
2829
MIN_COVERAGE: 80
2930

3031
- name: Upload coverage report

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ archived/
66
node_modules/
77
obj/
88
bin/
9+
__pycache__/
910
_dev/
1011
.dev/
1112
.vs/
@@ -68,4 +69,4 @@ publish/
6869
*.crt
6970
*.key
7071
*.pem
71-
certs/
72+
certs/

docs

Submodule docs updated from 23845bf to a0321cf

e2e-tests.sh

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
#
# Runs every end-to-end test script matching tests/e2e/test_*.py with python3.
#
# Environment:
#   CONFIGURATION  Build configuration used to locate (and, if needed, build)
#                  the km binary. Default: Release (aligned with build.sh).
#   KM_BIN         Path to KernelMemory.Main.dll. When unset, it defaults to the
#                  build output for CONFIGURATION and is built on demand.
#                  The resolved value is exported for the test scripts.
#
# Exit status: 0 when at least one test ran and all of them succeeded;
#              1 on any failure, missing prerequisite, or when no test ran.

set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
cd "$ROOT"

echo "======================================="
echo "  Running E2E Tests"
echo "======================================="
echo ""

# Choose build configuration (default Release to align with build.sh)
CONFIGURATION="${CONFIGURATION:-Release}"
DEFAULT_KM_BIN="$ROOT/src/Main/bin/$CONFIGURATION/net10.0/KernelMemory.Main.dll"

# Honor a caller-provided KM_BIN; fall back to the build output otherwise.
KM_BIN="${KM_BIN:-$DEFAULT_KM_BIN}"

# Build only when the default binary is missing: building cannot produce a
# binary at a custom, caller-supplied location.
if [ ! -f "$KM_BIN" ] && [ "$KM_BIN" = "$DEFAULT_KM_BIN" ]; then
    echo "km binary not found at $KM_BIN. Building ($CONFIGURATION)..."
    dotnet build src/Main/Main.csproj -c "$CONFIGURATION"
fi

if [ ! -f "$KM_BIN" ]; then
    echo "❌ km binary not found at $KM_BIN. Build the project or set KM_BIN to a valid path."
    exit 1
fi

export KM_BIN

# Fail early with a clear message instead of counting every test as failed.
if ! command -v python3 >/dev/null 2>&1; then
    echo "❌ python3 not found in PATH; it is required to run the e2e tests."
    exit 1
fi

FAILED=0
PASSED=0

# Run each test file
for test_file in tests/e2e/test_*.py; do
    # When the glob matches nothing it stays a literal pattern; skip it.
    if [ -f "$test_file" ]; then
        echo ""
        echo "Running: $(basename "$test_file")"
        echo "---------------------------------------"

        # Running inside `if` keeps `set -e` from aborting on a failing test.
        if python3 "$test_file"; then
            PASSED=$((PASSED + 1))
        else
            FAILED=$((FAILED + 1))
        fi
    fi
done

echo ""
echo "======================================="
echo "  E2E Test Results"
echo "======================================="
echo "Passed: $PASSED"
echo "Failed: $FAILED"
echo "======================================="

if [ "$FAILED" -gt 0 ]; then
    exit 1
fi

# A run that executed nothing must not be reported as a success.
if [ "$PASSED" -eq 0 ]; then
    echo "❌ No e2e tests found under tests/e2e/ (pattern: test_*.py)."
    exit 1
fi

exit 0

src/Core/Config/AppConfig.cs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ public static AppConfig CreateDefault()
8383

8484
/// <summary>
8585
/// Creates a default configuration with a single "personal" node
86-
/// using local SQLite storage in the specified base directory
86+
/// using local SQLite storage in the specified base directory.
87+
/// Includes embeddings cache for efficient vector search operations.
8788
/// </summary>
8889
/// <param name="baseDir">Base directory for data storage</param>
8990
public static AppConfig CreateDefault(string baseDir)
@@ -95,8 +96,10 @@ public static AppConfig CreateDefault(string baseDir)
9596
Nodes = new Dictionary<string, NodeConfig>
9697
{
9798
["personal"] = NodeConfig.CreateDefaultPersonalNode(personalNodeDir)
98-
}
99-
// EmbeddingsCache and LLMCache intentionally omitted - add when features are implemented
99+
},
100+
EmbeddingsCache = CacheConfig.CreateDefaultSqliteCache(
101+
Path.Combine(baseDir, "embeddings-cache.db"))
102+
// LLMCache intentionally omitted - add when LLM features are implemented
100103
};
101104
}
102105
}

src/Core/Config/ConfigParser.cs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright (c) Microsoft. All rights reserved.
22
using System.Text.Json;
3+
using System.Text.Json.Serialization;
34
using System.Text.Json.Serialization.Metadata;
45
using KernelMemory.Core.Config.Cache;
56
using KernelMemory.Core.Config.ContentIndex;
@@ -28,7 +29,8 @@ public static class ConfigParser
2829
ReadCommentHandling = JsonCommentHandling.Skip,
2930
AllowTrailingCommas = true,
3031
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
31-
TypeInfoResolver = new DefaultJsonTypeInfoResolver()
32+
TypeInfoResolver = new DefaultJsonTypeInfoResolver(),
33+
Converters = { new JsonStringEnumConverter() }
3234
};
3335

3436
/// <summary>
@@ -46,13 +48,14 @@ public static class ConfigParser
4648

4749
/// <summary>
4850
/// Loads configuration from a file, or creates default config if file doesn't exist.
49-
/// The config file is always ensured to exist on disk after loading.
51+
/// Optionally ensures the config file exists on disk after loading (for write operations).
5052
/// Performs tilde expansion on paths (~/ → home directory)
5153
/// </summary>
5254
/// <param name="filePath">Path to configuration file</param>
55+
/// <param name="ensureFileExists">If true, writes config to disk if missing (default: true for backward compatibility)</param>
5356
/// <returns>Validated AppConfig instance</returns>
5457
/// <exception cref="ConfigException">Thrown when file exists but parsing or validation fails</exception>
55-
public static AppConfig LoadFromFile(string filePath)
58+
public static AppConfig LoadFromFile(string filePath, bool ensureFileExists = true)
5659
{
5760
AppConfig config;
5861

@@ -65,8 +68,11 @@ public static AppConfig LoadFromFile(string filePath)
6568
// Create default config relative to config file location
6669
config = AppConfig.CreateDefault(baseDir);
6770

68-
// Write the config file
69-
WriteConfigFile(filePath, config);
71+
// Write the config file only if requested
72+
if (ensureFileExists)
73+
{
74+
WriteConfigFile(filePath, config);
75+
}
7076

7177
return config;
7278
}
@@ -82,8 +88,11 @@ public static AppConfig LoadFromFile(string filePath)
8288
// Expand tilde paths
8389
ExpandTildePaths(config);
8490

85-
// Always ensure the config file exists (recreate if deleted between load and save)
86-
WriteConfigFileIfMissing(filePath, config);
91+
// Optionally ensure the config file exists (recreate if deleted between load and save)
92+
if (ensureFileExists)
93+
{
94+
WriteConfigFileIfMissing(filePath, config);
95+
}
8796

8897
return config;
8998
}

src/Core/Config/Embeddings/HuggingFaceEmbeddingsConfig.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
using System.Text.Json.Serialization;
33
using KernelMemory.Core.Config.Enums;
44
using KernelMemory.Core.Config.Validation;
5-
using KernelMemory.Core.Embeddings;
65

76
namespace KernelMemory.Core.Config.Embeddings;
87

@@ -20,7 +19,7 @@ public sealed class HuggingFaceEmbeddingsConfig : EmbeddingsConfig
2019
/// HuggingFace model name (e.g., "sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-base-en-v1.5").
2120
/// </summary>
2221
[JsonPropertyName("model")]
23-
public string Model { get; set; } = EmbeddingConstants.DefaultHuggingFaceModel;
22+
public string Model { get; set; } = Constants.EmbeddingDefaults.DefaultHuggingFaceModel;
2423

2524
/// <summary>
2625
/// HuggingFace API key (token).
@@ -35,7 +34,7 @@ public sealed class HuggingFaceEmbeddingsConfig : EmbeddingsConfig
3534
/// Can be changed for custom inference endpoints.
3635
/// </summary>
3736
[JsonPropertyName("baseUrl")]
38-
public string BaseUrl { get; set; } = EmbeddingConstants.DefaultHuggingFaceBaseUrl;
37+
public string BaseUrl { get; set; } = Constants.EmbeddingDefaults.DefaultHuggingFaceBaseUrl;
3938

4039
/// <inheritdoc />
4140
public override void Validate(string path)

src/Core/Config/NodeConfig.cs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright (c) Microsoft. All rights reserved.
22
using System.Text.Json.Serialization;
33
using KernelMemory.Core.Config.ContentIndex;
4+
using KernelMemory.Core.Config.Embeddings;
45
using KernelMemory.Core.Config.Enums;
56
using KernelMemory.Core.Config.SearchIndex;
67
using KernelMemory.Core.Config.Storage;
@@ -106,7 +107,8 @@ public void Validate(string path)
106107
}
107108

108109
/// <summary>
109-
/// Creates a default "personal" node configuration
110+
/// Creates a default "personal" node configuration with FTS and vector search.
111+
/// Uses Ollama with qwen3-embedding model (1024 dimensions) for local, offline-capable vector search.
110112
/// </summary>
111113
/// <param name="nodeDir"></param>
112114
internal static NodeConfig CreateDefaultPersonalNode(string nodeDir)
@@ -128,7 +130,21 @@ internal static NodeConfig CreateDefaultPersonalNode(string nodeDir)
128130
Id = "sqlite-fts",
129131
Type = SearchIndexTypes.SqliteFTS,
130132
Path = Path.Combine(nodeDir, "fts.db"),
131-
EnableStemming = true
133+
EnableStemming = true,
134+
Required = true
135+
},
136+
new VectorSearchIndexConfig
137+
{
138+
Id = "sqlite-vector",
139+
Type = SearchIndexTypes.SqliteVector,
140+
Path = Path.Combine(nodeDir, "vector.db"),
141+
Dimensions = 1024,
142+
UseSqliteVec = false,
143+
Embeddings = new OllamaEmbeddingsConfig
144+
{
145+
Model = Constants.EmbeddingDefaults.DefaultOllamaModel,
146+
BaseUrl = Constants.EmbeddingDefaults.DefaultOllamaBaseUrl
147+
}
132148
}
133149
}
134150
};

src/Core/Config/SearchConfig.cs

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// Copyright (c) Microsoft. All rights reserved.
22
using System.Text.Json.Serialization;
33
using KernelMemory.Core.Config.Validation;
4-
using KernelMemory.Core.Search;
54

65
namespace KernelMemory.Core.Config;
76

@@ -17,22 +16,22 @@ public sealed class SearchConfig : IValidatable
1716
/// Default: 0.3 (moderate threshold).
1817
/// </summary>
1918
[JsonPropertyName("defaultMinRelevance")]
20-
public float DefaultMinRelevance { get; set; } = SearchConstants.DefaultMinRelevance;
19+
public float DefaultMinRelevance { get; set; } = Constants.SearchDefaults.DefaultMinRelevance;
2120

2221
/// <summary>
2322
/// Default maximum number of results to return per search.
2423
/// Default: 20 results.
2524
/// </summary>
2625
[JsonPropertyName("defaultLimit")]
27-
public int DefaultLimit { get; set; } = SearchConstants.DefaultLimit;
26+
public int DefaultLimit { get; set; } = Constants.SearchDefaults.DefaultLimit;
2827

2928
/// <summary>
3029
/// Search timeout in seconds per node.
3130
/// If a node takes longer than this, it times out and is excluded from results.
3231
/// Default: 30 seconds.
3332
/// </summary>
3433
[JsonPropertyName("searchTimeoutSeconds")]
35-
public int SearchTimeoutSeconds { get; set; } = SearchConstants.DefaultSearchTimeoutSeconds;
34+
public int SearchTimeoutSeconds { get; set; } = Constants.SearchDefaults.DefaultSearchTimeoutSeconds;
3635

3736
/// <summary>
3837
/// Default maximum results to retrieve from each node (memory safety).
@@ -41,7 +40,7 @@ public sealed class SearchConfig : IValidatable
4140
/// Default: 1000 results per node.
4241
/// </summary>
4342
[JsonPropertyName("maxResultsPerNode")]
44-
public int MaxResultsPerNode { get; set; } = SearchConstants.DefaultMaxResultsPerNode;
43+
public int MaxResultsPerNode { get; set; } = Constants.SearchDefaults.DefaultMaxResultsPerNode;
4544

4645
/// <summary>
4746
/// Default nodes to search when no explicit --nodes flag is provided.
@@ -50,7 +49,7 @@ public sealed class SearchConfig : IValidatable
5049
/// </summary>
5150
[JsonPropertyName("defaultNodes")]
5251
[System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1819:Properties should not return arrays")]
53-
public string[] DefaultNodes { get; set; } = [SearchConstants.AllNodesWildcard];
52+
public string[] DefaultNodes { get; set; } = [Constants.SearchDefaults.AllNodesWildcard];
5453

5554
/// <summary>
5655
/// Nodes to exclude from search by default.
@@ -67,66 +66,66 @@ public sealed class SearchConfig : IValidatable
6766
/// Default: 10 levels.
6867
/// </summary>
6968
[JsonPropertyName("maxQueryDepth")]
70-
public int MaxQueryDepth { get; set; } = SearchConstants.MaxQueryDepth;
69+
public int MaxQueryDepth { get; set; } = Constants.SearchDefaults.MaxQueryDepth;
7170

7271
/// <summary>
7372
/// Maximum number of boolean operators (AND/OR/NOT) in a single query.
7473
/// Prevents query complexity attacks.
7574
/// Default: 50 operators.
7675
/// </summary>
7776
[JsonPropertyName("maxBooleanOperators")]
78-
public int MaxBooleanOperators { get; set; } = SearchConstants.MaxBooleanOperators;
77+
public int MaxBooleanOperators { get; set; } = Constants.SearchDefaults.MaxBooleanOperators;
7978

8079
/// <summary>
8180
/// Maximum length of a field value in query (characters).
8281
/// Prevents oversized query values.
8382
/// Default: 1000 characters.
8483
/// </summary>
8584
[JsonPropertyName("maxFieldValueLength")]
86-
public int MaxFieldValueLength { get; set; } = SearchConstants.MaxFieldValueLength;
85+
public int MaxFieldValueLength { get; set; } = Constants.SearchDefaults.MaxFieldValueLength;
8786

8887
/// <summary>
8988
/// Maximum time allowed for query parsing (milliseconds).
9089
/// Prevents regex catastrophic backtracking.
9190
/// Default: 1000ms (1 second).
9291
/// </summary>
9392
[JsonPropertyName("queryParseTimeoutMs")]
94-
public int QueryParseTimeoutMs { get; set; } = SearchConstants.QueryParseTimeoutMs;
93+
public int QueryParseTimeoutMs { get; set; } = Constants.SearchDefaults.QueryParseTimeoutMs;
9594

9695
/// <summary>
9796
/// Default snippet length in characters when --snippet flag is used.
9897
/// Default: 200 characters.
9998
/// </summary>
10099
[JsonPropertyName("snippetLength")]
101-
public int SnippetLength { get; set; } = SearchConstants.DefaultSnippetLength;
100+
public int SnippetLength { get; set; } = Constants.SearchDefaults.DefaultSnippetLength;
102101

103102
/// <summary>
104103
/// Default maximum number of snippets per result when --snippet flag is used.
105104
/// Default: 1 snippet.
106105
/// </summary>
107106
[JsonPropertyName("maxSnippetsPerResult")]
108-
public int MaxSnippetsPerResult { get; set; } = SearchConstants.DefaultMaxSnippetsPerResult;
107+
public int MaxSnippetsPerResult { get; set; } = Constants.SearchDefaults.DefaultMaxSnippetsPerResult;
109108

110109
/// <summary>
111110
/// Separator string between multiple snippets.
112111
/// Default: "..." (ellipsis).
113112
/// </summary>
114113
[JsonPropertyName("snippetSeparator")]
115-
public string SnippetSeparator { get; set; } = SearchConstants.DefaultSnippetSeparator;
114+
public string SnippetSeparator { get; set; } = Constants.SearchDefaults.DefaultSnippetSeparator;
116115

117116
/// <summary>
118117
/// Prefix marker for highlighting matched terms.
119118
/// Default: "&lt;mark&gt;" (HTML-style).
120119
/// </summary>
121120
[JsonPropertyName("highlightPrefix")]
122-
public string HighlightPrefix { get; set; } = SearchConstants.DefaultHighlightPrefix;
121+
public string HighlightPrefix { get; set; } = Constants.SearchDefaults.DefaultHighlightPrefix;
123122

124123
/// <summary>
125124
/// Suffix marker for highlighting matched terms.
126125
/// Default: "&lt;/mark&gt;" (HTML-style).
127126
/// </summary>
128127
[JsonPropertyName("highlightSuffix")]
129-
public string HighlightSuffix { get; set; } = SearchConstants.DefaultHighlightSuffix;
128+
public string HighlightSuffix { get; set; } = Constants.SearchDefaults.DefaultHighlightSuffix;
130129

131130
/// <summary>
132131
/// Validates the search configuration.
@@ -135,10 +134,10 @@ public sealed class SearchConfig : IValidatable
135134
public void Validate(string path)
136135
{
137136
// Validate min relevance score
138-
if (this.DefaultMinRelevance < SearchConstants.MinRelevanceScore || this.DefaultMinRelevance > SearchConstants.MaxRelevanceScore)
137+
if (this.DefaultMinRelevance < Constants.SearchDefaults.MinRelevanceScore || this.DefaultMinRelevance > Constants.SearchDefaults.MaxRelevanceScore)
139138
{
140139
throw new ConfigException($"{path}.DefaultMinRelevance",
141-
$"Must be between {SearchConstants.MinRelevanceScore} and {SearchConstants.MaxRelevanceScore}");
140+
$"Must be between {Constants.SearchDefaults.MinRelevanceScore} and {Constants.SearchDefaults.MaxRelevanceScore}");
142141
}
143142

144143
// Validate default limit
@@ -167,7 +166,7 @@ public void Validate(string path)
167166
}
168167

169168
// Validate no contradictory node configuration
170-
if (this.DefaultNodes.Length == 1 && this.DefaultNodes[0] == SearchConstants.AllNodesWildcard)
169+
if (this.DefaultNodes.Length == 1 && this.DefaultNodes[0] == Constants.SearchDefaults.AllNodesWildcard)
171170
{
172171
// Using wildcard - excludeNodes is OK
173172
}

0 commit comments

Comments
 (0)