Skip to content

Commit 3c4a037

Browse files
committed
feat(kmeans): switch from trigger-based to timer-based clustering
- Replace trigger-on-completion with scheduled clustering (default 15min)
- Add KmeansClusterInterval config (ENV: NORNICDB_KMEANS_CLUSTER_INTERVAL)
- Skip clustering cycles if no new embeddings since last run
- Run immediately on startup, then at regular intervals
- Prevents constant re-clustering during active embedding generation
- Configurable via config or environment variable (0 = manual only)

Fixes performance issue where k-means would trigger after every 1-2 embeddings, causing ~1min delays. Now clusters efficiently on schedule only when changes exist.

Changes:
- pkg/config/config.go: Add KmeansClusterInterval field
- pkg/nornicdb/db.go: Replace SetOnQueueEmpty with timer-based approach
- pkg/cypher/call.go: Add ORDER BY/LIMIT/SKIP support for YIELD
- pkg/cypher/neo4j_compat_test.go: Add node_search/default index tests
- pkg/cypher/yield_return_test.go: Comprehensive YIELD/RETURN tests
1 parent 7ad8a99 commit 3c4a037

File tree

5 files changed

+868
-27
lines changed

5 files changed

+868
-27
lines changed

pkg/config/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ type MemoryConfig struct {
267267
// KmeansMinEmbeddings is minimum embeddings required for k-means clustering
268268
// Env: NORNICDB_KMEANS_MIN_EMBEDDINGS (default: 100)
269269
KmeansMinEmbeddings int
270+
// KmeansClusterInterval is how often to run k-means clustering (0 = disabled)
271+
// Env: NORNICDB_KMEANS_CLUSTER_INTERVAL (default: 15m)
272+
KmeansClusterInterval time.Duration
270273

271274
// === Runtime Memory Management (Go runtime tuning) ===
272275

@@ -1204,6 +1207,7 @@ func LoadDefaults() *Config {
12041207
config.Memory.AutoLinksEnabled = true
12051208
config.Memory.AutoLinksSimilarityThreshold = 0.82
12061209
config.Memory.KmeansMinEmbeddings = 100
1210+
config.Memory.KmeansClusterInterval = 15 * time.Minute // timer-based clustering every 15 min (skips if no changes)
12071211
config.Memory.RuntimeLimitStr = "0"
12081212
config.Memory.RuntimeLimit = 0
12091213
config.Memory.GCPercent = 100
@@ -1464,6 +1468,9 @@ func applyEnvVars(config *Config) {
14641468
if v := getEnvInt("NORNICDB_KMEANS_MIN_EMBEDDINGS", 0); v > 0 {
14651469
config.Memory.KmeansMinEmbeddings = v
14661470
}
1471+
if v := getEnvDuration("NORNICDB_KMEANS_CLUSTER_INTERVAL", 0); v > 0 {
1472+
config.Memory.KmeansClusterInterval = v
1473+
}
14671474
if getEnv("NORNICDB_AUTO_LINKS_ENABLED", "") == "false" {
14681475
config.Memory.AutoLinksEnabled = false
14691476
}

0 commit comments

Comments
 (0)