Skip to content

Commit db5791f

Browse files
committed
ensuring embeddings are removed and coutns are updated on delets
1 parent 6c37501 commit db5791f

File tree

3 files changed

+218
-5
lines changed

3 files changed

+218
-5
lines changed

pkg/nornicdb/db.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -951,15 +951,20 @@ func Open(dataDir string, config *Config) (*DB, error) {
951951

952952
// Wire up storage event callbacks to keep search indexes synchronized
953953
// Storage is the single source of truth - it notifies when changes happen
954-
// This works whether storage is BadgerEngine directly or wrapped in AsyncEngine
954+
// The storage chain can be: AsyncEngine -> WALEngine -> BadgerEngine
955955
var underlyingEngine storage.Engine = db.storage
956956

957-
// If storage is wrapped in AsyncEngine, get the underlying BadgerEngine
958-
if asyncEngine, ok := db.storage.(*storage.AsyncEngine); ok {
959-
underlyingEngine = asyncEngine.GetUnderlying()
957+
// Unwrap AsyncEngine if present
958+
if asyncEngine, ok := underlyingEngine.(*storage.AsyncEngine); ok {
959+
underlyingEngine = asyncEngine.GetEngine()
960+
}
961+
962+
// Unwrap WALEngine if present
963+
if walEngine, ok := underlyingEngine.(*storage.WALEngine); ok {
964+
underlyingEngine = walEngine.GetEngine()
960965
}
961966

962-
// Set callbacks on the actual storage engine (where operations actually happen)
967+
// Set callbacks on the actual storage engine (BadgerEngine which implements StorageEventNotifier)
963968
if notifier, ok := underlyingEngine.(storage.StorageEventNotifier); ok {
964969
// When a node is created, automatically index it for search
965970
notifier.OnNodeCreated(func(node *storage.Node) {

pkg/nornicdb/db_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2539,6 +2539,94 @@ func TestDeleteNode_RemovesFromSearchIndex(t *testing.T) {
25392539
assert.Error(t, err)
25402540
assert.ErrorIs(t, err, ErrClosed)
25412541
})
2542+
2543+
t.Run("delete_decrements_embedding_count_via_callback", func(t *testing.T) {
2544+
// This test verifies the full callback chain:
2545+
// Storage.DeleteNode -> notifyNodeDeleted callback -> SearchService.RemoveNode
2546+
// This ensures embeddings are cleaned up automatically when nodes are deleted.
2547+
2548+
db, err := Open(t.TempDir(), nil)
2549+
require.NoError(t, err)
2550+
defer db.Close()
2551+
2552+
// Create nodes with embeddings directly via storage layer
2553+
// We use the storage layer directly to have precise control over the Embedding field
2554+
nodes := []*storage.Node{
2555+
{
2556+
ID: "embed-callback-test-1",
2557+
Labels: []string{"TestNode"},
2558+
Properties: map[string]any{"name": "Test Node 1"},
2559+
Embedding: make([]float32, 1024), // Match default dimension
2560+
},
2561+
{
2562+
ID: "embed-callback-test-2",
2563+
Labels: []string{"TestNode"},
2564+
Properties: map[string]any{"name": "Test Node 2"},
2565+
Embedding: make([]float32, 1024),
2566+
},
2567+
{
2568+
ID: "embed-callback-test-3",
2569+
Labels: []string{"TestNode"},
2570+
Properties: map[string]any{"name": "Test Node 3"},
2571+
Embedding: make([]float32, 1024),
2572+
},
2573+
}
2574+
2575+
// Set distinct embedding values
2576+
nodes[0].Embedding[0] = 1.0
2577+
nodes[1].Embedding[1] = 1.0
2578+
nodes[2].Embedding[2] = 1.0
2579+
2580+
// Get initial embedding count
2581+
initialCount := db.EmbeddingCount()
2582+
2583+
// Create nodes via storage (this triggers OnNodeCreated callback)
2584+
for _, node := range nodes {
2585+
err := db.storage.CreateNode(node)
2586+
require.NoError(t, err)
2587+
}
2588+
2589+
// Wait for async callbacks to fire
2590+
time.Sleep(100 * time.Millisecond)
2591+
2592+
// Verify embedding count increased by 3
2593+
afterCreate := db.EmbeddingCount()
2594+
assert.Equal(t, initialCount+3, afterCreate,
2595+
"Embedding count should increase by 3 after creating nodes (was %d, now %d)",
2596+
initialCount, afterCreate)
2597+
2598+
// Delete node1 via storage (this triggers OnNodeDeleted callback)
2599+
err = db.storage.DeleteNode("embed-callback-test-1")
2600+
require.NoError(t, err)
2601+
2602+
// Wait for async callback
2603+
time.Sleep(100 * time.Millisecond)
2604+
2605+
afterDelete1 := db.EmbeddingCount()
2606+
assert.Equal(t, initialCount+2, afterDelete1,
2607+
"Embedding count should decrease by 1 after deleting node1 (expected %d, got %d)",
2608+
initialCount+2, afterDelete1)
2609+
2610+
// Delete node2
2611+
err = db.storage.DeleteNode("embed-callback-test-2")
2612+
require.NoError(t, err)
2613+
time.Sleep(100 * time.Millisecond)
2614+
2615+
afterDelete2 := db.EmbeddingCount()
2616+
assert.Equal(t, initialCount+1, afterDelete2,
2617+
"Embedding count should decrease by 2 after deleting node2 (expected %d, got %d)",
2618+
initialCount+1, afterDelete2)
2619+
2620+
// Delete node3
2621+
err = db.storage.DeleteNode("embed-callback-test-3")
2622+
require.NoError(t, err)
2623+
time.Sleep(100 * time.Millisecond)
2624+
2625+
afterDelete3 := db.EmbeddingCount()
2626+
assert.Equal(t, initialCount, afterDelete3,
2627+
"Embedding count should return to initial after deleting all test nodes (expected %d, got %d)",
2628+
initialCount, afterDelete3)
2629+
})
25422630
}
25432631

25442632
func TestDeleteUserData_RemovesFromSearchIndex(t *testing.T) {

pkg/search/search_test.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,126 @@ func TestSearchService_RemoveNode(t *testing.T) {
685685
}
686686
}
687687

688+
// TestSearchService_RemoveNode_DecrementsEmbeddingCount verifies that removing a node
689+
// decrements the embedding count in stats. This is critical for ensuring that when nodes
690+
// are deleted via Cypher, the embedding count is updated correctly without requiring
691+
// a manual "regenerate" operation.
692+
func TestSearchService_RemoveNode_DecrementsEmbeddingCount(t *testing.T) {
693+
engine := storage.NewMemoryEngine()
694+
defer engine.Close()
695+
696+
svc := NewServiceWithDimensions(engine, 4) // 4-dimensional embeddings for test
697+
698+
// Initial state: no embeddings
699+
assert.Equal(t, 0, svc.EmbeddingCount(), "Should start with 0 embeddings")
700+
701+
// Create and index 3 nodes with embeddings
702+
// Note: Node.Embedding is the struct field used by IndexNode, not Properties["embedding"]
703+
nodes := []*storage.Node{
704+
{
705+
ID: "node1",
706+
Labels: []string{"Person"},
707+
Properties: map[string]any{"name": "Alice"},
708+
Embedding: []float32{1, 0, 0, 0},
709+
},
710+
{
711+
ID: "node2",
712+
Labels: []string{"Person"},
713+
Properties: map[string]any{"name": "Bob"},
714+
Embedding: []float32{0, 1, 0, 0},
715+
},
716+
{
717+
ID: "node3",
718+
Labels: []string{"Person"},
719+
Properties: map[string]any{"name": "Charlie"},
720+
Embedding: []float32{0, 0, 1, 0},
721+
},
722+
}
723+
724+
for _, node := range nodes {
725+
require.NoError(t, engine.CreateNode(node))
726+
require.NoError(t, svc.IndexNode(node))
727+
}
728+
729+
// Verify all 3 nodes are indexed
730+
assert.Equal(t, 3, svc.EmbeddingCount(), "Should have 3 embeddings after indexing")
731+
732+
// Remove node1
733+
require.NoError(t, svc.RemoveNode("node1"))
734+
assert.Equal(t, 2, svc.EmbeddingCount(), "Should have 2 embeddings after removing node1")
735+
736+
// Remove node2
737+
require.NoError(t, svc.RemoveNode("node2"))
738+
assert.Equal(t, 1, svc.EmbeddingCount(), "Should have 1 embedding after removing node2")
739+
740+
// Remove node3
741+
require.NoError(t, svc.RemoveNode("node3"))
742+
assert.Equal(t, 0, svc.EmbeddingCount(), "Should have 0 embeddings after removing all nodes")
743+
744+
// Removing non-existent node should not affect count
745+
require.NoError(t, svc.RemoveNode("non-existent"))
746+
assert.Equal(t, 0, svc.EmbeddingCount(), "Count should remain 0 after removing non-existent node")
747+
}
748+
749+
// TestSearchService_RemoveNode_OnlyRemovesTargetNode ensures RemoveNode is precise
750+
// and doesn't affect other nodes' embeddings.
751+
func TestSearchService_RemoveNode_OnlyRemovesTargetNode(t *testing.T) {
752+
engine := storage.NewMemoryEngine()
753+
defer engine.Close()
754+
755+
svc := NewServiceWithDimensions(engine, 4) // 4-dimensional embeddings for test
756+
757+
// Create distinct embeddings for easy search verification
758+
// Note: Node.Embedding is the struct field used by IndexNode
759+
node1 := &storage.Node{
760+
ID: "target-to-remove",
761+
Labels: []string{"Document"},
762+
Properties: map[string]any{"content": "unique alpha content"},
763+
Embedding: []float32{1, 0, 0, 0},
764+
}
765+
node2 := &storage.Node{
766+
ID: "should-remain-1",
767+
Labels: []string{"Document"},
768+
Properties: map[string]any{"content": "unique beta content"},
769+
Embedding: []float32{0, 1, 0, 0},
770+
}
771+
node3 := &storage.Node{
772+
ID: "should-remain-2",
773+
Labels: []string{"Document"},
774+
Properties: map[string]any{"content": "unique gamma content"},
775+
Embedding: []float32{0, 0, 1, 0},
776+
}
777+
778+
for _, node := range []*storage.Node{node1, node2, node3} {
779+
require.NoError(t, engine.CreateNode(node))
780+
require.NoError(t, svc.IndexNode(node))
781+
}
782+
783+
assert.Equal(t, 3, svc.EmbeddingCount())
784+
785+
// Remove only the target node
786+
require.NoError(t, svc.RemoveNode("target-to-remove"))
787+
788+
// Verify count decreased
789+
assert.Equal(t, 2, svc.EmbeddingCount())
790+
791+
// Verify remaining nodes are still searchable
792+
opts := DefaultSearchOptions()
793+
794+
// Search for remaining nodes by their unique content
795+
response, err := svc.Search(context.Background(), "beta", nil, opts)
796+
require.NoError(t, err)
797+
found := false
798+
for _, r := range response.Results {
799+
if r.ID == "should-remain-1" {
800+
found = true
801+
}
802+
// The removed node should NOT appear
803+
assert.NotEqual(t, "target-to-remove", r.ID, "Removed node should not appear in results")
804+
}
805+
assert.True(t, found, "Remaining node 'should-remain-1' should be searchable")
806+
}
807+
688808
// TestSearchService_HybridSearch tests the hybrid RRF search.
689809
func TestSearchService_HybridSearch(t *testing.T) {
690810
engine := storage.NewMemoryEngine()

0 commit comments

Comments
 (0)