Skip to content

Commit 4cf6fa6

Browse files
committed
fixing counting and logging when it happens if it happens again, delete files from database when removing from indexing
1 parent 8427ae4 commit 4cf6fa6

File tree

5 files changed

+210
-31
lines changed

5 files changed

+210
-31
lines changed

macos/MenuBarApp/FileIndexerWindow.swift

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -441,16 +441,23 @@ class FileWatchManager: ObservableObject {
441441
// Stop watching
442442
stopWatching(folder.path)
443443

444+
// Delete all nodes from NornicDB in background
445+
Task {
446+
do {
447+
try await deleteNodesForFolder(folder.path)
448+
} catch {
449+
print("⚠️ Failed to delete nodes for folder \(folder.path): \(error.localizedDescription)")
450+
// Continue with removal even if DB cleanup fails
451+
}
452+
}
453+
444454
// Remove from list
445455
DispatchQueue.main.async {
446456
self.watchedFolders.removeAll { $0.id == folder.id }
447457
self.progressMap.removeValue(forKey: folder.path)
448458
self.saveFolders()
449459
self.updateStats()
450460
}
451-
452-
// TODO: Remove nodes from NornicDB
453-
// await deleteNodesForFolder(folder.path)
454461
}
455462

456463
func refreshFolder(_ folder: IndexedFolder) {
@@ -764,21 +771,62 @@ class FileWatchManager: ObservableObject {
764771
}
765772
}
766773

767-
/// Delete all file nodes for a folder
774+
/// Delete all file nodes and their chunks for a folder
768775
func deleteNodesForFolder(_ folderPath: String) async throws {
769-
let query = """
776+
// First, get count of nodes to be deleted for logging
777+
let countQuery = """
778+
MATCH (f:File {folder_root: $folder_root})
779+
OPTIONAL MATCH (f)-[:HAS_CHUNK]->(c)
780+
RETURN count(DISTINCT f) as fileCount, count(DISTINCT c) as chunkCount
781+
"""
782+
783+
let countBody: [String: Any] = [
784+
"statements": [
785+
["statement": countQuery, "parameters": ["folder_root": folderPath]]
786+
]
787+
]
788+
789+
let countData = try JSONSerialization.data(withJSONObject: countBody)
790+
let (countResponseData, _) = try await makeAuthenticatedRequest(to: "/db/neo4j/tx/commit", method: "POST", body: countData)
791+
792+
var fileCount = 0
793+
var chunkCount = 0
794+
if let json = try? JSONSerialization.jsonObject(with: countResponseData) as? [String: Any],
795+
let results = json["results"] as? [[String: Any]],
796+
let data = results.first?["data"] as? [[String: Any]],
797+
let row = data.first?["row"] as? [Int] {
798+
fileCount = row[0]
799+
chunkCount = row[1]
800+
}
801+
802+
print("🗑️ Deleting \(fileCount) File nodes and \(chunkCount) FileChunk nodes for folder: \(folderPath)")
803+
804+
// Delete all File nodes and their associated FileChunk nodes
805+
// DETACH DELETE on File removes HAS_CHUNK relationships
806+
// Then explicitly delete orphaned FileChunk nodes
807+
let deleteQuery = """
770808
MATCH (f:File {folder_root: $folder_root})
809+
OPTIONAL MATCH (f)-[:HAS_CHUNK]->(c)
810+
WITH f, collect(c) as chunks
771811
DETACH DELETE f
812+
FOREACH (chunk IN chunks | DETACH DELETE chunk)
772813
"""
773814

774-
let body: [String: Any] = [
815+
let deleteBody: [String: Any] = [
775816
"statements": [
776-
["statement": query, "parameters": ["folder_root": folderPath]]
817+
["statement": deleteQuery, "parameters": ["folder_root": folderPath]]
777818
]
778819
]
779820

780-
let bodyData = try JSONSerialization.data(withJSONObject: body)
781-
let _ = try await makeAuthenticatedRequest(to: "/db/neo4j/tx/commit", method: "POST", body: bodyData)
821+
let deleteData = try JSONSerialization.data(withJSONObject: deleteBody)
822+
let (_, response) = try await makeAuthenticatedRequest(to: "/db/neo4j/tx/commit", method: "POST", body: deleteData)
823+
824+
if response.statusCode == 200 {
825+
print("✅ Successfully deleted all nodes for folder: \(folderPath)")
826+
} else {
827+
throw NSError(domain: "FileWatchManager", code: response.statusCode,
828+
userInfo: [NSLocalizedDescriptionKey: "Failed to delete nodes from NornicDB"])
829+
}
782830
}
783831

784832
/// Perform vector search

pkg/cypher/executor.go

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,21 +1395,24 @@ func (e *StorageExecutor) executeDelete(ctx context.Context, cypher string) (*Ex
13951395
}
13961396

13971397
if detach {
1398-
// Delete all connected edges first
1399-
edges, _ := e.storage.GetOutgoingEdges(storage.NodeID(nodeID))
1400-
for _, edge := range edges {
1401-
e.storage.DeleteEdge(edge.ID)
1402-
result.Stats.RelationshipsDeleted++
1398+
// Count edges that will be deleted with the node (for stats)
1399+
// DeleteNode() automatically deletes connected edges and updates counts internally
1400+
// We just need to count them for the result stats
1401+
outgoingEdges, _ := e.storage.GetOutgoingEdges(storage.NodeID(nodeID))
1402+
incomingEdges, _ := e.storage.GetIncomingEdges(storage.NodeID(nodeID))
1403+
edgesCount := len(outgoingEdges) + len(incomingEdges)
1404+
1405+
// DeleteNode() handles edge deletion internally and updates internal counts
1406+
if err := e.storage.DeleteNode(storage.NodeID(nodeID)); err == nil {
1407+
result.Stats.NodesDeleted++
1408+
result.Stats.RelationshipsDeleted += edgesCount
14031409
}
1404-
edges, _ = e.storage.GetIncomingEdges(storage.NodeID(nodeID))
1405-
for _, edge := range edges {
1406-
e.storage.DeleteEdge(edge.ID)
1407-
result.Stats.RelationshipsDeleted++
1410+
} else {
1411+
// Non-detach delete - just delete the node (will fail if edges exist)
1412+
if err := e.storage.DeleteNode(storage.NodeID(nodeID)); err == nil {
1413+
result.Stats.NodesDeleted++
14081414
}
14091415
}
1410-
if err := e.storage.DeleteNode(storage.NodeID(nodeID)); err == nil {
1411-
result.Stats.NodesDeleted++
1412-
}
14131416
}
14141417
}
14151418

pkg/storage/async_engine.go

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ package storage
1414
import (
1515
"context"
1616
"fmt"
17+
"log"
1718
"strings"
1819
"sync"
1920
"time"
@@ -384,6 +385,11 @@ func (ae *AsyncEngine) DeleteNode(id NodeID) error {
384385
ae.mu.Lock()
385386
defer ae.mu.Unlock()
386387

388+
// Check if already marked for deletion (idempotent)
389+
if ae.deleteNodes[id] {
390+
return nil
391+
}
392+
387393
// Check if node is being flushed right now (in-flight)
388394
isInFlight := ae.inFlightNodes[id]
389395

@@ -408,7 +414,21 @@ func (ae *AsyncEngine) DeleteNode(id NodeID) error {
408414
return nil
409415
}
410416

411-
// Node exists in underlying engine (or will after in-flight flush) - mark for deletion
417+
// If in-flight, it will exist in underlying engine after flush - mark for deletion
418+
if isInFlight {
419+
ae.deleteNodes[id] = true
420+
ae.pendingWrites++
421+
return nil
422+
}
423+
424+
// Check if node actually exists in underlying engine before marking for deletion
425+
// This prevents count going negative for non-existent nodes
426+
if _, err := ae.engine.GetNode(id); err != nil {
427+
// Node doesn't exist anywhere - nothing to delete
428+
return ErrNotFound
429+
}
430+
431+
// Node exists in underlying engine - mark for deletion
412432
ae.deleteNodes[id] = true
413433
ae.pendingWrites++
414434
return nil
@@ -444,6 +464,11 @@ func (ae *AsyncEngine) DeleteEdge(id EdgeID) error {
444464
ae.mu.Lock()
445465
defer ae.mu.Unlock()
446466

467+
// Check if already marked for deletion (idempotent)
468+
if ae.deleteEdges[id] {
469+
return nil
470+
}
471+
447472
// Check if edge is being flushed right now (in-flight)
448473
isInFlight := ae.inFlightEdges[id]
449474

@@ -461,7 +486,21 @@ func (ae *AsyncEngine) DeleteEdge(id EdgeID) error {
461486
return nil
462487
}
463488

464-
// Edge exists in underlying engine (or will after in-flight flush) - mark for deletion
489+
// If in-flight, it will exist in underlying engine after flush - mark for deletion
490+
if isInFlight {
491+
ae.deleteEdges[id] = true
492+
ae.pendingWrites++
493+
return nil
494+
}
495+
496+
// Check if edge actually exists in underlying engine before marking for deletion
497+
// This prevents count going negative for non-existent edges
498+
if _, err := ae.engine.GetEdge(id); err != nil {
499+
// Edge doesn't exist anywhere - nothing to delete
500+
return ErrNotFound
501+
}
502+
503+
// Edge exists in underlying engine - mark for deletion
465504
ae.deleteEdges[id] = true
466505
ae.pendingWrites++
467506
return nil
@@ -886,16 +925,22 @@ func (ae *AsyncEngine) NodeCount() (int64, error) {
886925
}
887926
pendingDeletes := int64(len(ae.deleteNodes))
888927

889-
count, err := ae.engine.NodeCount()
928+
engineCount, err := ae.engine.NodeCount()
890929
ae.mu.RUnlock()
891930

892931
if err != nil {
893932
return 0, err
894933
}
895934

896935
// Adjust for pending creates and deletes
897-
count += pendingCreates
898-
count -= pendingDeletes
936+
count := engineCount + pendingCreates - pendingDeletes
937+
938+
// Clamp to zero if negative (should never happen, log for debugging)
939+
if count < 0 {
940+
log.Printf("⚠️ [COUNT BUG] NodeCount went negative: engineCount=%d pendingCreates=%d pendingDeletes=%d result=%d (clamping to 0)",
941+
engineCount, pendingCreates, pendingDeletes, count)
942+
return 0, nil
943+
}
899944
return count, nil
900945
}
901946

@@ -915,16 +960,22 @@ func (ae *AsyncEngine) EdgeCount() (int64, error) {
915960
}
916961
pendingDeletes := int64(len(ae.deleteEdges))
917962

918-
count, err := ae.engine.EdgeCount()
963+
engineCount, err := ae.engine.EdgeCount()
919964
ae.mu.RUnlock()
920965

921966
if err != nil {
922967
return 0, err
923968
}
924969

925970
// Adjust for pending creates and deletes
926-
count += pendingCreates
927-
count -= pendingDeletes
971+
count := engineCount + pendingCreates - pendingDeletes
972+
973+
// Clamp to zero if negative (should never happen, log for debugging)
974+
if count < 0 {
975+
log.Printf("⚠️ [COUNT BUG] EdgeCount went negative: engineCount=%d pendingCreates=%d pendingDeletes=%d result=%d (clamping to 0)",
976+
engineCount, pendingCreates, pendingDeletes, count)
977+
return 0, nil
978+
}
928979
return count, nil
929980
}
930981

pkg/storage/async_engine_delete_stats_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,69 @@ func TestDetachDeleteStatsTracking(t *testing.T) {
206206
assert.Equal(t, int64(1), nc, "After flush: 1 node (nodeB)")
207207
assert.Equal(t, int64(0), ec, "After flush: 0 edges")
208208
})
209+
210+
t.Run("delete_nonexistent_node_does_not_affect_count", func(t *testing.T) {
211+
// This tests the critical fix: deleting a non-existent node should NOT
212+
// decrement the count and cause it to go negative
213+
engine := NewMemoryEngine()
214+
asyncConfig := &AsyncEngineConfig{
215+
FlushInterval: 100 * time.Millisecond,
216+
}
217+
asyncEngine := NewAsyncEngine(engine, asyncConfig)
218+
defer asyncEngine.Close()
219+
220+
// Initial count should be 0
221+
count, err := asyncEngine.NodeCount()
222+
require.NoError(t, err)
223+
assert.Equal(t, int64(0), count, "Initial count should be 0")
224+
225+
// Try to delete a non-existent node
226+
err = asyncEngine.DeleteNode("nonexistent_node_id")
227+
assert.Equal(t, ErrNotFound, err, "Deleting non-existent node should return ErrNotFound")
228+
229+
// Count should still be 0, not -1
230+
count, err = asyncEngine.NodeCount()
231+
require.NoError(t, err)
232+
assert.Equal(t, int64(0), count, "Count should remain 0 after deleting non-existent node")
233+
234+
// Flush and verify
235+
require.NoError(t, asyncEngine.Flush())
236+
237+
count, err = asyncEngine.NodeCount()
238+
require.NoError(t, err)
239+
assert.Equal(t, int64(0), count, "Count should remain 0 after flush")
240+
})
241+
242+
t.Run("delete_nonexistent_edge_does_not_affect_count", func(t *testing.T) {
243+
// Similar test for edges
244+
engine := NewMemoryEngine()
245+
asyncConfig := &AsyncEngineConfig{
246+
FlushInterval: 100 * time.Millisecond,
247+
}
248+
asyncEngine := NewAsyncEngine(engine, asyncConfig)
249+
defer asyncEngine.Close()
250+
251+
// Initial count should be 0
252+
count, err := asyncEngine.EdgeCount()
253+
require.NoError(t, err)
254+
assert.Equal(t, int64(0), count, "Initial edge count should be 0")
255+
256+
// Try to delete a non-existent edge
257+
err = asyncEngine.DeleteEdge("nonexistent_edge_id")
258+
assert.Equal(t, ErrNotFound, err, "Deleting non-existent edge should return ErrNotFound")
259+
260+
// Count should still be 0
261+
count, err = asyncEngine.EdgeCount()
262+
require.NoError(t, err)
263+
assert.Equal(t, int64(0), count, "Edge count should remain 0 after deleting non-existent edge")
264+
265+
// Flush and verify
266+
require.NoError(t, asyncEngine.Flush())
267+
268+
count, err = asyncEngine.EdgeCount()
269+
require.NoError(t, err)
270+
assert.Equal(t, int64(0), count, "Edge count should remain 0 after flush")
271+
})
209272
}
210273

211274
// TestBadgerEngineDetachDeleteStats tests the underlying BadgerEngine directly

pkg/storage/badger.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2286,7 +2286,14 @@ func (b *BadgerEngine) NodeCount() (int64, error) {
22862286

22872287
// Return cached count for O(1) performance
22882288
// The counter is updated atomically on create/delete operations
2289-
return b.nodeCount.Load(), nil
2289+
count := b.nodeCount.Load()
2290+
2291+
// Clamp to zero if negative (should never happen, log for debugging)
2292+
if count < 0 {
2293+
log.Printf("⚠️ [COUNT BUG] BadgerEngine.NodeCount went negative: %d (clamping to 0)", count)
2294+
return 0, nil
2295+
}
2296+
return count, nil
22902297
}
22912298

22922299
// EdgeCount returns the total number of valid, decodable edges.
@@ -2301,7 +2308,14 @@ func (b *BadgerEngine) EdgeCount() (int64, error) {
23012308

23022309
// Return cached count for O(1) performance
23032310
// The counter is updated atomically on create/delete operations
2304-
return b.edgeCount.Load(), nil
2311+
count := b.edgeCount.Load()
2312+
2313+
// Clamp to zero if negative (should never happen, log for debugging)
2314+
if count < 0 {
2315+
log.Printf("⚠️ [COUNT BUG] BadgerEngine.EdgeCount went negative: %d (clamping to 0)", count)
2316+
return 0, nil
2317+
}
2318+
return count, nil
23052319
}
23062320

23072321
// GetSchema returns the schema manager.

0 commit comments

Comments
 (0)