Skip to content

Commit aa9440c

Browse files
wesm and claude authored
fix: Claude DAG fork detection traverses full subtree (#231)
## Summary

- `countUserTurns` now traverses the full subtree (stack-based DFS) instead of following only the first child at each node. The old approach undercounted user turns in sessions with nested forks, causing the fork heuristic to discard large conversation branches as "small retries."
- Bumps `dataVersion` to 6 so existing databases trigger a full resync on upgrade.

## Context

Sessions with many nested fork points (e.g. 17 in the reported case) would have their main conversation branch dropped entirely. The first-child-only traversal would dead-end after 0-1 user turns at nested forks, making a 124-entry branch look like a trivial retry. Result: a session with 110 real messages would show only 11.

## Test plan

- [x] New regression test `TestForkDetection_NestedForkCountsFullSubtree` covers the specific scenario
- [x] All existing fork detection tests pass
- [x] Full test suite passes (`go test -tags fts5 ./... -short`)
- [ ] Manual test: verify the affected session displays all messages after upgrade

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3396500 commit aa9440c

File tree

3 files changed

+86
-11
lines changed

3 files changed

+86
-11
lines changed

internal/db/db.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ import (
2323
// formatting changes). Old databases with a lower user_version
2424
// trigger a non-destructive re-sync (mtime reset + skip cache
2525
// clear) so existing session data is preserved.
26-
const dataVersion = 5
26+
const dataVersion = 6
2727

2828
//go:embed schema.sql
2929
var schemaSQL string

internal/parser/claude.go

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -601,25 +601,25 @@ func parseDAG(
601601
return results, nil
602602
}
603603

604-
// countUserTurns counts the number of user entries reachable from
605-
// a starting index by following the first child at each node.
604+
// countUserTurns counts all user entries reachable from a
605+
// starting index by traversing the entire subtree. Earlier
606+
// versions followed only the first child at each node, which
607+
// undercounted in sessions with many nested forks and caused
608+
// the fork heuristic to discard the main conversation branch.
606609
func countUserTurns(
607610
entries []dagEntry,
608611
children map[string][]int,
609612
startIdx int,
610613
) int {
611614
count := 0
612-
current := startIdx
613-
for current >= 0 {
615+
stack := []int{startIdx}
616+
for len(stack) > 0 {
617+
current := stack[len(stack)-1]
618+
stack = stack[:len(stack)-1]
614619
if entries[current].entryType == "user" {
615620
count++
616621
}
617-
uuid := entries[current].uuid
618-
kids := children[uuid]
619-
if len(kids) == 0 {
620-
break
621-
}
622-
current = kids[0]
622+
stack = append(stack, children[entries[current].uuid]...)
623623
}
624624
return count
625625
}

internal/parser/fork_test.go

Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -323,6 +323,81 @@ func TestSessionBoundsStartedAtFromLeadingEvent(t *testing.T) {
323323
}
324324
}
325325

326+
func TestForkDetection_NestedForkCountsFullSubtree(t *testing.T) {
327+
// Regression test: when the first child at a fork point
328+
// itself contains nested forks early in its chain, the
329+
// old first-child-only countUserTurns would see only 1
330+
// user turn (following first children that dead-end
331+
// quickly) and treat the entire large branch as a small
332+
// retry, discarding it.
333+
//
334+
// DAG: root(a) -> b (fork point)
335+
// First child: c -> d (fork) -> e -> f -> g -> h -> i -> j
336+
// \-> d2 (retry, 1 entry)
337+
// Second child: z (1 entry, the "retry")
338+
//
339+
// The first child subtree has 5 user turns total (c,e,g,i
340+
// plus d2). That layout does not reproduce the bug, though:
341+
// d2 is the SECOND child of d, not the first, so the old
342+
// first-child-only traversal follows c->d->e->... and
343+
// still counts 4 user turns (c,e,g,i), which is > 3.
344+
// The test therefore builds a clearer case: the first
345+
// child at the nested fork is a dead-end assistant reply, so
346+
// first-child traversal stops there with 0 further user turns.
347+
//
348+
// DAG: root(a) -> b (fork)
349+
// First child: c(user) -> d(asst, fork)
350+
// d -> e(asst, dead-end first child)
351+
// d -> f(user) -> g(asst) -> h(user) ->
352+
// i(asst) -> j(user) -> k(asst)
353+
// Second child: z(user, 1 msg)
354+
//
355+
// Old countUserTurns for c: c(user,1) -> d(asst) ->
356+
// e(asst, no children) = 1 user turn <= 3 -> retry!
357+
// New countUserTurns for c: 1+0+0+1+0+1+0+1+0 = 4 > 3
358+
content := testjsonl.NewSessionBuilder().
359+
AddClaudeUserWithUUID("2024-01-01T10:00:00Z", "start", "a", "").
360+
AddClaudeAssistantWithUUID("2024-01-01T10:00:01Z", "ok", "b", "a").
361+
// First child branch from b: large subtree
362+
AddClaudeUserWithUUID("2024-01-01T10:00:02Z", "main1", "c", "b").
363+
AddClaudeAssistantWithUUID("2024-01-01T10:00:03Z", "m-ok1", "d", "c").
364+
// Nested fork at d: first child is a dead-end
365+
AddClaudeAssistantWithUUID("2024-01-01T10:00:04Z", "dead-end", "e", "d").
366+
// Second child of d's fork continues the real conversation
367+
AddClaudeUserWithUUID("2024-01-01T10:00:05Z", "main2", "f", "d").
368+
AddClaudeAssistantWithUUID("2024-01-01T10:00:06Z", "m-ok2", "g", "f").
369+
AddClaudeUserWithUUID("2024-01-01T10:00:07Z", "main3", "h", "g").
370+
AddClaudeAssistantWithUUID("2024-01-01T10:00:08Z", "m-ok3", "i", "h").
371+
AddClaudeUserWithUUID("2024-01-01T10:00:09Z", "main4", "j", "i").
372+
AddClaudeAssistantWithUUID("2024-01-01T10:00:10Z", "m-ok4", "k", "j").
373+
// Second child of b's fork: trivial retry
374+
AddClaudeUserWithUUID("2024-01-01T10:01:00Z", "retry", "z", "b").
375+
String()
376+
377+
// The first child subtree has 4 user turns (c,f,h,j) > 3,
378+
// so it should be treated as a large-gap fork. We expect
379+
// 2 results: main path (a,b,c,d,f,g,h,i,j,k = 10 msgs)
380+
// and the fork (z = 1 msg).
381+
results := parseTestContent(t, "nested-fork-subtree.jsonl", content, 2)
382+
383+
// Main path should follow first child at b, then second
384+
// child at d (the retry heuristic picks last child when
385+
// first child has <= 3 user turns — here "e" is a dead
386+
// end with 0 user turns so the nested fork follows "f").
387+
main := results[0]
388+
if main.Session.MessageCount < 8 {
389+
t.Errorf(
390+
"main MessageCount = %d, want >= 8 "+
391+
"(first child subtree should not be discarded)",
392+
main.Session.MessageCount,
393+
)
394+
}
395+
396+
// The trivial "retry" branch should be the fork.
397+
fork := results[1]
398+
assertMessage(t, fork.Messages[0], RoleUser, "retry")
399+
}
400+
326401
func TestSessionBoundsDAGMainWidenedNotFork(t *testing.T) {
327402
// DAG session with a trailing queue-operation after all
328403
// messages. Main session's EndedAt should be widened;

0 commit comments

Comments
 (0)