Skip to content

Commit 4c5a481

Browse files
committed
Wire hierarchy cache into adapter registry; fix nullable started_at handling in backfill state
- Initialize a HierarchyCache and pass it (together with the logger) to adapters.DefaultRegistry in both the `start` and `backfill run` command flows so that adapters are hierarchy-aware.
- Fix backfill state loading and listing: scan the nullable started_at column into sql.NullInt64 and convert it to time.Time only when valid, instead of scanning directly into time.Time.
1 parent 61e7252 commit 4c5a481

File tree

3 files changed

+152
-7
lines changed

3 files changed

+152
-7
lines changed

packages/collector-go/cmd/collector/main.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/codervisor/devlog/collector/internal/buffer"
1414
"github.com/codervisor/devlog/collector/internal/client"
1515
"github.com/codervisor/devlog/collector/internal/config"
16+
"github.com/codervisor/devlog/collector/internal/hierarchy"
1617
"github.com/codervisor/devlog/collector/internal/watcher"
1718
"github.com/sirupsen/logrus"
1819
"github.com/spf13/cobra"
@@ -93,8 +94,9 @@ var startCmd = &cobra.Command{
9394
}
9495
}
9596

96-
// Initialize adapter registry
97-
registry := adapters.DefaultRegistry(cfg.ProjectID)
97+
// Initialize adapter registry with hierarchy cache
98+
hiererchyCache := hierarchy.NewHierarchyCache(nil, log)
99+
registry := adapters.DefaultRegistry(cfg.ProjectID, hiererchyCache, log)
98100
log.Infof("Registered %d agent adapters", len(registry.List()))
99101

100102
// Initialize buffer
@@ -324,7 +326,8 @@ var backfillRunCmd = &cobra.Command{
324326
}
325327

326328
// Initialize components
327-
registry := adapters.DefaultRegistry(cfg.ProjectID)
329+
hiererchyCache := hierarchy.NewHierarchyCache(nil, log)
330+
registry := adapters.DefaultRegistry(cfg.ProjectID, hiererchyCache, log)
328331

329332
bufferConfig := buffer.Config{
330333
DBPath: cfg.Buffer.DBPath,

packages/collector-go/internal/backfill/state.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ func (s *StateStore) Load(agentName, logFilePath string) (*BackfillState, error)
9292
`
9393

9494
var state BackfillState
95-
var lastTimestamp, completedAt sql.NullInt64
95+
var lastTimestamp, startedAt, completedAt sql.NullInt64
9696
var errorMessage sql.NullString
9797

9898
err := s.db.QueryRow(query, agentName, logFilePath).Scan(
@@ -103,7 +103,7 @@ func (s *StateStore) Load(agentName, logFilePath string) (*BackfillState, error)
103103
&lastTimestamp,
104104
&state.TotalEventsProcessed,
105105
&state.Status,
106-
&state.StartedAt,
106+
&startedAt,
107107
&completedAt,
108108
&errorMessage,
109109
)
@@ -127,6 +127,9 @@ func (s *StateStore) Load(agentName, logFilePath string) (*BackfillState, error)
127127
t := time.Unix(lastTimestamp.Int64, 0)
128128
state.LastTimestamp = &t
129129
}
130+
if startedAt.Valid {
131+
state.StartedAt = time.Unix(startedAt.Int64, 0)
132+
}
130133
if completedAt.Valid {
131134
t := time.Unix(completedAt.Int64, 0)
132135
state.CompletedAt = &t
@@ -251,7 +254,7 @@ func (s *StateStore) ListByAgent(agentName string) ([]*BackfillState, error) {
251254

252255
for rows.Next() {
253256
var state BackfillState
254-
var lastTimestamp, completedAt sql.NullInt64
257+
var lastTimestamp, startedAt, completedAt sql.NullInt64
255258
var errorMessage sql.NullString
256259

257260
err := rows.Scan(
@@ -262,7 +265,7 @@ func (s *StateStore) ListByAgent(agentName string) ([]*BackfillState, error) {
262265
&lastTimestamp,
263266
&state.TotalEventsProcessed,
264267
&state.Status,
265-
&state.StartedAt,
268+
&startedAt,
266269
&completedAt,
267270
&errorMessage,
268271
)
@@ -275,6 +278,9 @@ func (s *StateStore) ListByAgent(agentName string) ([]*BackfillState, error) {
275278
t := time.Unix(lastTimestamp.Int64, 0)
276279
state.LastTimestamp = &t
277280
}
281+
if startedAt.Valid {
282+
state.StartedAt = time.Unix(startedAt.Int64, 0)
283+
}
278284
if completedAt.Valid {
279285
t := time.Unix(completedAt.Int64, 0)
280286
state.CompletedAt = &t
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# Fix Collector Backfill Parsing Errors
2+
3+
**Status**: In Progress
4+
**Created**: 2025-10-31
5+
**Spec**: `20251031/004-collector-parsing-errors`
6+
7+
## Overview
8+
9+
The Go collector's backfill functionality is failing to parse GitHub Copilot chat session log files, resulting in 447K+ parsing errors when processing historical logs. While the SQL timestamp scanning issue has been resolved, the event parsing logic is encountering errors that prevent successful backfill operations.
10+
11+
## Objectives
12+
13+
1. Identify root cause of 447K parsing errors in Copilot log backfill
14+
2. Fix event parsing logic to correctly handle Copilot chat session format
15+
3. Add verbose error logging for debugging
16+
4. Successfully backfill historical Copilot activity
17+
18+
## Current Behavior
19+
20+
**Command**: `./bin/devlog-collector backfill run --days 1`
21+
22+
**Results**:
23+
- Events processed: 0
24+
- Errors: 447,397
25+
- Data processed: 18.02 MB (but not successfully parsed)
26+
- 11 log files discovered but not processed
27+
- No error messages logged to stderr (silent failures)
28+
29+
**Log Files**:
30+
- Location: `~/Library/Application Support/Code - Insiders/User/workspaceStorage/.../chatSessions/`
31+
- Format: JSON chat session files (version 3)
32+
- Size range: 511 bytes to 941 KB
33+
- 11 files total
34+
35+
**Sample Log Structure**:
36+
```json
37+
{
38+
"version": 3,
39+
"requesterUsername": "tikazyq",
40+
"requesterAvatarIconUri": { "$mid": 1, ... },
41+
...
42+
}
43+
```
44+
45+
## Design
46+
47+
### Fixed Issues ✅
48+
49+
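1. **SQL Timestamp Scanning** - Fixed `started_at` column scanning: the nullable integer (Unix seconds) column is now scanned into `sql.NullInt64` and converted to `time.Time` when valid, instead of being scanned directly into `time.Time`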
50+
- File: `packages/collector-go/internal/backfill/state.go`
51+
- Changes: Added `sql.NullInt64` for `startedAt` in both `Load()` and `ListByAgent()` methods
52+
53+
2. **DefaultRegistry Arguments** - Added the missing `hierarchyCache` and `logger` arguments at the call sites
54+
- File: `packages/collector-go/cmd/collector/main.go`
55+
- Changes: Initialize `HierarchyCache` and pass to `DefaultRegistry()` calls
56+
57+
### Root Cause Analysis
58+
59+
The Copilot adapter (`packages/collector-go/internal/adapters/copilot_adapter.go`) likely expects:
60+
- Line-delimited JSON logs (NDJSON format)
61+
- Different schema than chat session format
62+
- Specific event structure that doesn't match chat sessions
63+
64+
The chat session files are full session objects, not individual log events.
65+
66+
## Implementation Plan
67+
68+
### Phase 1: Investigation (High Priority)
69+
- [ ] Add verbose error logging to backfill processor
70+
- [ ] Capture and log first 10 parsing errors with sample data
71+
- [ ] Examine `copilot_adapter.go` to understand expected format
72+
- [ ] Compare expected vs actual log file format
73+
- [ ] Determine if chat sessions are the correct log source
74+
75+
### Phase 2: Fix Parsing Logic
76+
- [ ] Update parser to handle chat session format (if correct source)
77+
- [ ] Or identify and use correct Copilot log files (if wrong source)
78+
- [ ] Add format detection/validation
79+
- [ ] Handle both session-level and event-level data
80+
81+
### Phase 3: Testing
82+
- [ ] Test with sample chat session files
83+
- [ ] Verify successful event extraction
84+
- [ ] Test backfill with various date ranges
85+
- [ ] Validate data sent to backend
86+
- [ ] Test state persistence
87+
88+
## Files to Investigate
89+
90+
```
91+
packages/collector-go/
92+
├── internal/
93+
│ ├── adapters/
94+
│ │ ├── copilot_adapter.go # Parsing logic
95+
│ │ ├── claude_adapter.go
96+
│ │ └── cursor_adapter.go
97+
│ ├── backfill/
98+
│ │ ├── backfill.go # Error handling
99+
│ │ └── state.go # ✅ Fixed
100+
│ └── watcher/
101+
│ └── discovery.go # Log file discovery
102+
└── cmd/collector/main.go # ✅ Fixed
103+
```
104+
105+
## Success Criteria
106+
107+
- [ ] Zero parsing errors on valid log files
108+
- [ ] Successfully extract events from Copilot chat sessions
109+
- [ ] Error messages logged with actionable details
110+
- [ ] Events successfully sent to backend
111+
- [ ] Backfill state properly tracked
112+
- [ ] Throughput > 0 events/sec
113+
114+
## Testing Commands
115+
116+
```bash
117+
# Clean state and test backfill
118+
rm -f ~/.devlog/buffer.db*
119+
cd packages/collector-go
120+
./bin/devlog-collector backfill run --days 1
121+
122+
# Check backfill status
123+
./bin/devlog-collector backfill status
124+
125+
# Build collector
126+
./build.sh
127+
128+
# Verbose mode (when implemented)
129+
./bin/devlog-collector backfill run --days 1 --verbose
130+
```
131+
132+
## References
133+
134+
- Fixed SQL scanning issue in `state.go` (Lines 95-132 in `Load()`, Lines 257-283 in `ListByAgent()`)
135+
- Fixed DefaultRegistry calls in `main.go` (Lines 98-99 in the `start` command, Lines 329-330 in the `backfill run` command)
136+
- Chat session log location: `~/Library/Application Support/Code - Insiders/User/workspaceStorage/.../chatSessions/`

0 commit comments

Comments
 (0)