This guide explains how to write and run tests for the bv codebase. All contributions should include appropriate tests.
We prefer concrete test data over mocks or fakes. This approach:
- Makes tests easier to understand and debug
- Avoids the complexity of maintaining mock implementations
- Ensures tests exercise real code paths
- Produces more reliable tests
Instead of mocking:
// DON'T do this
mockAnalyzer := &MockAnalyzer{}
mockAnalyzer.On("Analyze").Return(fakeStats)
// DO this
issues := testutil.QuickChain(5) // Real issues with real dependencies
analyzer := analysis.NewAnalyzer(issues)
stats := analyzer.Analyze() // Real analysis

Use table-driven tests for comprehensive coverage:
func TestMyFunction(t *testing.T) {
tests := []struct {
name string
input string
expected int
wantErr bool
}{
{"empty input", "", 0, false},
{"single item", "one", 1, false},
{"invalid", "bad", 0, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := MyFunction(tt.input)
if (err != nil) != tt.wantErr {
t.Errorf("MyFunction() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.expected {
t.Errorf("MyFunction() = %v, want %v", got, tt.expected)
}
})
}
}

For complex outputs (JSON, rendered views, SVG), use golden file testing:
func TestComplexOutput(t *testing.T) {
golden := testutil.NewGoldenFile(t, "testdata/golden", "output.json")
result := GenerateComplexOutput()
golden.AssertJSON(result)
}

Update golden files when intentionally changing output:
GENERATE_GOLDEN=1 go test ./pkg/...

Tests must produce deterministic results:
- Use fixed random seeds (`testutil.DefaultConfig()` uses seed 42)
- Use fixed timestamps (`time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC)`)
- Sort slices before comparison if order doesn't matter
- Unit tests: `*_test.go` in the same directory as the code
- Package tests: `package_test` (black-box) or `package` (white-box)
- E2E tests: `tests/e2e/*_test.go`
// Unit tests: TestFunctionName_Scenario
func TestExtractKeywords_FiltersStopWords(t *testing.T) { ... }
func TestExtractKeywords_HandlesEmptyInput(t *testing.T) { ... }
// Integration tests: TestIntegration_Feature
func TestIntegration_RobotTriageCommand(t *testing.T) { ... }
// E2E tests: TestEndToEnd_Workflow
func TestEndToEnd_RobotPlanCommand(t *testing.T) { ... }

Group related tests with t.Run():
func TestAnalyzer(t *testing.T) {
t.Run("Empty", func(t *testing.T) {
// test empty input
})
t.Run("SingleNode", func(t *testing.T) {
// test single node
})
t.Run("Chain", func(t *testing.T) {
// test chain topology
})
}

The testutil package provides graph topology generators:
// Quick convenience functions
issues := testutil.QuickChain(10) // Linear chain: n0 <- n1 <- ... <- n9
issues := testutil.QuickStar(5) // Hub with 5 spokes
issues := testutil.QuickDiamond(3) // Diamond with 3 middle nodes
issues := testutil.QuickCycle(4) // Circular dependency (invalid DAG)
issues := testutil.QuickTree(3, 2) // Tree: depth=3, breadth=2
issues := testutil.QuickRandom(20, 0.3) // Random DAG: 20 nodes, 30% edge density
// Edge cases
issues := testutil.Empty() // Empty slice
issues := testutil.Single() // Single node, no deps

For custom configuration:
gen := testutil.New(testutil.GeneratorConfig{
Seed: 42,
IDPrefix: "TEST",
IncludeLabels: true,
StatusMix: []model.Status{model.StatusOpen, model.StatusInProgress},
})
fixture := gen.Chain(10)
issues := gen.ToIssues(fixture)

testutil.AssertIssueCount(t, issues, 10)
testutil.AssertNoDuplicateIDs(t, issues)
testutil.AssertAllValid(t, issues)
testutil.AssertDependencyExists(t, issues, "from-id", "to-id")
testutil.AssertNoCycles(t, issues)
testutil.AssertHasCycle(t, issues)
testutil.AssertStatusCounts(t, issues, open, inProgress, blocked, closed)
testutil.AssertJSONEqual(t, expected, actual)

// Create temp dir with .beads subdirectory
dir := testutil.TempBeadsDir(t) // Cleaned up automatically
// Write issues to .beads/beads.jsonl
path := testutil.WriteBeadsFile(t, dir, issues)

// Build lookup map
issueMap := testutil.BuildIssueMap(issues)
issue := issueMap["issue-id"]
// Find single issue
issue := testutil.FindIssue(issues, "issue-id")
// Get statistics
counts := testutil.CountByStatus(issues)
ids := testutil.GetIDs(issues)

# Run all tests
go test ./...
# Run with verbose output
go test -v ./...
# Run specific package
go test ./pkg/analysis/...
# Run specific test
go test -v -run TestExtractKeywords ./pkg/analysis/...
# Run with race detector
go test -race ./...

# Using the coverage script (recommended)
./scripts/coverage.sh # Summary
./scripts/coverage.sh html # Open HTML report
./scripts/coverage.sh check # Check thresholds
./scripts/coverage.sh pkg # Per-package breakdown
# By default the script runs coverage for ./pkg/... (fast). Override if needed:
COVER_PACKAGES='./cmd/... ./pkg/...' ./scripts/coverage.sh check
# Manual commands
go test -coverprofile=coverage.out ./...
go tool cover -html=coverage.out -o coverage.html
go tool cover -func=coverage.out

# Run all benchmarks
./scripts/benchmark.sh
# Run specific benchmark
go test -bench=BenchmarkFullAnalysis -benchmem ./pkg/analysis/...
# Compare against baseline
./scripts/benchmark.sh baseline # Save current as baseline
./scripts/benchmark.sh compare # Run and compare

Performance-sensitive tests are gated behind PERF_TEST=1:
PERF_TEST=1 go test -v ./pkg/analysis/... -run TestE2EStartup

E2E tests verify the complete bv binary behavior:
The E2E suite includes a few large-scale/stress scenarios guarded by testing.Short().
# Fast/CI-friendly run (skips stress cases)
go test -short ./tests/e2e
# Full run
go test ./tests/e2e

func TestEndToEnd_Feature(t *testing.T) {
// 1. Use the shared bv binary (built once in TestMain)
bv := buildBvBinary(t)
// 2. Create test environment
envDir := t.TempDir()
os.MkdirAll(filepath.Join(envDir, ".beads"), 0755)
os.WriteFile(filepath.Join(envDir, ".beads", "beads.jsonl"), []byte(jsonl), 0644)
// 3. Execute command
runCmd := exec.Command(bv, "--robot-triage")
runCmd.Dir = envDir
out, err := runCmd.CombinedOutput()
if err != nil {
t.Fatalf("Command failed: %v\n%s", err, out)
}
// 4. Verify output
var result map[string]interface{}
if err := json.Unmarshal(out, &result); err != nil {
t.Fatalf("Invalid JSON: %v", err)
}
// Assert expected fields exist
if _, ok := result["triage"]; !ok {
t.Error("missing 'triage' field")
}
}

Test all --robot-* flags produce valid JSON:
// Verify JSON output
var result map[string]interface{}
json.Unmarshal(out, &result)
// Check required fields
if _, ok := result["generated_at"]; !ok {
t.Error("missing 'generated_at'")
}

Tests run automatically on CI for every push and PR:
- Unit tests with coverage (`go test -coverprofile`)
- Coverage threshold check (pkg/* ≥ 75%, plus per-package thresholds)
- Quick benchmarks for performance regression detection
Coverage is uploaded to Codecov for tracking trends and PR diffs.
For local stress-testing, consider running the race detector:
go test -race ./...

| Package | Minimum |
|---|---|
| pkg/analysis | 75% |
| pkg/export | 80% |
| pkg/recipe | 90% |
| pkg/ui | 55% |
| pkg/loader | 80% |
| pkg/updater | 55% |
| pkg/watcher | 80% |
| pkg/workspace | 85% |
- Test behavior, not implementation - Focus on what functions do, not how
- One assertion per test case - Makes failures easier to diagnose
- Use `t.Helper()` - Mark helper functions for better error locations
- Clean up resources - Use `t.TempDir()` and `t.Cleanup()`
- Avoid sleeping - Use channels or polling instead of `time.Sleep()`
- Test edge cases - Empty inputs, nil values, boundary conditions
- Document test intent - Comment what each test case validates
If tests fail intermittently:
- Check for non-deterministic ordering (use `sort.Slice`)
- Look for time-dependent logic (use fixed timestamps)
- Check for race conditions (`go test -race`)
- Verify cleanup between tests

For slow tests:
- Use the `-short` flag to skip slow tests: `if testing.Short() { t.Skip() }`
- Gate performance tests behind `PERF_TEST=1`
- Profile with `go test -cpuprofile=cpu.out`
Run coverage locally to identify untested paths:
./scripts/coverage.sh html # Opens browser with coverage highlighting
./scripts/coverage.sh uncovered # Lists uncovered lines