Skip to content

Commit d23d1a9

Browse files
committed
feat(cli): massive indexing performance and UX improvements
1 parent 88a1f72 commit d23d1a9

File tree

21 files changed

+803
-339
lines changed

21 files changed

+803
-339
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
---
2+
"@lytics/dev-agent-core": minor
3+
"@lytics/dev-agent-cli": minor
4+
---
5+
6+
Massive indexing performance and UX improvements
7+
8+
**Performance Optimizations (184% faster):**
9+
- **63x faster metadata collection**: Eliminated 863 individual git calls by using a single batched git command
10+
- **Removed storage size calculation**: Deferred to on-demand in `dev stats` (saves 1-3s)
11+
- **Simplified ownership tracking**: Author contributions now calculated on-demand in `dev owners` (1s), removed SQLite pre-indexing overhead
12+
- **Total speedup**: Indexing now completes in ~33s vs ~95s (61s improvement!)
13+
14+
**Architecture Simplifications:**
15+
- Removed `file_authors` SQLite table (on-demand is fast enough)
16+
- Removed `appendFileAuthors()` and `getFileAuthors()` from MetricsStore
17+
- Removed `authorContributions` from IndexUpdatedEvent
18+
- Cleaner separation: metrics for analytics, ownership for developer insights
19+
20+
**UX Improvements (no more silent gaps):**
21+
- **Section-based progress display**: Clean, informative output inspired by Homebrew/Cargo
22+
- **Applied to 4 commands**: `dev index`, `dev update`, `dev git index`, `dev github index`
23+
- **Live progress updates**: Shows current progress for each phase (scanning, embedding, git, GitHub)
24+
- **Clean indexing plan**: Removed INFO timestamps from plan display
25+
- **Helpful next steps**: Suggests relevant commands after indexing completes
26+
- **More frequent scanner progress**: Logs every 2 batches OR every 10 seconds (was every 50 files)
27+
- **Slow file detection**: Debug logs for files/batches taking >5s to process
28+
- **Cleaner completion summary**: Removed storage size from index output (shown in `dev stats` instead)
29+
- **Continuous feedback**: Maximum 1-second gaps between progress updates
30+
- **Better developer grouping**: `dev owners` now groups by GitHub handle instead of email (merges multiple emails for same developer)
31+
- **File breakdown per developer**: Shows top 5 files owned with commit counts and LOC
32+
- **Graceful degradation**: Verbose mode and non-TTY environments show traditional log output
33+
34+
**Technical Details:**
35+
- Added `log-update` dependency for smooth single-line progress updates
36+
- New `ProgressRenderer` class for section-based progress display
37+
- Optimized `buildCodeMetadata()` to derive change frequency from author contributions instead of making separate git calls
38+
- Scanner now tracks time since last log and ensures updates every 10s
39+
- Storage size calculation moved from index-time to query-time (lazy evaluation)
40+
- TTY detection for graceful fallback in CI/CD environments
41+
42+
**Before:**
43+
```
44+
[14:27:37] typescript 3450/3730 (92%)
45+
← ~2.5 MINUTES OF SILENCE
46+
[14:30:09] typescript 3600/3730 (97%)
47+
← EMBEDDING COMPLETES
48+
← 63 SECONDS OF SILENCE
49+
[14:31:12] Starting git extraction
50+
```
51+
52+
**After:**
53+
```
54+
▸ Scanning Repository
55+
357/433 files (82%, 119 files/sec)
56+
✓ Scanning Repository (3.2s)
57+
433 files → 2,525 components
58+
59+
▸ Embedding Vectors
60+
1,600/2,525 documents (63%, 108 docs/sec)
61+
✓ Embedding Vectors (20.7s)
62+
2,525 documents
63+
64+
▸ Git History
65+
150/252 commits (60%)
66+
✓ Git History (4.4s)
67+
252 commits
68+
69+
▸ GitHub Issues/PRs
70+
82/163 documents (50%)
71+
✓ GitHub Issues/PRs (7.8s)
72+
163 documents
73+
74+
✓ Repository indexed successfully!
75+
76+
Indexed: 433 files • 2,525 components • 252 commits • 163 GitHub docs
77+
Duration: 33.5s
78+
79+
💡 Next steps:
80+
dev map Explore codebase structure
81+
dev owners See contributor stats
82+
dev activity Find active files
83+
```
84+

packages/cli/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
"chalk": "^5.6.2",
4646
"cli-table3": "^0.6.5",
4747
"commander": "^12.1.0",
48+
"log-update": "^6.1.0",
4849
"ora": "^8.0.1",
4950
"terminal-size": "^4.0.0"
5051
},

packages/cli/src/commands/commands.test.ts

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ describe('CLI Commands', () => {
8282
);
8383
});
8484

85-
it('should display storage size after indexing', async () => {
85+
it('should display indexing summary without storage size', async () => {
8686
const indexDir = path.join(testDir, 'index-test');
8787
await fs.mkdir(indexDir, { recursive: true });
8888

@@ -120,13 +120,15 @@ export class Calculator {
120120
exitSpy.mockRestore();
121121
console.log = originalConsoleLog;
122122

123-
// Verify storage size is in the output (new compact format shows it after duration)
124-
const storageSizeLog = loggedMessages.find(
125-
(msg) => msg.includes('Duration:') || msg.includes('Storage:')
126-
);
127-
expect(storageSizeLog).toBeDefined();
128-
// Check for storage size in compact format: "Duration: X • Storage: Y"
129-
expect(loggedMessages.some((msg) => /\d+(\.\d+)?\s*(B|KB|MB|GB)/.test(msg))).toBe(true);
123+
// Verify summary shows duration (storage size calculated on-demand in `dev stats`)
124+
const durationLog = loggedMessages.find((msg) => msg.includes('Duration:'));
125+
expect(durationLog).toBeDefined();
126+
// Verify storage size is NOT shown (deferred to `dev stats`)
127+
const hasStorageSize = loggedMessages.some((msg) => msg.includes('Storage:'));
128+
expect(hasStorageSize).toBe(false);
129+
// Verify indexed stats are shown
130+
const indexedLog = loggedMessages.find((msg) => msg.includes('Indexed:'));
131+
expect(indexedLog).toBeDefined();
130132
}, 30000); // 30s timeout for indexing
131133
});
132134
});

packages/cli/src/commands/git.ts

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ import {
1010
LocalGitExtractor,
1111
VectorStorage,
1212
} from '@lytics/dev-agent-core';
13-
import { createLogger } from '@lytics/kero';
1413
import chalk from 'chalk';
1514
import { Command } from 'commander';
1615
import ora from 'ora';
17-
import { keroLogger, logger } from '../utils/logger.js';
16+
import { createIndexLogger, logger } from '../utils/logger.js';
1817
import { output, printGitStats } from '../utils/output.js';
18+
import { ProgressRenderer } from '../utils/progress.js';
1919

2020
/**
2121
* Create Git indexer with centralized storage
@@ -48,49 +48,91 @@ export const gitCommand = new Command('git')
4848
.addCommand(
4949
new Command('index')
5050
.description('Index git commit history for semantic search')
51-
.option('--limit <number>', 'Maximum commits to index (default: 500)', Number.parseInt, 500)
51+
.option(
52+
'--limit <number>',
53+
'Maximum commits to index (default: 500)',
54+
(val) => Number.parseInt(val, 10),
55+
500
56+
)
5257
.option(
5358
'--since <date>',
5459
'Only index commits after this date (e.g., "2024-01-01", "6 months ago")'
5560
)
5661
.option('-v, --verbose', 'Verbose output', false)
5762
.action(async (options) => {
58-
const spinner = ora('Loading configuration...').start();
63+
const spinner = ora('Initializing git indexer...').start();
5964

6065
// Create logger for indexing
61-
const indexLogger = options.verbose
62-
? createLogger({ level: 'debug', format: 'pretty' })
63-
: keroLogger.child({ command: 'git-index' });
66+
const indexLogger = createIndexLogger(options.verbose);
6467

6568
try {
66-
spinner.text = 'Initializing git indexer...';
67-
6869
const { indexer, vectorStore } = await createGitIndexer();
6970

70-
spinner.text = 'Indexing git commits...';
71+
// Stop spinner and switch to section-based progress
72+
spinner.stop();
73+
74+
// Initialize progress renderer
75+
const progressRenderer = new ProgressRenderer({ verbose: options.verbose });
76+
progressRenderer.setSections(['Extracting Commits', 'Embedding Commits']);
77+
78+
const startTime = Date.now();
79+
const extractStartTime = startTime;
80+
let embeddingStartTime = 0;
81+
let inEmbeddingPhase = false;
7182

7283
const stats = await indexer.index({
7384
limit: options.limit,
7485
since: options.since,
7586
logger: indexLogger,
7687
onProgress: (progress) => {
7788
if (progress.phase === 'storing' && progress.totalCommits > 0) {
89+
// Transitioning to embedding phase
90+
if (!inEmbeddingPhase) {
91+
const extractDuration = (Date.now() - extractStartTime) / 1000;
92+
progressRenderer.completeSection(
93+
`${progress.totalCommits.toLocaleString()} commits extracted`,
94+
extractDuration
95+
);
96+
embeddingStartTime = Date.now();
97+
inEmbeddingPhase = true;
98+
}
99+
100+
// Update embedding progress
78101
const pct = Math.round((progress.commitsProcessed / progress.totalCommits) * 100);
79-
spinner.text = `Embedding ${progress.commitsProcessed}/${progress.totalCommits} commits (${pct}%)`;
102+
progressRenderer.updateSection(
103+
`${progress.commitsProcessed}/${progress.totalCommits} commits (${pct}%)`
104+
);
80105
}
81106
},
82107
});
83108

84-
spinner.succeed(chalk.green('Git history indexed!'));
109+
// Complete embedding section
110+
if (inEmbeddingPhase) {
111+
const embeddingDuration = (Date.now() - embeddingStartTime) / 1000;
112+
progressRenderer.completeSection(
113+
`${stats.commitsIndexed.toLocaleString()} commits`,
114+
embeddingDuration
115+
);
116+
}
85117

86-
// Display stats
87-
logger.log('');
88-
logger.log(chalk.bold('Indexing Stats:'));
89-
logger.log(` Commits indexed: ${chalk.yellow(stats.commitsIndexed)}`);
90-
logger.log(` Duration: ${chalk.cyan(stats.durationMs)}ms`);
91-
logger.log('');
92-
logger.log(chalk.gray('Now you can search with: dev git search "<query>"'));
93-
logger.log('');
118+
const totalDuration = (Date.now() - startTime) / 1000;
119+
120+
// Finalize progress display
121+
progressRenderer.done();
122+
123+
// Display success message
124+
output.log('');
125+
output.success(`Git history indexed successfully!`);
126+
output.log(
127+
` ${chalk.bold('Indexed:')} ${stats.commitsIndexed.toLocaleString()} commits`
128+
);
129+
output.log(` ${chalk.bold('Duration:')} ${totalDuration.toFixed(1)}s`);
130+
output.log('');
131+
output.log(chalk.dim('💡 Next step:'));
132+
output.log(
133+
` ${chalk.cyan('dev git search "<query>"')} ${chalk.dim('Search commit history')}`
134+
);
135+
output.log('');
94136

95137
await vectorStore.close();
96138
} catch (error) {
@@ -111,7 +153,7 @@ export const gitCommand = new Command('git')
111153
new Command('search')
112154
.description('Semantic search over git commit messages')
113155
.argument('<query>', 'Search query (e.g., "authentication bug fix")')
114-
.option('--limit <number>', 'Number of results', Number.parseInt, 10)
156+
.option('--limit <number>', 'Number of results', (val) => Number.parseInt(val, 10), 10)
115157
.option('--json', 'Output as JSON')
116158
.action(async (query, options) => {
117159
const spinner = ora('Loading configuration...').start();

0 commit comments

Comments
 (0)