Skip to content

Commit 74b721b

Browse files
committed
feat(cli): add logging to git and GitHub indexers
- GitIndexer: add logger option with progress logging
- GitHubIndexer: add logger and onProgress callback
- CLI git/gh commands: add --verbose flag
- Spinner shows progress: 'Embedding X/Y commits/docs (N%)'

Example output:
  [git-indexer] Starting git commit extraction {limit: 500}
  [git-indexer] Extracted commits {commits: 500}
  [git-indexer] Embedded 320/500 commits {batch: 10}
  [github-indexer] Fetched GitHub documents {documents: 150}
  [github-indexer] Document breakdown {issues: 100, prs: 50}
1 parent 1500caf commit 74b721b

File tree

7 files changed

+163
-17
lines changed

7 files changed

+163
-17
lines changed

packages/cli/src/commands/gh.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55

66
import { getStorageFilePaths, getStoragePath } from '@lytics/dev-agent-core';
77
import { GitHubIndexer } from '@lytics/dev-agent-subagents';
8+
import { createLogger } from '@lytics/kero';
89
import chalk from 'chalk';
910
import { Command } from 'commander';
1011
import ora from 'ora';
11-
import { logger } from '../utils/logger.js';
12+
import { keroLogger, logger } from '../utils/logger.js';
1213

1314
/**
1415
* Create GitHub indexer with centralized storage
@@ -52,9 +53,15 @@ export const ghCommand = new Command('gh')
5253
.option('--prs-only', 'Index only pull requests')
5354
.option('--state <state>', 'Filter by state (open, closed, merged, all)', 'all')
5455
.option('--limit <number>', 'Limit number of items to fetch', Number.parseInt)
56+
.option('-v, --verbose', 'Verbose output', false)
5557
.action(async (options) => {
5658
const spinner = ora('Loading configuration...').start();
5759

60+
// Create logger for indexing
61+
const indexLogger = options.verbose
62+
? createLogger({ level: 'debug', format: 'pretty' })
63+
: keroLogger.child({ command: 'gh-index' });
64+
5865
try {
5966
spinner.text = 'Initializing indexers...';
6067

@@ -82,6 +89,14 @@ export const ghCommand = new Command('gh')
8289
types: types as ('issue' | 'pull_request')[],
8390
state: state as ('open' | 'closed' | 'merged')[] | undefined,
8491
limit: options.limit,
92+
logger: indexLogger,
93+
onProgress: (progress) => {
94+
if (progress.phase === 'fetching') {
95+
spinner.text = 'Fetching GitHub issues/PRs...';
96+
} else if (progress.phase === 'embedding') {
97+
spinner.text = `Embedding ${progress.documentsProcessed}/${progress.totalDocuments} GitHub docs`;
98+
}
99+
},
85100
});
86101

87102
spinner.succeed(chalk.green('GitHub data indexed!'));

packages/cli/src/commands/git.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ import {
1010
LocalGitExtractor,
1111
VectorStorage,
1212
} from '@lytics/dev-agent-core';
13+
import { createLogger } from '@lytics/kero';
1314
import chalk from 'chalk';
1415
import { Command } from 'commander';
1516
import ora from 'ora';
16-
import { logger } from '../utils/logger.js';
17+
import { keroLogger, logger } from '../utils/logger.js';
1718

1819
/**
1920
* Create Git indexer with centralized storage
@@ -51,9 +52,15 @@ export const gitCommand = new Command('git')
5152
'--since <date>',
5253
'Only index commits after this date (e.g., "2024-01-01", "6 months ago")'
5354
)
55+
.option('-v, --verbose', 'Verbose output', false)
5456
.action(async (options) => {
5557
const spinner = ora('Loading configuration...').start();
5658

59+
// Create logger for indexing
60+
const indexLogger = options.verbose
61+
? createLogger({ level: 'debug', format: 'pretty' })
62+
: keroLogger.child({ command: 'git-index' });
63+
5764
try {
5865
spinner.text = 'Initializing git indexer...';
5966

@@ -64,6 +71,13 @@ export const gitCommand = new Command('git')
6471
const stats = await indexer.index({
6572
limit: options.limit,
6673
since: options.since,
74+
logger: indexLogger,
75+
onProgress: (progress) => {
76+
if (progress.phase === 'storing' && progress.totalCommits > 0) {
77+
const pct = Math.round((progress.commitsProcessed / progress.totalCommits) * 100);
78+
spinner.text = `Embedding ${progress.commitsProcessed}/${progress.totalCommits} commits (${pct}%)`;
79+
}
80+
},
6781
});
6882

6983
spinner.succeed(chalk.green('Git history indexed!'));

packages/cli/src/commands/index.ts

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,16 @@ export const indexCommand = new Command('index')
193193
vectorStorage: gitVectorStore,
194194
});
195195

196-
gitStats = await gitIndexer.index({ limit: options.gitLimit });
196+
gitStats = await gitIndexer.index({
197+
limit: options.gitLimit,
198+
logger: indexLogger,
199+
onProgress: (progress) => {
200+
if (progress.phase === 'storing' && progress.totalCommits > 0) {
201+
const pct = Math.round((progress.commitsProcessed / progress.totalCommits) * 100);
202+
spinner.text = `Embedding ${progress.commitsProcessed}/${progress.totalCommits} commits (${pct}%)`;
203+
}
204+
},
205+
});
197206
await gitVectorStore.close();
198207

199208
spinner.succeed(chalk.green('Git history indexed!'));
@@ -219,6 +228,14 @@ export const indexCommand = new Command('index')
219228

220229
ghStats = await ghIndexer.index({
221230
limit: options.ghLimit,
231+
logger: indexLogger,
232+
onProgress: (progress) => {
233+
if (progress.phase === 'fetching') {
234+
spinner.text = 'Fetching GitHub issues/PRs...';
235+
} else if (progress.phase === 'embedding') {
236+
spinner.text = `Embedding ${progress.documentsProcessed}/${progress.totalDocuments} GitHub docs`;
237+
}
238+
},
222239
});
223240
spinner.succeed(chalk.green('GitHub indexed!'));
224241
logger.log('');

packages/cli/src/utils/logger.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import { createLogger, type Logger, type LogLevel } from '@lytics/kero';
66

77
// Create a logger with pretty output and icons
8-
const keroLogger = createLogger({
8+
export const keroLogger = createLogger({
99
preset: 'development',
1010
format: 'pretty',
1111
});

packages/core/src/git/indexer.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* Indexes git commits into the vector store for semantic search.
55
*/
66

7+
import type { Logger } from '@lytics/kero';
78
import type { VectorStorage } from '../vector';
89
import type { EmbeddingDocument } from '../vector/types';
910
import type { GitExtractor } from './extractor';
@@ -39,6 +40,8 @@ export interface GitIndexOptions {
3940
noMerges?: boolean;
4041
/** Progress callback */
4142
onProgress?: (progress: GitIndexProgress) => void;
43+
/** Logger instance */
44+
logger?: Logger;
4245
}
4346

4447
/**
@@ -81,6 +84,9 @@ export class GitIndexer {
8184

8285
const limit = options.limit ?? this.commitLimit;
8386
const onProgress = options.onProgress;
87+
const logger = options.logger?.child({ component: 'git-indexer' });
88+
89+
logger?.info({ limit }, 'Starting git commit extraction');
8490

8591
// Phase 1: Extract commits
8692
onProgress?.({
@@ -101,9 +107,11 @@ export class GitIndexer {
101107
let commits: GitCommit[];
102108
try {
103109
commits = await this.extractor.getCommits(extractOptions);
110+
logger?.info({ commits: commits.length }, 'Extracted commits');
104111
} catch (error) {
105112
const message = `Failed to extract commits: ${error instanceof Error ? error.message : String(error)}`;
106113
errors.push(message);
114+
logger?.error({ error: message }, 'Failed to extract commits');
107115
return {
108116
commitsIndexed: 0,
109117
durationMs: Date.now() - startTime,
@@ -112,6 +120,7 @@ export class GitIndexer {
112120
}
113121

114122
if (commits.length === 0) {
123+
logger?.info('No commits to index');
115124
onProgress?.({
116125
phase: 'complete',
117126
commitsProcessed: 0,
@@ -126,6 +135,7 @@ export class GitIndexer {
126135
}
127136

128137
// Phase 2: Prepare documents for embedding
138+
logger?.debug({ commits: commits.length }, 'Preparing commit documents for embedding');
129139
onProgress?.({
130140
phase: 'embedding',
131141
commitsProcessed: 0,
@@ -136,6 +146,10 @@ export class GitIndexer {
136146
const documents = this.prepareCommitDocuments(commits);
137147

138148
// Phase 3: Store in batches
149+
logger?.info(
150+
{ documents: documents.length, batchSize: this.batchSize },
151+
'Starting commit embedding'
152+
);
139153
onProgress?.({
140154
phase: 'storing',
141155
commitsProcessed: 0,
@@ -144,26 +158,43 @@ export class GitIndexer {
144158
});
145159

146160
let commitsIndexed = 0;
161+
const totalBatches = Math.ceil(documents.length / this.batchSize);
147162
for (let i = 0; i < documents.length; i += this.batchSize) {
148163
const batch = documents.slice(i, i + this.batchSize);
164+
const batchNum = Math.floor(i / this.batchSize) + 1;
149165

150166
try {
151167
await this.vectorStorage.addDocuments(batch);
152168
commitsIndexed += batch.length;
153169

170+
// Log every 10 batches
171+
if (batchNum % 10 === 0 || batchNum === totalBatches) {
172+
logger?.info(
173+
{ batch: batchNum, totalBatches, commitsIndexed, total: commits.length },
174+
`Embedded ${commitsIndexed}/${commits.length} commits`
175+
);
176+
}
177+
154178
onProgress?.({
155179
phase: 'storing',
156180
commitsProcessed: commitsIndexed,
157181
totalCommits: commits.length,
158182
percentComplete: 50 + (commitsIndexed / commits.length) * 50,
159183
});
160184
} catch (error) {
161-
const message = `Failed to store batch ${i / this.batchSize}: ${error instanceof Error ? error.message : String(error)}`;
185+
const message = `Failed to store batch ${batchNum}: ${error instanceof Error ? error.message : String(error)}`;
162186
errors.push(message);
187+
logger?.error({ batch: batchNum, error: message }, 'Failed to store commit batch');
163188
}
164189
}
165190

166191
// Phase 4: Complete
192+
const durationMs = Date.now() - startTime;
193+
logger?.info(
194+
{ commitsIndexed, duration: `${durationMs}ms`, errors: errors.length },
195+
'Git indexing complete'
196+
);
197+
167198
onProgress?.({
168199
phase: 'complete',
169200
commitsProcessed: commitsIndexed,
@@ -173,7 +204,7 @@ export class GitIndexer {
173204

174205
return {
175206
commitsIndexed,
176-
durationMs: Date.now() - startTime,
207+
durationMs,
177208
errors,
178209
};
179210
}

packages/subagents/src/github/indexer.ts

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,64 @@ export class GitHubIndexer {
7979
*/
8080
async index(options: GitHubIndexOptions = {}): Promise<GitHubIndexStats> {
8181
const startTime = Date.now();
82+
const onProgress = options.onProgress;
83+
const logger = options.logger?.child({ component: 'github-indexer' });
84+
85+
logger?.info(
86+
{ repository: options.repository || this.repository },
87+
'Starting GitHub data fetch'
88+
);
89+
90+
// Phase 1: Fetch all documents from GitHub
91+
onProgress?.({
92+
phase: 'fetching',
93+
documentsProcessed: 0,
94+
totalDocuments: 0,
95+
percentComplete: 0,
96+
});
8297

83-
// Fetch all documents from GitHub
8498
const documents = fetchAllDocuments({
8599
...options,
86100
repository: options.repository || this.repository,
87101
});
88102

89-
// Enrich with relationships
103+
logger?.info({ documents: documents.length }, 'Fetched GitHub documents');
104+
105+
// Phase 2: Enrich with relationships
106+
onProgress?.({
107+
phase: 'enriching',
108+
documentsProcessed: 0,
109+
totalDocuments: documents.length,
110+
percentComplete: 25,
111+
});
112+
113+
logger?.debug({ documents: documents.length }, 'Enriching documents with relationships');
90114
const enrichedDocs = documents.map((doc) => enrichDocument(doc));
91115

116+
// Calculate stats by type
117+
const byType = enrichedDocs.reduce(
118+
(acc, doc) => {
119+
acc[doc.type] = (acc[doc.type] || 0) + 1;
120+
return acc;
121+
},
122+
{} as Record<string, number>
123+
);
124+
125+
logger?.info(
126+
{ issues: byType.issue || 0, prs: byType.pull_request || 0 },
127+
'Document breakdown'
128+
);
129+
130+
// Phase 3: Convert and embed
131+
onProgress?.({
132+
phase: 'embedding',
133+
documentsProcessed: 0,
134+
totalDocuments: enrichedDocs.length,
135+
percentComplete: 50,
136+
});
137+
138+
logger?.info({ documents: enrichedDocs.length }, 'Starting GitHub embedding');
139+
92140
// Convert to vector storage format
93141
const vectorDocs = enrichedDocs.map((doc) => ({
94142
id: `${doc.type}-${doc.number}`,
@@ -114,14 +162,13 @@ export class GitHubIndexer {
114162
// Duplicates are handled by ID (overwrites existing)
115163
await this.vectorStorage.addDocuments(vectorDocs);
116164

117-
// Calculate stats
118-
const byType = enrichedDocs.reduce(
119-
(acc, doc) => {
120-
acc[doc.type] = (acc[doc.type] || 0) + 1;
121-
return acc;
122-
},
123-
{} as Record<string, number>
124-
);
165+
// Phase 4: Complete
166+
onProgress?.({
167+
phase: 'complete',
168+
documentsProcessed: enrichedDocs.length,
169+
totalDocuments: enrichedDocs.length,
170+
percentComplete: 100,
171+
});
125172

126173
const byState = enrichedDocs.reduce(
127174
(acc, doc) => {
@@ -144,13 +191,19 @@ export class GitHubIndexer {
144191
// Save state to disk
145192
await this.saveState();
146193

194+
const durationMs = Date.now() - startTime;
195+
logger?.info(
196+
{ documents: enrichedDocs.length, duration: `${durationMs}ms` },
197+
'GitHub indexing complete'
198+
);
199+
147200
return {
148201
repository: this.repository,
149202
totalDocuments: enrichedDocs.length,
150203
byType: byType as Record<'issue' | 'pull_request' | 'discussion', number>,
151204
byState: byState as Record<'open' | 'closed' | 'merged', number>,
152205
lastIndexed: this.state.lastIndexed,
153-
indexDuration: Date.now() - startTime,
206+
indexDuration: durationMs,
154207
};
155208
}
156209

packages/subagents/src/github/types.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* Type definitions for GitHub data indexing and context provision
44
*/
55

6+
import type { Logger } from '@lytics/kero';
7+
68
/**
79
* Type of GitHub document
810
*/
@@ -106,6 +108,16 @@ export interface GitHubIndexerState {
106108
byState: Record<GitHubState, number>;
107109
}
108110

111+
/**
112+
* Progress information for GitHub indexing
113+
*/
114+
export interface GitHubIndexProgress {
115+
phase: 'fetching' | 'enriching' | 'embedding' | 'complete';
116+
documentsProcessed: number;
117+
totalDocuments: number;
118+
percentComplete: number;
119+
}
120+
109121
/**
110122
* GitHub indexing options
111123
*/
@@ -115,6 +127,10 @@ export interface GitHubIndexOptions {
115127
state?: GitHubState[];
116128
since?: string; // ISO date - only index items updated after this
117129
limit?: number; // Max items to fetch (for testing)
130+
/** Progress callback */
131+
onProgress?: (progress: GitHubIndexProgress) => void;
132+
/** Logger instance */
133+
logger?: Logger;
118134
}
119135

120136
/**

0 commit comments

Comments
 (0)