44 * Indexes git commits into the vector store for semantic search.
55 */
66
7+ import type { Logger } from '@lytics/kero' ;
78import type { VectorStorage } from '../vector' ;
89import type { EmbeddingDocument } from '../vector/types' ;
910import type { GitExtractor } from './extractor' ;
@@ -39,6 +40,8 @@ export interface GitIndexOptions {
3940 noMerges ?: boolean ;
4041 /** Progress callback */
4142 onProgress ?: ( progress : GitIndexProgress ) => void ;
43+ /** Logger instance */
44+ logger ?: Logger ;
4245}
4346
4447/**
@@ -81,6 +84,9 @@ export class GitIndexer {
8184
8285 const limit = options . limit ?? this . commitLimit ;
8386 const onProgress = options . onProgress ;
87+ const logger = options . logger ?. child ( { component : 'git-indexer' } ) ;
88+
89+ logger ?. info ( { limit } , 'Starting git commit extraction' ) ;
8490
8591 // Phase 1: Extract commits
8692 onProgress ?.( {
@@ -101,9 +107,11 @@ export class GitIndexer {
101107 let commits : GitCommit [ ] ;
102108 try {
103109 commits = await this . extractor . getCommits ( extractOptions ) ;
110+ logger ?. info ( { commits : commits . length } , 'Extracted commits' ) ;
104111 } catch ( error ) {
105112 const message = `Failed to extract commits: ${ error instanceof Error ? error . message : String ( error ) } ` ;
106113 errors . push ( message ) ;
114+ logger ?. error ( { error : message } , 'Failed to extract commits' ) ;
107115 return {
108116 commitsIndexed : 0 ,
109117 durationMs : Date . now ( ) - startTime ,
@@ -112,6 +120,7 @@ export class GitIndexer {
112120 }
113121
114122 if ( commits . length === 0 ) {
123+ logger ?. info ( 'No commits to index' ) ;
115124 onProgress ?.( {
116125 phase : 'complete' ,
117126 commitsProcessed : 0 ,
@@ -126,6 +135,7 @@ export class GitIndexer {
126135 }
127136
128137 // Phase 2: Prepare documents for embedding
138+ logger ?. debug ( { commits : commits . length } , 'Preparing commit documents for embedding' ) ;
129139 onProgress ?.( {
130140 phase : 'embedding' ,
131141 commitsProcessed : 0 ,
@@ -136,6 +146,10 @@ export class GitIndexer {
136146 const documents = this . prepareCommitDocuments ( commits ) ;
137147
138148 // Phase 3: Store in batches
149+ logger ?. info (
150+ { documents : documents . length , batchSize : this . batchSize } ,
151+ 'Starting commit embedding'
152+ ) ;
139153 onProgress ?.( {
140154 phase : 'storing' ,
141155 commitsProcessed : 0 ,
@@ -144,26 +158,43 @@ export class GitIndexer {
144158 } ) ;
145159
146160 let commitsIndexed = 0 ;
161+ const totalBatches = Math . ceil ( documents . length / this . batchSize ) ;
147162 for ( let i = 0 ; i < documents . length ; i += this . batchSize ) {
148163 const batch = documents . slice ( i , i + this . batchSize ) ;
164+ const batchNum = Math . floor ( i / this . batchSize ) + 1 ;
149165
150166 try {
151167 await this . vectorStorage . addDocuments ( batch ) ;
152168 commitsIndexed += batch . length ;
153169
170+ // Log every 10 batches, and always on the final batch
171+ if ( batchNum % 10 === 0 || batchNum === totalBatches ) {
172+ logger ?. info (
173+ { batch : batchNum , totalBatches, commitsIndexed, total : commits . length } ,
174+ `Embedded ${ commitsIndexed } /${ commits . length } commits`
175+ ) ;
176+ }
177+
154178 onProgress ?.( {
155179 phase : 'storing' ,
156180 commitsProcessed : commitsIndexed ,
157181 totalCommits : commits . length ,
158182 percentComplete : 50 + ( commitsIndexed / commits . length ) * 50 ,
159183 } ) ;
160184 } catch ( error ) {
161- const message = `Failed to store batch ${ i / this . batchSize } : ${ error instanceof Error ? error . message : String ( error ) } ` ;
185+ const message = `Failed to store batch ${ batchNum } : ${ error instanceof Error ? error . message : String ( error ) } ` ;
162186 errors . push ( message ) ;
187+ logger ?. error ( { batch : batchNum , error : message } , 'Failed to store commit batch' ) ;
163188 }
164189 }
165190
166191 // Phase 4: Complete
192+ const durationMs = Date . now ( ) - startTime ;
193+ logger ?. info (
194+ { commitsIndexed, duration : `${ durationMs } ms` , errors : errors . length } ,
195+ 'Git indexing complete'
196+ ) ;
197+
167198 onProgress ?.( {
168199 phase : 'complete' ,
169200 commitsProcessed : commitsIndexed ,
@@ -173,7 +204,7 @@ export class GitIndexer {
173204
174205 return {
175206 commitsIndexed,
176- durationMs : Date . now ( ) - startTime ,
207+ durationMs,
177208 errors,
178209 } ;
179210 }
0 commit comments