@@ -130,79 +130,96 @@ export class CodeIndexOrchestrator {
130130 await this . cacheManager . clearCacheFile ( )
131131 }
132132
133- this . stateManager . setSystemState ( "Indexing" , "Services ready. Starting workspace scan..." )
133+ // Check if the collection already has indexed data
134+ // If it does, we can skip the full scan and just start the watcher
135+ const hasExistingData = await this . vectorStore . hasIndexedData ( )
136+
137+ if ( hasExistingData && ! collectionCreated ) {
138+ // Collection exists with data - skip the full scan
139+ console . log (
140+ "[CodeIndexOrchestrator] Collection already has indexed data. Skipping full scan and starting file watcher." ,
141+ )
142+ this . stateManager . setSystemState ( "Indexing" , "Resuming from existing index..." )
134143
135- let cumulativeBlocksIndexed = 0
136- let cumulativeBlocksFoundSoFar = 0
137- let batchErrors : Error [ ] = [ ]
144+ await this . _startWatcher ( )
138145
139- const handleFileParsed = ( fileBlockCount : number ) => {
140- cumulativeBlocksFoundSoFar += fileBlockCount
141- this . stateManager . reportBlockIndexingProgress ( cumulativeBlocksIndexed , cumulativeBlocksFoundSoFar )
142- }
146+ this . stateManager . setSystemState ( "Indexed" , t ( "embeddings:orchestrator.fileWatcherStarted" ) )
147+ } else {
148+ // No existing data or collection was just created - do a full scan
149+ this . stateManager . setSystemState ( "Indexing" , "Services ready. Starting workspace scan..." )
143150
144- const handleBlocksIndexed = ( indexedCount : number ) => {
145- cumulativeBlocksIndexed += indexedCount
146- this . stateManager . reportBlockIndexingProgress ( cumulativeBlocksIndexed , cumulativeBlocksFoundSoFar )
147- }
151+ let cumulativeBlocksIndexed = 0
152+ let cumulativeBlocksFoundSoFar = 0
153+ let batchErrors : Error [ ] = [ ]
148154
149- const result = await this . scanner . scanDirectory (
150- this . workspacePath ,
151- ( batchError : Error ) => {
152- console . error (
153- `[CodeIndexOrchestrator] Error during initial scan batch: ${ batchError . message } ` ,
154- batchError ,
155- )
156- batchErrors . push ( batchError )
157- } ,
158- handleBlocksIndexed ,
159- handleFileParsed ,
160- )
155+ const handleFileParsed = ( fileBlockCount : number ) => {
156+ cumulativeBlocksFoundSoFar += fileBlockCount
157+ this . stateManager . reportBlockIndexingProgress ( cumulativeBlocksIndexed , cumulativeBlocksFoundSoFar )
158+ }
161159
162- if ( ! result ) {
163- throw new Error ( "Scan failed, is scanner initialized?" )
164- }
160+ const handleBlocksIndexed = ( indexedCount : number ) => {
161+ cumulativeBlocksIndexed += indexedCount
162+ this . stateManager . reportBlockIndexingProgress ( cumulativeBlocksIndexed , cumulativeBlocksFoundSoFar )
163+ }
165164
166- const { stats } = result
165+ const result = await this . scanner . scanDirectory (
166+ this . workspacePath ,
167+ ( batchError : Error ) => {
168+ console . error (
169+ `[CodeIndexOrchestrator] Error during initial scan batch: ${ batchError . message } ` ,
170+ batchError ,
171+ )
172+ batchErrors . push ( batchError )
173+ } ,
174+ handleBlocksIndexed ,
175+ handleFileParsed ,
176+ )
167177
168- // Check if any blocks were actually indexed successfully
169- // If no blocks were indexed but blocks were found, it means all batches failed
170- if ( cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0 ) {
171- if ( batchErrors . length > 0 ) {
172- // Use the first batch error as it's likely representative of the main issue
173- const firstError = batchErrors [ 0 ]
174- throw new Error ( `Indexing failed: ${ firstError . message } ` )
175- } else {
176- throw new Error ( t ( "embeddings:orchestrator.indexingFailedNoBlocks" ) )
178+ if ( ! result ) {
179+ throw new Error ( "Scan failed, is scanner initialized?" )
177180 }
178- }
179181
180- // Check for partial failures - if a significant portion of blocks failed
181- const failureRate = ( cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed ) / cumulativeBlocksFoundSoFar
182- if ( batchErrors . length > 0 && failureRate > 0.1 ) {
183- // More than 10% of blocks failed to index
184- const firstError = batchErrors [ 0 ]
185- throw new Error (
186- `Indexing partially failed: Only ${ cumulativeBlocksIndexed } of ${ cumulativeBlocksFoundSoFar } blocks were indexed. ${ firstError . message } ` ,
187- )
188- }
182+ const { stats } = result
189183
190- // CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
191- // this is a complete failure regardless of the failure rate calculation
192- if ( batchErrors . length > 0 && cumulativeBlocksIndexed === 0 ) {
193- const firstError = batchErrors [ 0 ]
194- throw new Error ( `Indexing failed completely: ${ firstError . message } ` )
195- }
184+ // Check if any blocks were actually indexed successfully
185+ // If no blocks were indexed but blocks were found, it means all batches failed
186+ if ( cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0 ) {
187+ if ( batchErrors . length > 0 ) {
188+ // Use the first batch error as it's likely representative of the main issue
189+ const firstError = batchErrors [ 0 ]
190+ throw new Error ( `Indexing failed: ${ firstError . message } ` )
191+ } else {
192+ throw new Error ( t ( "embeddings:orchestrator.indexingFailedNoBlocks" ) )
193+ }
194+ }
196195
197- // Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
198- // this is still a failure
199- if ( cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0 ) {
200- throw new Error ( t ( "embeddings:orchestrator.indexingFailedCritical" ) )
201- }
196+ // Check for partial failures - if a significant portion of blocks failed
197+ const failureRate = ( cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed ) / cumulativeBlocksFoundSoFar
198+ if ( batchErrors . length > 0 && failureRate > 0.1 ) {
199+ // More than 10% of blocks failed to index
200+ const firstError = batchErrors [ 0 ]
201+ throw new Error (
202+ `Indexing partially failed: Only ${ cumulativeBlocksIndexed } of ${ cumulativeBlocksFoundSoFar } blocks were indexed. ${ firstError . message } ` ,
203+ )
204+ }
202205
203- await this . _startWatcher ( )
206+ // CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
207+ // this is a complete failure regardless of the failure rate calculation
208+ if ( batchErrors . length > 0 && cumulativeBlocksIndexed === 0 ) {
209+ const firstError = batchErrors [ 0 ]
210+ throw new Error ( `Indexing failed completely: ${ firstError . message } ` )
211+ }
204212
205- this . stateManager . setSystemState ( "Indexed" , t ( "embeddings:orchestrator.fileWatcherStarted" ) )
213+ // Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
214+ // this is still a failure
215+ if ( cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0 ) {
216+ throw new Error ( t ( "embeddings:orchestrator.indexingFailedCritical" ) )
217+ }
218+
219+ await this . _startWatcher ( )
220+
221+ this . stateManager . setSystemState ( "Indexed" , t ( "embeddings:orchestrator.fileWatcherStarted" ) )
222+ }
206223 } catch ( error : any ) {
207224 console . error ( "[CodeIndexOrchestrator] Error during indexing:" , error )
208225 TelemetryService . instance . captureEvent ( TelemetryEventName . CODE_INDEX_ERROR , {
0 commit comments