Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .changeset/incremental-indexing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
"@lytics/dev-agent-core": patch
"@lytics/dev-agent": patch
---

Incremental indexing now works! `dev update` detects changed, new, and deleted files.

**What's new:**
- Only re-indexes files that actually changed (via content hash)
- Detects new files added since last index
- Cleans up documents for deleted files
- Removes stale documents for symbols that no longer exist after a file is modified

**Usage:**
```bash
dev index . # First run: full index
dev update # Fast incremental update
dev index . --force # Force full re-index
```
66 changes: 65 additions & 1 deletion packages/core/src/indexer/__tests__/indexer-edge.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,76 @@ describe('RepositoryIndexer - Edge Case Coverage', () => {

const stats = await indexer.update();

// Should handle gracefully
// Should handle gracefully - deleted files are cleaned up
expect(stats.duration).toBeGreaterThanOrEqual(0);

await indexer.close();
});

it('should handle incremental update with new, changed, and deleted files', async () => {
  const repoDir = path.join(testDir, 'incremental-full');
  await fs.mkdir(repoDir, { recursive: true });

  // Writes a single UTF-8 file relative to the fixture repository root.
  const writeFixture = (name: string, contents: string) =>
    fs.writeFile(path.join(repoDir, name), contents, 'utf-8');

  // tsconfig for the scanner
  await writeFixture(
    'tsconfig.json',
    JSON.stringify({ compilerOptions: { target: 'es2020', module: 'commonjs' } })
  );

  // Seed sources with extractable content (functions rather than primitive constants).
  // Written one at a time, in this order.
  const seedSources: Array<[string, string]> = [
    ['keep.ts', 'export function keep() { return 1; }'],
    ['modify.ts', 'export function modify() { return 1; }'],
    ['delete.ts', 'export function del() { return 1; }'],
  ];
  for (const [name, contents] of seedSources) {
    await writeFixture(name, contents);
  }

  const indexer = new RepositoryIndexer({
    repositoryPath: repoDir,
    vectorStorePath: path.join(testDir, 'incremental-full.lance'),
  });
  await indexer.initialize();

  // Baseline: the full index extracts exactly three documents from the seeded repo.
  const { documentsExtracted } = await indexer.index();
  expect(documentsExtracted).toBe(3);

  // Mutate the repository in all three ways before running the incremental update:
  //   - add a brand-new file
  await writeFixture('new.ts', 'export function newFile() { return 1; }');
  //   - change the contents of a tracked file
  await writeFixture('modify.ts', 'export function modify() { return 2; }');
  //   - remove a tracked file
  await fs.unlink(path.join(repoDir, 'delete.ts'));

  const updateStats = await indexer.update();

  // filesScanned counts the added file plus the modified file;
  // the deleted file is not counted as scanned.
  expect(updateStats.filesScanned).toBe(2);
  expect(updateStats.documentsIndexed).toBeGreaterThanOrEqual(2);

  await indexer.close();
});

it('should handle since date filtering in detectChangedFiles', async () => {
const repoDir = path.join(testDir, 'since-filter');
await fs.mkdir(repoDir, { recursive: true });
Expand Down
24 changes: 17 additions & 7 deletions packages/core/src/indexer/__tests__/indexer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,16 @@ This is a test repository for indexing.`,
const updateDir = path.join(testDir, 'update-test');
await fs.mkdir(updateDir, { recursive: true });

// Create tsconfig for scanner
await fs.writeFile(
path.join(updateDir, 'tsconfig.json'),
JSON.stringify({ compilerOptions: { target: 'es2020', module: 'commonjs' } }),
'utf-8'
);

await fs.writeFile(
path.join(updateDir, 'original.ts'),
'export const original = true;',
'export function original() { return true; }',
'utf-8'
);

Expand All @@ -256,20 +263,23 @@ This is a test repository for indexing.`,

// Initial index
const initialStats = await indexer.index();
expect(initialStats.filesScanned).toBeGreaterThanOrEqual(0);
expect(initialStats.documentsExtracted).toBeGreaterThanOrEqual(1);

// No changes - update should find nothing
const updateStats1 = await indexer.update();
expect(updateStats1.filesScanned).toBe(0);

// Add a new file
await fs.writeFile(path.join(updateDir, 'new.ts'), 'export const newFile = true;', 'utf-8');
await fs.writeFile(
path.join(updateDir, 'new.ts'),
'export function newFile() { return true; }',
'utf-8'
);

// Update should detect new file
// Note: Current implementation does full scan, not true incremental
// This tests the update() method exists and works
// Update should detect and index new file
const updateStats2 = await indexer.update();
expect(updateStats2.filesScanned).toBeGreaterThanOrEqual(0);
expect(updateStats2.filesScanned).toBe(1);
expect(updateStats2.documentsIndexed).toBeGreaterThanOrEqual(1);

await indexer.close();
});
Expand Down
110 changes: 83 additions & 27 deletions packages/core/src/indexer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,10 @@ export class RepositoryIndexer {
const errors: IndexError[] = [];

// Determine which files need reindexing
const filesToReindex = await this.detectChangedFiles(options.since);
const { changed, added, deleted } = await this.detectChangedFiles(options.since);
const filesToReindex = [...changed, ...added];

if (filesToReindex.length === 0) {
if (filesToReindex.length === 0 && deleted.length === 0) {
// No changes, return empty stats
return {
filesScanned: 0,
Expand All @@ -205,21 +206,33 @@ export class RepositoryIndexer {
};
}

// Scan only changed files
const scanResult = await scanRepository({
repoRoot: this.config.repositoryPath,
include: filesToReindex,
exclude: this.config.excludePatterns,
});
// Delete documents for deleted files
for (const file of deleted) {
const oldMetadata = this.state.files[file];
if (oldMetadata?.documentIds) {
try {
await this.vectorStorage.deleteDocuments(oldMetadata.documentIds);
} catch (error) {
errors.push({
type: 'storage',
message: `Failed to delete documents for removed file ${file}`,
file,
error: error instanceof Error ? error : undefined,
timestamp: new Date(),
});
}
}
// Remove from state
delete this.state.files[file];
}

// Remove old documents from these files
for (const file of filesToReindex) {
// Delete old documents for changed files (not added - they have no old docs)
for (const file of changed) {
const oldMetadata = this.state.files[file];
if (oldMetadata?.documentIds) {
try {
await this.vectorStorage.deleteDocuments(oldMetadata.documentIds);
} catch (error) {
// Delete not implemented yet, just log
errors.push({
type: 'storage',
message: `Failed to delete old documents for ${file}`,
Expand All @@ -231,19 +244,37 @@ export class RepositoryIndexer {
}
}

// Index new documents
const embeddingDocuments = prepareDocumentsForEmbedding(scanResult.documents);
await this.vectorStorage.addDocuments(embeddingDocuments);
// Scan and index changed + added files
let documentsExtracted = 0;
let documentsIndexed = 0;

if (filesToReindex.length > 0) {
const scanResult = await scanRepository({
repoRoot: this.config.repositoryPath,
include: filesToReindex,
exclude: this.config.excludePatterns,
});

documentsExtracted = scanResult.documents.length;

// Index new documents
const embeddingDocuments = prepareDocumentsForEmbedding(scanResult.documents);
await this.vectorStorage.addDocuments(embeddingDocuments);
documentsIndexed = embeddingDocuments.length;

// Update state
await this.updateState(scanResult.documents);
// Update state with new documents
await this.updateState(scanResult.documents);
} else {
// Only deletions - still need to save state
await this.saveState();
}

const endTime = new Date();
return {
filesScanned: filesToReindex.length,
documentsExtracted: scanResult.documents.length,
documentsIndexed: embeddingDocuments.length,
vectorsStored: embeddingDocuments.length,
documentsExtracted,
documentsIndexed,
vectorsStored: documentsIndexed,
duration: endTime.getTime() - startTime.getTime(),
errors,
startTime,
Expand Down Expand Up @@ -396,15 +427,21 @@ export class RepositoryIndexer {
}

/**
* Detect files that have changed since last index
* Detect files that have changed, been added, or deleted since last index
*/
private async detectChangedFiles(since?: Date): Promise<string[]> {
private async detectChangedFiles(since?: Date): Promise<{
changed: string[];
added: string[];
deleted: string[];
}> {
if (!this.state) {
return [];
return { changed: [], added: [], deleted: [] };
}

const changedFiles: string[] = [];
const changed: string[] = [];
const deleted: string[] = [];

// Check existing tracked files for changes or deletion
for (const [filePath, metadata] of Object.entries(this.state.files)) {
const fullPath = path.join(this.config.repositoryPath, filePath);

Expand All @@ -421,15 +458,34 @@ export class RepositoryIndexer {
const currentHash = crypto.createHash('sha256').update(content).digest('hex');

if (currentHash !== metadata.hash) {
changedFiles.push(filePath);
changed.push(filePath);
}
} catch {
// File no longer exists or not readable
changedFiles.push(filePath);
// File no longer exists or not readable - mark as deleted
deleted.push(filePath);
}
}

return changedFiles;
// Scan for new files not in state
const scanResult = await scanRepository({
repoRoot: this.config.repositoryPath,
exclude: this.config.excludePatterns,
});

const trackedFiles = new Set(Object.keys(this.state.files));
const added: string[] = [];

for (const doc of scanResult.documents) {
const filePath = doc.metadata.file;
if (!trackedFiles.has(filePath)) {
added.push(filePath);
}
}

// Deduplicate added files (multiple docs per file)
const uniqueAdded = [...new Set(added)];

return { changed, added: uniqueAdded, deleted };
}

/**
Expand Down
19 changes: 16 additions & 3 deletions packages/core/src/vector/__tests__/vector.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,22 @@ describe('Vector Storage', () => {
expect(stats.totalDocuments).toBeGreaterThanOrEqual(50);
});

it('should throw error on delete (not supported)', async () => {
// Delete is not supported - use upsert instead
await expect(vectorStorage.deleteDocuments(['any-id'])).rejects.toThrow('not supported');
it('should delete documents by ID', async () => {
  const docId = 'to-delete';

  // Seed the store with the document this test is going to remove.
  await vectorStorage.addDocuments([
    { id: docId, text: 'This document will be deleted', metadata: { temp: true } },
  ]);

  // Precondition: the document must really be present before we delete it.
  // The post-delete check below expects `null` for a missing ID, and
  // `toBeDefined()` alone accepts `null`, so it would pass even if the insert
  // had silently failed. Assert not-null as well to make the check meaningful.
  const beforeDelete = await vectorStorage.getDocument(docId);
  expect(beforeDelete).toBeDefined();
  expect(beforeDelete).not.toBeNull();

  // Delete it
  await vectorStorage.deleteDocuments([docId]);

  // The lookup now reports the document as missing.
  const afterDelete = await vectorStorage.getDocument(docId);
  expect(afterDelete).toBeNull();
});

it('should handle empty document array', async () => {
Expand Down
12 changes: 5 additions & 7 deletions packages/core/src/vector/store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,11 @@ export class LanceDBVectorStore implements VectorStore {
}

try {
// LanceDB delete requires filtering by a predicate, not by ID list
// This would need a schema change to support proper deletion
// For now, we recommend using upsert (mergeInsert) instead of delete+insert
// See: https://lancedb.github.io/lancedb/guides/tables/#deleting-rows
throw new Error(
'Delete operation not supported. Use upsert via addDocuments() with existing IDs instead.'
);
// Delete using SQL IN predicate
// Escape single quotes in IDs to prevent SQL injection
const escapedIds = ids.map((id) => id.replace(/'/g, "''"));
const predicate = `id IN ('${escapedIds.join("', '")}')`;
await this.table.delete(predicate);
} catch (error) {
throw new Error(
`Failed to delete documents: ${error instanceof Error ? error.message : String(error)}`
Expand Down
2 changes: 1 addition & 1 deletion website/content/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Local semantic code search for Cursor and Claude Code via MCP.
[Get Started](/docs) · [View on GitHub](https://github.com/lytics/dev-agent)

<Callout type="info">
**v0.5.0 coming soon** — Arrow functions, React hooks, and exported constants now indexed. [See what's new →](/updates)
**New in v0.5.1** — Incremental indexing! Only re-index files that actually changed. [See what's new →](/updates)
</Callout>

<Callout type="default">
Expand Down
Loading