Skip to content

Commit 5a81523

Browse files
committed
add delete_index tool, CRLF normalization, non-UTF-8 detection, and UNC path validation
Adds a delete_index tool (gated by DISABLE_DESTRUCTIVE env var) to remove directory indexes, normalizes CRLF line endings before chunking for consistent results, skips non-UTF-8 files during indexing with a warning, and validates Windows UNC path format in path validation.
1 parent 78cb1b5 commit 5a81523

File tree

8 files changed

+327
-6
lines changed

8 files changed

+327
-6
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ yarn-error.log*
1818
*.db
1919
*.sqlite
2020
*.sqlite3
21+
*.sqlite-wal
22+
*.sqlite-shm
2123

2224
# Configuration (except examples)
2325
config.json

src/indexing.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
import { loadGitignoreRules } from './gitignore.js';
1515
import { generateEmbedding } from './embedding.js';
1616
import { initializeStorage, FileRecord } from './storage.js';
17+
import { log } from './logger.js';
1718

1819
export interface ScanOptions {
1920
ignorePatterns: string[];
@@ -251,7 +252,18 @@ export async function indexDirectories(paths: string[], config: Config): Promise
251252
await qdrant.deletePointsByFilePath(file.path);
252253
}
253254

254-
const content = await fs.readFile(file.path, 'utf-8');
255+
const rawContent = await fs.readFile(file.path, 'utf-8');
256+
257+
// Skip non-UTF-8 files (Node.js inserts U+FFFD for invalid byte sequences)
258+
if (rawContent.includes('\uFFFD')) {
259+
log('warning', 'Skipping non-UTF-8 file', { path: file.path });
260+
await sqlite.upsertFile(file, [], ['Skipped: file appears to be non-UTF-8 encoded']);
261+
skipped++;
262+
continue;
263+
}
264+
265+
// Normalize CRLF to LF for consistent chunk boundaries
266+
const content = rawContent.replace(/\r\n/g, '\n');
255267
const chunks = chunkText(content, config.indexing.chunkSize, config.indexing.chunkOverlap);
256268

257269
// Store file metadata in SQLite

src/mcp-handlers.ts

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ interface GetChunkToolArgs {
7474
chunk_id: string;
7575
}
7676

77+
/**
 * Arguments accepted by the `delete_index` tool handler.
 */
interface DeleteIndexToolArgs {
  /** Path of the directory whose index should be removed. */
  directory_path: string;
}
80+
7781
// Type guard functions
7882
function isIndexToolArgs(args: unknown): args is IndexToolArgs {
7983
return typeof args === 'object' && args !== null &&
@@ -96,11 +100,16 @@ function isGetContentToolArgs(args: unknown): args is GetContentToolArgs {
96100
}
97101

98102
/**
 * Type guard for {@link GetChunkToolArgs}.
 *
 * Accepts only non-null objects whose `file_path` and `chunk_id` properties
 * are both strings.
 *
 * @param args - Untrusted tool arguments.
 * @returns `true` when `args` has the shape of `GetChunkToolArgs`.
 */
function isGetChunkToolArgs(args: unknown): args is GetChunkToolArgs {
  if (typeof args !== 'object' || args === null) {
    return false;
  }

  const record = args as Record<string, unknown>;
  // `every` stops at the first failing key, checking file_path before chunk_id.
  return ['file_path', 'chunk_id'].every((key) => typeof record[key] === 'string');
}
103107

108+
/**
 * Type guard for {@link DeleteIndexToolArgs}.
 *
 * @param args - Untrusted tool arguments.
 * @returns `true` when `args` is a non-null object with a string
 *   `directory_path` property.
 */
function isDeleteIndexToolArgs(args: unknown): args is DeleteIndexToolArgs {
  if (typeof args !== 'object' || args === null) {
    return false;
  }

  const candidate = args as { directory_path?: unknown };
  return typeof candidate.directory_path === 'string';
}
112+
104113
export async function handleIndexTool(args: unknown, config: Config): Promise<CallToolResult> {
105114
if (!isIndexToolArgs(args)) {
106115
throw new Error('directory_paths is required and must be an array');
@@ -319,6 +328,61 @@ export async function handleServerInfoTool(version: string): Promise<CallToolRes
319328
};
320329
}
321330

331+
/**
 * Handles the `delete_index` tool: removes the index of one directory.
 *
 * Steps, in order:
 * 1. Validates the arguments and trims `directory_path`.
 * 2. Verifies that the directory has a record in SQLite.
 * 3. Deletes the Qdrant points of every file recorded for the directory.
 *    This step is best-effort: a failure for one file is logged and counted,
 *    and processing continues with the next file.
 * 4. Deletes the file rows and the directory row from SQLite and refreshes
 *    the indexed-directories cache.
 *
 * Nothing in this handler touches the files on disk.
 *
 * @param args - Raw tool arguments; must contain a string `directory_path`.
 * @param config - Server configuration, forwarded to `initializeStorage`.
 * @returns A text result summarising how many files and chunks were removed.
 *   When vector cleanup failed for some files, a warning is appended so the
 *   caller is not told that everything was removed.
 * @throws Error when `directory_path` is missing or not a string, or when the
 *   directory is not indexed.
 */
export async function handleDeleteIndexTool(args: unknown, config: Config): Promise<CallToolResult> {
  if (!isDeleteIndexToolArgs(args)) {
    throw new Error('directory_path is required');
  }

  const dirPath = args.directory_path.trim();
  const { sqlite, qdrant } = await initializeStorage(config);

  try {
    // Check if the directory is actually indexed
    const directory = await sqlite.getDirectory(dirPath);
    if (!directory) {
      throw new Error(
        `Directory '${dirPath}' is not indexed. Use 'server_info' to see indexed directories.`
      );
    }

    // Get files for this directory to clean up Qdrant points
    const files = await sqlite.getFilesByDirectory(dirPath);
    let failedVectorDeletes = 0;
    for (const file of files) {
      try {
        await qdrant.deletePointsByFilePath(file.path);
      } catch (error) {
        // Best-effort: keep going, but remember the failure so it can be reported.
        failedVectorDeletes++;
        log('warning', 'Failed to delete Qdrant points for file', {
          path: file.path,
          error: error instanceof Error ? error.message : String(error)
        });
      }
    }

    // Delete file records and directory record from SQLite
    const deletedFiles = sqlite.deleteFilesByDirectory(dirPath);
    sqlite.deleteDirectory(dirPath);

    // Refresh the indexed directories cache
    refreshIndexedDirsCache(sqlite);

    const chunksCount = files.reduce((sum, f) => sum + (f.chunks?.length ?? 0), 0);

    log('info', 'Index deleted', { directory: dirPath, files: deletedFiles, chunks: chunksCount });

    // The notification is fire-and-forget; wrap it so a rejected send cannot
    // surface as an unhandled promise rejection.
    void Promise.resolve(
      mcpServer?.sendLoggingMessage({ level: 'info', data: { event: 'index_deleted', directory: dirPath } })
    ).catch((error: unknown) => {
      log('warning', 'Failed to send index_deleted notification', {
        directory: dirPath,
        error: error instanceof Error ? error.message : String(error)
      });
    });

    const summary = `Deleted index for ${dirPath}: removed ${deletedFiles} files and ${chunksCount} chunks`;
    const text =
      failedVectorDeletes > 0
        ? `${summary} (warning: vector cleanup failed for ${failedVectorDeletes} file(s); their embeddings may remain in Qdrant)`
        : summary;

    return {
      content: [
        {
          type: 'text',
          text
        }
      ]
    };
  } finally {
    sqlite.close();
  }
}
385+
322386
export function formatErrorResponse(error: unknown): CallToolResult {
323387
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
324388
log('error', 'Tool error', { error: errorMessage });

src/mcp.ts

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
handleGetContentTool,
1919
handleGetChunkTool,
2020
handleServerInfoTool,
21+
handleDeleteIndexTool,
2122
formatErrorResponse,
2223
setMcpServer
2324
} from './mcp-handlers.js';
@@ -28,7 +29,46 @@ const packageJsonPath = join(__dirname, '../package.json');
2829
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
2930
const VERSION = packageJson.version;
3031

31-
const MCP_TOOLS: Tool[] = [
32+
/**
 * MCP tool definition for `delete_index`.
 *
 * Takes a single required string argument, `directory_path`, and is annotated
 * as a destructive, non-read-only tool. The description is assembled from
 * individual lines joined with newlines.
 */
const DELETE_INDEX_TOOL: Tool = {
  name: 'delete_index',
  description: [
    'Remove the index for a directory. Deletes all file records, vector embeddings, and the directory entry from the database.',
    '',
    'When to use this tool:',
    '- User wants to remove a directory from the search index',
    '- Cleaning up old or irrelevant indexed content',
    '- Freeing database space by removing unused indexes',
    '',
    'How it works:',
    '- Removes all file records for the specified directory from SQLite',
    '- Deletes corresponding vector embeddings from Qdrant',
    '- Removes the directory entry from the directories table',
    '- Does NOT delete the actual files on disk',
    '',
    'Examples:',
    '- Remove old project: directory_path="/home/user/old-project"',
    '- Clean up test data: directory_path="/home/user/test-files"',
    '',
    'Use server_info to see what directories are currently indexed before removing.'
  ].join('\n'),
  inputSchema: {
    type: 'object',
    properties: {
      directory_path: {
        type: 'string',
        description: 'Absolute path of the directory whose index should be removed'
      }
    },
    required: ['directory_path']
  },
  annotations: {
    readOnlyHint: false,
    destructiveHint: true,
    idempotentHint: true,
    openWorldHint: false
  }
};
69+
70+
export function getMcpTools(): Tool[] {
71+
const tools: Tool[] = [
3272
{
3373
name: 'index',
3474
description: `Index directories to make their files searchable. Processes files to create vector embeddings for semantic search.
@@ -296,7 +336,15 @@ Returns server version, indexing statistics, directory list, workspace informati
296336
openWorldHint: false
297337
}
298338
}
299-
];
339+
];
340+
341+
// Include delete_index tool unless DISABLE_DESTRUCTIVE is set
342+
if (process.env.DISABLE_DESTRUCTIVE !== 'true') {
343+
tools.push(DELETE_INDEX_TOOL);
344+
}
345+
346+
return tools;
347+
}
300348

301349
export async function startMcpServer(config: Config): Promise<void> {
302350
initLogLevel();
@@ -318,7 +366,7 @@ export async function startMcpServer(config: Config): Promise<void> {
318366

319367
server.setRequestHandler(ListToolsRequestSchema, async () => {
320368
return {
321-
tools: MCP_TOOLS
369+
tools: getMcpTools()
322370
};
323371
});
324372

@@ -344,7 +392,10 @@ export async function startMcpServer(config: Config): Promise<void> {
344392

345393
case 'server_info':
346394
return await handleServerInfoTool(VERSION);
347-
395+
396+
case 'delete_index':
397+
return await handleDeleteIndexTool(args, config);
398+
348399
default:
349400
throw new Error(`Unknown tool: ${name}`);
350401
}

src/path-validation.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@ export function validatePathWithinIndexedDirs(filePath: string, indexedDirs: Set
1212
throw new Error('Access denied: path contains null bytes');
1313
}
1414

15+
// Validate Windows UNC path format if it starts with \\
16+
if (filePath.startsWith('\\\\')) {
17+
const uncParts = filePath.split('\\').filter(Boolean);
18+
if (uncParts.length < 2) {
19+
throw new Error('Invalid UNC path format: expected \\\\server\\share\\... pattern');
20+
}
21+
}
22+
1523
// Resolve the path, following symlinks if the file exists
1624
let resolved: string;
1725
try {

src/storage.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,17 @@ export class SQLiteStorage {
457457
}
458458
}
459459

460+
/**
 * Deletes the row in the `directories` table whose `path` equals the given
 * value. Does nothing when no such row exists.
 *
 * @param path - Directory path to match exactly.
 */
deleteDirectory(path: string): void {
  const statement = this.db.prepare('DELETE FROM directories WHERE path = ?');
  statement.run(path);
}
463+
464+
/**
 * Deletes every row in the `files` table whose `path` equals `directoryPath`
 * or starts with `directoryPath` followed by `/`.
 *
 * The prefix match uses SQL `LIKE`, where `%` and `_` are wildcards. Those
 * characters (and the escape character itself) are escaped in the directory
 * path so that, for example, deleting `/home/u/my_project` cannot also delete
 * files under `/home/u/myXproject/`.
 *
 * NOTE(review): SQLite's `LIKE` is case-insensitive for ASCII by default, so
 * the prefix match still ignores ASCII case — confirm that is acceptable.
 *
 * @param directoryPath - Directory path whose file records should be removed.
 * @returns The number of rows deleted.
 */
deleteFilesByDirectory(directoryPath: string): number {
  // Prefix each LIKE metacharacter with the escape character declared below.
  const escapedPrefix = directoryPath.replace(/[\\%_]/g, '\\$&');
  const result = this.db
    .prepare("DELETE FROM files WHERE path = ? OR path LIKE ? ESCAPE '\\'")
    .run(directoryPath, `${escapedPrefix}/%`);
  return result.changes;
}
470+
460471
close(): void {
461472
openStorageInstances.delete(this);
462473
this.db.close();

0 commit comments

Comments
 (0)