Skip to content

Commit 0a798d8

Browse files
will-lamerton and claude committed
fix: prevent binary/huge files from polluting LLM context when @-tagged
Return file metadata (type, size, modified date) instead of raw content for binary files and files exceeding 512 KB, protecting context without changing the user-facing tagging experience. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9f5a5a3 commit 0a798d8

File tree

3 files changed

+158
-1
lines changed

3 files changed

+158
-1
lines changed

source/constants.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,62 @@ export const COST_SCORE_CHEAP = 7;
131131
export const COST_SCORE_MODERATE = 5;
132132
export const COST_SCORE_EXPENSIVE = 3;
133133

134+
// === FILE TAGGING ===
135+
/**
 * Largest file size (in bytes) whose content is inlined when the file is
 * @-tagged without a line range; larger files are summarized as metadata.
 *
 * Expressed in 1024-byte units so the limit is reported as "512.0 KB" by
 * 1024-based size formatting rather than "500.0 KB".
 */
export const MAX_FILE_TAG_SIZE_BYTES = 512 * 1024; // 512 KB
136+
export const BINARY_FILE_EXTENSIONS = new Set([
137+
// Images
138+
'.gif',
139+
'.png',
140+
'.jpg',
141+
'.jpeg',
142+
'.ico',
143+
'.bmp',
144+
'.webp',
145+
'.svg',
146+
'.tiff',
147+
// Media
148+
'.mp3',
149+
'.mp4',
150+
'.mov',
151+
'.avi',
152+
'.wav',
153+
'.flac',
154+
'.ogg',
155+
'.webm',
156+
// Archives
157+
'.zip',
158+
'.tar',
159+
'.gz',
160+
'.rar',
161+
'.7z',
162+
// Executables
163+
'.exe',
164+
'.dll',
165+
'.so',
166+
'.dylib',
167+
'.wasm',
168+
// Documents
169+
'.pdf',
170+
'.doc',
171+
'.docx',
172+
'.xls',
173+
'.xlsx',
174+
'.ppt',
175+
'.pptx',
176+
// Fonts
177+
'.woff',
178+
'.woff2',
179+
'.ttf',
180+
'.otf',
181+
'.eot',
182+
// Other
183+
'.bin',
184+
'.dat',
185+
'.o',
186+
'.class',
187+
'.pyc',
188+
]);
189+
134190
// === FILE EXPLORER ===
135191
export const FILE_EXPLORER_VISIBLE_ITEMS = 15;
136192
export const FILE_EXPLORER_TOKEN_WARNING_THRESHOLD = 10000;

source/utils/file-content-loader.spec.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import {mkdir, rm, writeFile} from 'node:fs/promises';
22
import {tmpdir} from 'node:os';
33
import {join} from 'node:path';
44
import test from 'ava';
5+
import {MAX_FILE_TAG_SIZE_BYTES} from '@/constants.js';
56
import {loadFileContent} from './file-content-loader.js';
67

78
console.log(`\nfile-content-loader.spec.ts`);
@@ -26,6 +27,13 @@ test.before(async () => {
2627
'import React from "react";\n\nexport function App() {\n return <div>Hello</div>;\n}',
2728
'utf-8',
2829
);
30+
31+
// Create a binary file (by extension)
32+
await writeFile(join(testDir, 'image.gif'), Buffer.from('GIF89a'));
33+
34+
// Create a large text file exceeding the size limit
35+
const largeContent = 'x'.repeat(MAX_FILE_TAG_SIZE_BYTES + 1);
36+
await writeFile(join(testDir, 'huge.csv'), largeContent, 'utf-8');
2937
});
3038

3139
test.after.always(async () => {
@@ -133,3 +141,36 @@ test('stores absolute path in metadata', async t => {
133141
t.true(result.metadata.absolutePath.length >= relativePath.length);
134142
});
135143

144+
test('returns metadata for binary file extensions', async t => {
145+
const result = await loadFileContent(join(testDir, 'image.gif'));
146+
147+
t.true(result.success);
148+
t.truthy(result.content);
149+
t.true(result.content!.includes('[Binary file:'));
150+
t.true(result.content!.includes('Type: GIF'));
151+
t.true(result.content!.includes('Binary files cannot be included'));
152+
t.is(result.metadata.lineCount, 0);
153+
});
154+
155+
test('returns metadata for files exceeding size limit', async t => {
156+
const result = await loadFileContent(join(testDir, 'huge.csv'));
157+
158+
t.true(result.success);
159+
t.truthy(result.content);
160+
t.true(result.content!.includes('[Large file:'));
161+
t.true(result.content!.includes('exceeds'));
162+
t.true(result.content!.includes('limit for inline tagging'));
163+
t.is(result.metadata.lineCount, 0);
164+
});
165+
166+
test('allows large files when line range is specified', async t => {
167+
const result = await loadFileContent(join(testDir, 'huge.csv'), {
168+
start: 1,
169+
end: 1,
170+
});
171+
172+
t.true(result.success);
173+
// Should load the content (not metadata) since a line range was given
174+
t.true(result.content!.startsWith('Path:'));
175+
});
176+

source/utils/file-content-loader.ts

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import {readFile, stat} from 'node:fs/promises';
2-
import {resolve} from 'node:path';
2+
import {extname, resolve} from 'node:path';
3+
import {BINARY_FILE_EXTENSIONS, MAX_FILE_TAG_SIZE_BYTES} from '@/constants.js';
34

45
interface FileContentResult {
56
success: boolean;
@@ -45,6 +46,59 @@ export async function loadFileContent(
4546
};
4647
}
4748

49+
// Binary extension check
50+
const ext = extname(absPath).toLowerCase();
51+
if (BINARY_FILE_EXTENSIONS.has(ext)) {
52+
const fileType = ext.slice(1).toUpperCase();
53+
const metadataContent = [
54+
`[Binary file: ${filePath}]`,
55+
`Type: ${fileType}`,
56+
`Size: ${fileStats.size.toLocaleString()} bytes (${formatBytes(fileStats.size)})`,
57+
`Last Modified: ${fileStats.mtime.toISOString()}`,
58+
'',
59+
'(Binary files cannot be included as text content)',
60+
].join('\n');
61+
62+
return {
63+
success: true,
64+
content: metadataContent,
65+
metadata: {
66+
path: filePath,
67+
absolutePath: absPath,
68+
size: fileStats.size,
69+
lineCount: 0,
70+
lineRange,
71+
tokens: Math.ceil(metadataContent.length / 4),
72+
},
73+
};
74+
}
75+
76+
// Size check
77+
if (fileStats.size > MAX_FILE_TAG_SIZE_BYTES && !lineRange) {
78+
const estimatedLines = Math.round(fileStats.size / 40);
79+
const metadataContent = [
80+
`[Large file: ${filePath}]`,
81+
`Size: ${fileStats.size.toLocaleString()} bytes (${formatBytes(fileStats.size)})`,
82+
`Lines: ~${estimatedLines.toLocaleString()}`,
83+
`Last Modified: ${fileStats.mtime.toISOString()}`,
84+
'',
85+
`(File exceeds ${formatBytes(MAX_FILE_TAG_SIZE_BYTES)} limit for inline tagging. Use @file:1-100 to tag specific line ranges)`,
86+
].join('\n');
87+
88+
return {
89+
success: true,
90+
content: metadataContent,
91+
metadata: {
92+
path: filePath,
93+
absolutePath: absPath,
94+
size: fileStats.size,
95+
lineCount: 0,
96+
lineRange,
97+
tokens: Math.ceil(metadataContent.length / 4),
98+
},
99+
};
100+
}
101+
48102
// Read file content
49103
let content: string;
50104
try {
@@ -134,3 +188,9 @@ export async function loadFileContent(
134188
/**
 * Builds the text block for a tagged file: a `Path:` header line, one blank
 * line, then the file's lines joined with newlines.
 *
 * @param lines - The file's lines, without trailing newline characters.
 * @param filePath - The path to show in the header, exactly as given.
 * @returns The header and body as a single string.
 */
function formatFileContent(lines: string[], filePath: string): string {
	const header = 'Path: ' + filePath;
	const body = lines.join('\n');
	// The empty middle element produces the blank separator line.
	return [header, '', body].join('\n');
}
191+
192+
/**
 * Formats a byte count as a short human-readable size using 1024-based units.
 *
 * - Below 1024 bytes: the exact count, e.g. `"512 B"`.
 * - Below one megabyte: kilobytes with one decimal, e.g. `"500.0 KB"`.
 * - Otherwise: megabytes with two decimals, e.g. `"1.50 MB"`.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size string.
 */
function formatBytes(bytes: number): string {
	if (bytes < 1024) return `${bytes} B`;

	// Decide on the *rounded* KB value: counts just under 1 MiB would
	// otherwise round up and be shown as "1024.0 KB" instead of "1.00 MB".
	const kilobytes = (bytes / 1024).toFixed(1);
	if (Number(kilobytes) < 1024) return `${kilobytes} KB`;

	return `${(bytes / (1024 * 1024)).toFixed(2)} MB`;
}

0 commit comments

Comments
 (0)