Skip to content

Commit bd114b2

Browse files
committed
feat: Improve file filtering logic to skip binary, minified, and large files for better performance
1 parent 1b95294 commit bd114b2

File tree

1 file changed

+91
-9
lines changed

1 file changed

+91
-9
lines changed

index.js

Lines changed: 91 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,11 +1033,34 @@
10331033
const textFiles = [];
10341034
const binaryFiles = []; // This list is now only for stats/display in the tree, not for content inclusion
10351035
files.forEach(f => {
1036+
// Skip binary files
10361037
if (isBinary(f.name)) {
1037-
binaryFiles.push(f); // Keep for tree display, but not for content generation
1038-
} else {
1039-
textFiles.push(f); // Only these will be read and included in the context
1038+
binaryFiles.push(f);
1039+
return;
1040+
}
1041+
1042+
// Skip minified/compiled files
1043+
if (isMinified(f.name)) {
1044+
console.log('Skipping minified file:', f.name);
1045+
return;
1046+
}
1047+
1048+
// Skip files that shouldn't be in output (name is lowercased so extension checks are case-insensitive)
1049+
const filename = f.name.toLowerCase();
1050+
1051+
// Skip SVG files (can contain binary/encoded data)
1052+
if (filename.endsWith('.svg')) {
1053+
binaryFiles.push(f);
1054+
return;
10401055
}
1056+
1057+
// Skip large files that might be compiled/minified
1058+
if (f.size > 500 * 1024) { // 500KB
1059+
console.log('Skipping large file (possibly compiled):', f.name, bytes(f.size));
1060+
return;
1061+
}
1062+
1063+
textFiles.push(f);
10411064
});
10421065

10431066
const totalTextSize = textFiles.reduce((s, f) => s + f.size, 0);
@@ -1073,9 +1096,41 @@
10731096
);
10741097
results.forEach((result, idx) => {
10751098
if (result.status === 'fulfilled') {
1099+
const content = result.value;
1100+
const path = batch[idx].path;
1101+
const filename = batch[idx].name;
1102+
1103+
// Validate content is actually text (not binary disguised as text)
1104+
// Check for null bytes (the only check performed here; no non-printable-character ratio is computed)
1105+
if (content.indexOf('\0') !== -1) {
1106+
console.warn('Skipping file with null bytes (binary):', filename);
1107+
failed++;
1108+
return;
1109+
}
1110+
1111+
// Skip if content looks like base64 encoded data (common in compiled files)
1112+
const lines = content.split('\n');
1113+
const longBase64Lines = lines.filter(line =>
1114+
line.length > 200 && /^[A-Za-z0-9+/=]+$/.test(line.trim())
1115+
).length;
1116+
1117+
if (longBase64Lines > 10) {
1118+
console.warn('Skipping file with encoded data:', filename);
1119+
failed++;
1120+
return;
1121+
}
1122+
1123+
// Skip files containing any extremely long line (typical of minified code)
1124+
const hasVeryLongLine = lines.some(line => line.length > 10000);
1125+
if (hasVeryLongLine) {
1126+
console.warn('Skipping file with very long lines (minified):', filename);
1127+
failed++;
1128+
return;
1129+
}
1130+
10761131
contents.push({
1077-
path: batch[idx].path,
1078-
content: result.value
1132+
path: path,
1133+
content: content
10791134
});
10801135
} else {
10811136
console.warn('Skip', batch[idx].name, result.reason);
@@ -1165,12 +1220,39 @@
11651220
);
11661221
results.forEach((result, idx) => {
11671222
if (result.status === 'fulfilled') {
1223+
const content = result.value;
11681224
const p = batch[idx].path;
1169-
parts.push(`=== FILE: ${getFullPath(p)} ===\n`);
1170-
parts.push(`<document path="${p}">\n`, result.value, '\n</document>\n');
1225+
const filename = batch[idx].name;
1226+
1227+
// Same validation as in gen() function
1228+
// Skip files with null bytes (binary disguised as text)
1229+
if (content.indexOf('\0') !== -1) {
1230+
console.warn('Download: Skipping file with null bytes:', filename);
1231+
return;
1232+
}
1233+
1234+
// Skip base64 encoded content
1235+
const lines = content.split('\n');
1236+
const longBase64Lines = lines.filter(line =>
1237+
line.length > 200 && /^[A-Za-z0-9+/=]+$/.test(line.trim())
1238+
).length;
1239+
1240+
if (longBase64Lines > 10) {
1241+
console.warn('Download: Skipping file with encoded data:', filename);
1242+
return;
1243+
}
1244+
1245+
// Skip minified files (very long single lines)
1246+
const hasVeryLongLine = lines.some(line => line.length > 10000);
1247+
if (hasVeryLongLine) {
1248+
console.warn('Download: Skipping minified file:', filename);
1249+
return;
1250+
}
1251+
1252+
parts.push('=== FILE: ' + getFullPath(p) + ' ===\n');
1253+
parts.push('<document path="' + p + '">\n', content, '\n</document>\n');
1254+
11711255
// OPTIMIZE: Nullify the file reference from S.files after reading to help GC
1172-
// Find the corresponding file in S.files and nullify its 'file' property
1173-
// This is crucial for memory management during large downloads
11741256
const sFileIndex = S.files.findIndex(sf => sf.path === p);
11751257
if (sFileIndex !== -1) {
11761258
S.files[sFileIndex].file = null; // Release the File object reference

0 commit comments

Comments
 (0)