|
1033 | 1033 | const textFiles = []; |
1034 | 1034 | const binaryFiles = []; // This list is now only for stats/display in the tree, not for content inclusion |
1035 | 1035 | files.forEach(f => { |
| 1036 | + // Skip binary files |
1036 | 1037 | if (isBinary(f.name)) { |
1037 | | - binaryFiles.push(f); // Keep for tree display, but not for content generation |
1038 | | - } else { |
1039 | | - textFiles.push(f); // Only these will be read and included in the context |
| 1038 | + binaryFiles.push(f); |
| 1039 | + return; |
| 1040 | + } |
| 1041 | + |
| 1042 | + // Skip minified/compiled files |
| 1043 | + if (isMinified(f.name)) { |
| 1044 | + console.log('Skipping minified file:', f.name); |
| 1045 | + return; |
| 1046 | + } |
| 1047 | + |
| 1048 | + // Skip files that shouldn't be in output |
| 1049 | + const filename = f.name.toLowerCase(); |
| 1050 | + |
| 1051 | + // Skip SVG files (can contain binary/encoded data) |
| 1052 | + if (filename.endsWith('.svg')) { |
| 1053 | + binaryFiles.push(f); |
| 1054 | + return; |
1040 | 1055 | } |
| 1056 | + |
| 1057 | + // Skip large files that might be compiled/minified |
| 1058 | + if (f.size > 500 * 1024) { // 500KB |
| 1059 | + console.log('Skipping large file (possibly compiled):', f.name, bytes(f.size)); |
| 1060 | + return; |
| 1061 | + } |
| 1062 | + |
| 1063 | + textFiles.push(f); |
1041 | 1064 | }); |
1042 | 1065 |
|
1043 | 1066 | const totalTextSize = textFiles.reduce((s, f) => s + f.size, 0); |
|
1073 | 1096 | ); |
1074 | 1097 | results.forEach((result, idx) => { |
1075 | 1098 | if (result.status === 'fulfilled') { |
| 1099 | + const content = result.value; |
| 1100 | + const path = batch[idx].path; |
| 1101 | + const filename = batch[idx].name; |
| 1102 | + |
| 1103 | + // Validate content is actually text (not binary disguised as text) |
| 1104 | + // A null byte anywhere in the content marks the file as binary |
| 1105 | + if (content.indexOf('\0') !== -1) { |
| 1106 | + console.warn('Skipping file with null bytes (binary):', filename); |
| 1107 | + failed++; |
| 1108 | + return; |
| 1109 | + } |
| 1110 | + |
| 1111 | + // Skip if content looks like base64 encoded data (common in compiled files) |
| 1112 | + const lines = content.split('\n'); |
| 1113 | + const longBase64Lines = lines.filter(line => |
| 1114 | + line.length > 200 && /^[A-Za-z0-9+/=]+$/.test(line.trim()) |
| 1115 | + ).length; |
| 1116 | + |
| 1117 | + if (longBase64Lines > 10) { |
| 1118 | + console.warn('Skipping file with encoded data:', filename); |
| 1119 | + failed++; |
| 1120 | + return; |
| 1121 | + } |
| 1122 | + |
| 1123 | + // Skip files containing any line longer than 10,000 characters (typical of minified code) |
| 1124 | + const hasVeryLongLine = lines.some(line => line.length > 10000); |
| 1125 | + if (hasVeryLongLine) { |
| 1126 | + console.warn('Skipping file with very long lines (minified):', filename); |
| 1127 | + failed++; |
| 1128 | + return; |
| 1129 | + } |
| 1130 | + |
1076 | 1131 | contents.push({ |
1077 | | - path: batch[idx].path, |
1078 | | - content: result.value |
| 1132 | + path: path, |
| 1133 | + content: content |
1079 | 1134 | }); |
1080 | 1135 | } else { |
1081 | 1136 | console.warn('Skip', batch[idx].name, result.reason); |
|
1165 | 1220 | ); |
1166 | 1221 | results.forEach((result, idx) => { |
1167 | 1222 | if (result.status === 'fulfilled') { |
| 1223 | + const content = result.value; |
1168 | 1224 | const p = batch[idx].path; |
1169 | | - parts.push(`=== FILE: ${getFullPath(p)} ===\n`); |
1170 | | - parts.push(`<document path="${p}">\n`, result.value, '\n</document>\n'); |
| 1225 | + const filename = batch[idx].name; |
| 1226 | + |
| 1227 | + // Same validation as in gen() function |
| 1228 | + // Skip files with null bytes (binary disguised as text) |
| 1229 | + if (content.indexOf('\0') !== -1) { |
| 1230 | + console.warn('Download: Skipping file with null bytes:', filename); |
| 1231 | + return; |
| 1232 | + } |
| 1233 | + |
| 1234 | + // Skip base64 encoded content |
| 1235 | + const lines = content.split('\n'); |
| 1236 | + const longBase64Lines = lines.filter(line => |
| 1237 | + line.length > 200 && /^[A-Za-z0-9+/=]+$/.test(line.trim()) |
| 1238 | + ).length; |
| 1239 | + |
| 1240 | + if (longBase64Lines > 10) { |
| 1241 | + console.warn('Download: Skipping file with encoded data:', filename); |
| 1242 | + return; |
| 1243 | + } |
| 1244 | + |
| 1245 | + // Skip minified files (very long single lines) |
| 1246 | + const hasVeryLongLine = lines.some(line => line.length > 10000); |
| 1247 | + if (hasVeryLongLine) { |
| 1248 | + console.warn('Download: Skipping minified file:', filename); |
| 1249 | + return; |
| 1250 | + } |
| 1251 | + |
| 1252 | + parts.push('=== FILE: ' + getFullPath(p) + ' ===\n'); |
| 1253 | + parts.push('<document path="' + p + '">\n', content, '\n</document>\n'); |
| 1254 | + |
1171 | 1255 | // OPTIMIZE: Nullify the file reference from S.files after reading to help GC |
1172 | | - // Find the corresponding file in S.files and nullify its 'file' property |
1173 | | - // This is crucial for memory management during large downloads |
1174 | 1256 | const sFileIndex = S.files.findIndex(sf => sf.path === p); |
1175 | 1257 | if (sFileIndex !== -1) { |
1176 | 1258 | S.files[sFileIndex].file = null; // Release the File object reference |
|
0 commit comments