11import { Task } from "../task/Task"
22import { readLines } from "../../integrations/misc/read-lines"
3+ import { readPartialSingleLineContent } from "../../integrations/misc/read-partial-content"
34import { getModelMaxOutputTokens , getFormatForProvider } from "../../shared/api"
45import * as fs from "fs/promises"
56
@@ -9,14 +10,8 @@ import * as fs from "fs/promises"
910 * when reading files without affecting other context window calculations.
1011 */
1112const FILE_READ_BUFFER_PERCENTAGE = 0.25 // 25% buffer for file reads
12-
13- /**
14- * Constants for the 2-phase validation approach
15- */
1613const CHARS_PER_TOKEN_ESTIMATE = 3
17- const CUTBACK_PERCENTAGE = 0.2 // 20% reduction when over limit
1814const READ_BATCH_SIZE = 50 // Read 50 lines at a time for efficiency
19- const MAX_API_CALLS = 5 // Safety limit to prevent infinite loops
2015const MIN_USEFUL_LINES = 50 // Minimum lines to consider useful
2116
2217/**
@@ -27,7 +22,7 @@ const SMALL_FILE_SIZE = 100 * 1024 // 100KB - safe if context is mostly empty
2722
/**
 * Outcome of checking whether a file can be read into the remaining context window.
 */
export interface ContextValidationResult {
	/** True when the read has to be capped; false when the whole file may be read. */
	shouldLimit: boolean
	/**
	 * The cap to apply to the read. For multi-line files this is a line count;
	 * for single-line files it is a character count.
	 */
	safeMaxLines: number
	/** Human-readable explanation of the cap, supplied when a limit is applied. */
	reason?: string
}
3328
@@ -79,7 +74,6 @@ async function shouldSkipValidation(filePath: string, totalLines: number, cline:
7974 // Get file size
8075 const stats = await fs . stat ( filePath )
8176 const fileSizeBytes = stats . size
82- const fileSizeMB = fileSizeBytes / ( 1024 * 1024 )
8377
8478 // Very small files by size are definitely safe to skip validation
8579 if ( fileSizeBytes < TINY_FILE_SIZE ) {
@@ -99,65 +93,100 @@ async function shouldSkipValidation(filePath: string, totalLines: number, cline:
9993 // we can skip validation as there's plenty of room
10094 if ( contextUsagePercent < 0.5 && fileSizeBytes < SMALL_FILE_SIZE ) {
10195 console . log (
102- `[validateFileSizeForContext ] Skipping validation for ${ filePath } - context mostly empty (${ Math . round ( contextUsagePercent * 100 ) } % used) and file is moderate size ( ${ fileSizeMB . toFixed ( 2 ) } MB) ` ,
96+ `[shouldSkipValidation ] Skipping validation for ${ filePath } - context mostly empty (${ Math . round ( contextUsagePercent * 100 ) } % used) and file is moderate size` ,
10397 )
10498 return true
10599 }
106100 } catch ( error ) {
107101 // If we can't check file size or context state, don't skip validation
108- console . warn ( `[validateFileSizeForContext ] Could not check file size or context state: ${ error } ` )
102+ console . warn ( `[shouldSkipValidation ] Could not check file size or context state: ${ error } ` )
109103 }
110104
111105 return false
112106}
113107
/**
 * Reports whether a file should be handled as a single-line file.
 *
 * A file qualifies when it has exactly one line, or when it has 2-5 lines, is at
 * least 100KB on disk, and every line after the first is blank (whitespace only).
 * The second case covers minified files that carry a few trailing empty lines.
 *
 * @param filePath - Path of the file to inspect.
 * @param totalLines - Line count of the file, as supplied by the caller.
 * @returns `true` when the file is effectively single-line; `false` otherwise,
 *          including when the file cannot be inspected (the error is logged, not thrown).
 */
async function isEffectivelySingleLine(filePath: string, totalLines: number): Promise<boolean> {
	// A genuine one-line file needs no further inspection
	if (totalLines === 1) {
		return true
	}

	// Anything outside the 1-5 line window is a regular multi-line file
	if (totalLines < 1 || totalLines > 5) {
		return false
	}

	try {
		const { size: sizeInBytes } = await fs.stat(filePath)

		// Files under 100KB are not worth the special single-line handling
		if (sizeInBytes < 100 * 1024) {
			return false
		}

		// Presumably the arguments are (path, endLine, startLine), i.e. lines 0..totalLines-1 — confirm against readLines.
		// NOTE(review): the returned text includes the first line as well, so the large
		// line is held in memory here — confirm that is acceptable.
		const text = await readLines(filePath, totalLines - 1, 0)

		// Lines 2-5 (indices 1-4) must all be blank for the file to count as single-line
		const trailingLinesBlank = text
			.split("\n")
			.slice(1, 5)
			.every((line) => line.trim().length === 0)

		console.log(
			`[isEffectivelySingleLine] File ${filePath} : totalLines=${totalLines} , fileSize=${(sizeInBytes / 1024).toFixed(1)} KB, hasEmptyLines2to5=${trailingLinesBlank} `,
		)

		return trailingLinesBlank
	} catch (error) {
		// Any failure to stat or read the file means it cannot be classified as single-line
		console.warn(`[isEffectivelySingleLine] Error checking file ${filePath} : ${error} `)
		return false
	}
}
157+
114158/**
115159 * Validates a single-line file (likely minified) to see if it fits in context
116- * Uses the same heuristic and backoff strategy as multi-line files
160+ * Uses only heuristic estimation without actual token counting
117161 */
118162async function validateSingleLineFile (
119163 filePath : string ,
120164 cline : Task ,
121165 contextInfo : ContextInfo ,
122166) : Promise < ContextValidationResult | null > {
123- console . log ( `[validateFileSizeForContext] Single-line file detected: ${ filePath } - checking if it fits in context` )
124-
125167 try {
126- // Phase 1: Use char/3 heuristic to estimate safe content size
168+ // Use char heuristic to estimate safe content size with additional safety margin
127169 const estimatedSafeChars = contextInfo . targetTokenLimit * CHARS_PER_TOKEN_ESTIMATE
128170
129- // Read the single line
130- const fullContent = await readLines ( filePath , 0 , 0 )
171+ // Read only up to the limited chars to avoid loading huge files into memory
172+ const partialContent = await readPartialSingleLineContent ( filePath , estimatedSafeChars )
131173
132- // If the full content fits within our estimated safe chars, try it
133- let contentToValidate = fullContent
134- if ( fullContent . length > estimatedSafeChars ) {
135- // Content is too large, start with estimated safe portion
136- contentToValidate = fullContent . substring ( 0 , estimatedSafeChars )
137- console . log (
138- `[validateFileSizeForContext] Single-line file exceeds estimated safe chars (${ fullContent . length } > ${ estimatedSafeChars } ), starting with truncated content` ,
139- )
140- }
141-
142- // Phase 2: Use shared validation function with cutback
143- const { finalContent, actualTokens } = await validateAndCutbackContent (
144- contentToValidate ,
145- contextInfo . targetTokenLimit ,
146- cline ,
147- true ,
148- )
174+ // Get the full file size to determine if we read the entire file
175+ const stats = await fs . stat ( filePath )
176+ const fullFileSize = stats . size
177+ const isPartialRead = partialContent . length < fullFileSize
149178
150- // Determine the result based on what we could read
151- if ( finalContent . length === fullContent . length ) {
179+ if ( ! isPartialRead ) {
152180 // The entire single line fits
153181 return { shouldLimit : false , safeMaxLines : - 1 }
154- } else if ( finalContent . length > 0 ) {
182+ } else if ( partialContent . length > 0 ) {
155183 // Only a portion of the line fits
156- const percentageRead = Math . round ( ( finalContent . length / fullContent . length ) * 100 )
184+ const percentageRead = Math . round ( ( partialContent . length / fullFileSize ) * 100 )
185+
157186 return {
158187 shouldLimit : true ,
159- safeMaxLines : 1 , // Still technically 1 line, but truncated
160- reason : `Large single-line file (likely minified) exceeds available context space. Only the first ${ percentageRead } % (${ finalContent . length } of ${ fullContent . length } characters) can be loaded. The file contains ${ actualTokens } tokens of the available ${ contextInfo . targetTokenLimit } tokens . Context: ${ contextInfo . currentlyUsed } /${ contextInfo . contextWindow } tokens used (${ Math . round ( ( contextInfo . currentlyUsed / contextInfo . contextWindow ) * 100 ) } %). This is a hard limit - no additional content from this file can be accessed.` ,
188+ safeMaxLines : partialContent . length , // Return actual character count for single-line files
189+ reason : `Large single-line file (likely minified) exceeds available context space. Only the first ${ percentageRead } % (${ partialContent . length } of ${ fullFileSize } characters) can be loaded. Context: ${ contextInfo . currentlyUsed } /${ contextInfo . contextWindow } tokens used (${ Math . round ( ( contextInfo . currentlyUsed / contextInfo . contextWindow ) * 100 ) } %). This is a hard limit - no additional content from this file can be accessed.` ,
161190 }
162191 } else {
163192 // Can't fit any content
@@ -168,8 +197,24 @@ async function validateSingleLineFile(
168197 }
169198 }
170199 } catch ( error ) {
171- console . warn ( `[validateFileSizeForContext] Error processing single-line file: ${ error } ` )
172- return null // Fall through to regular validation
200+ // Check for specific error types that indicate memory issues
201+ if ( error instanceof Error ) {
202+ const errorMessage = error . message . toLowerCase ( )
203+ if (
204+ errorMessage . includes ( "heap" ) ||
205+ errorMessage . includes ( "memory" ) ||
206+ errorMessage . includes ( "allocation" )
207+ ) {
208+ // Return a safe fallback instead of crashing
209+ return {
210+ shouldLimit : true ,
211+ safeMaxLines : 0 ,
212+ reason : `File is too large to process due to memory constraints. Error: ${ error . message } . This file cannot be accessed.` ,
213+ }
214+ }
215+ }
216+
217+ return null // Fall through to regular validation for other errors
173218 }
174219}
175220
@@ -216,97 +261,6 @@ async function readFileInBatches(
216261 return { content : accumulatedContent , lineCount : currentLine , lineToCharMap }
217262}
218263
219- /**
220- * Shared function to validate content with actual API and apply cutback if needed
221- * Works for both single-line and multi-line content
222- */
223- async function validateAndCutbackContent (
224- content : string ,
225- targetTokenLimit : number ,
226- cline : Task ,
227- isSingleLine : boolean = false ,
228- ) : Promise < { finalContent : string ; actualTokens : number ; didCutback : boolean } > {
229- let finalContent = content
230- let apiCallCount = 0
231- let actualTokens = 0
232- let didCutback = false
233-
234- while ( apiCallCount < MAX_API_CALLS ) {
235- apiCallCount ++
236-
237- // Make the actual API call to count tokens
238- actualTokens = await cline . api . countTokens ( [ { type : "text" , text : finalContent } ] )
239-
240- console . log (
241- `[validateFileSizeForContext] API call ${ apiCallCount } : ${ actualTokens } tokens for ${ finalContent . length } chars${ isSingleLine ? " (single-line)" : "" } ` ,
242- )
243-
244- if ( actualTokens <= targetTokenLimit ) {
245- // We're under the limit, we're done!
246- break
247- }
248-
249- // We're over the limit - cut back by CUTBACK_PERCENTAGE
250- const targetLength = Math . floor ( finalContent . length * ( 1 - CUTBACK_PERCENTAGE ) )
251-
252- // Safety check
253- if ( targetLength === 0 || targetLength === finalContent . length ) {
254- break
255- }
256-
257- finalContent = finalContent . substring ( 0 , targetLength )
258- didCutback = true
259- }
260-
261- return { finalContent, actualTokens, didCutback }
262- }
263-
264- /**
265- * Validates content with actual API and cuts back if needed (for multi-line files)
266- */
267- async function validateAndAdjustContent (
268- accumulatedContent : string ,
269- initialLineCount : number ,
270- lineToCharMap : Map < number , number > ,
271- targetTokenLimit : number ,
272- totalLines : number ,
273- cline : Task ,
274- ) : Promise < { finalContent : string ; finalLineCount : number } > {
275- // Use the shared validation function
276- const { finalContent, didCutback } = await validateAndCutbackContent (
277- accumulatedContent ,
278- targetTokenLimit ,
279- cline ,
280- false ,
281- )
282-
283- // If no cutback was needed, return original line count
284- if ( ! didCutback ) {
285- return { finalContent, finalLineCount : initialLineCount }
286- }
287-
288- // Find the line that corresponds to the cut content length
289- let cutoffLine = 0
290- for ( const [ lineNum , charPos ] of lineToCharMap . entries ( ) ) {
291- if ( charPos > finalContent . length ) {
292- break
293- }
294- cutoffLine = lineNum
295- }
296-
297- // Ensure we don't cut back too far
298- if ( cutoffLine < 10 ) {
299- console . warn ( `[validateFileSizeForContext] Cutback resulted in too few lines (${ cutoffLine } ), using minimum` )
300- cutoffLine = Math . min ( MIN_USEFUL_LINES , totalLines )
301- }
302-
303- // Get the character position for the cutoff line
304- const cutoffCharPos = lineToCharMap . get ( cutoffLine ) || 0
305- const adjustedContent = accumulatedContent . substring ( 0 , cutoffCharPos )
306-
307- return { finalContent : adjustedContent , finalLineCount : cutoffLine }
308- }
309-
310264/**
311265 * Handles error cases with conservative fallback
312266 */
@@ -316,8 +270,6 @@ async function handleValidationError(
316270 currentMaxReadFileLine : number ,
317271 error : unknown ,
318272) : Promise < ContextValidationResult > {
319- console . warn ( `[validateFileSizeForContext] Error accessing runtime state: ${ error } ` )
320-
321273 // In error cases, we can't check context state, so use simple file size heuristics
322274 try {
323275 const stats = await fs . stat ( filePath )
@@ -329,7 +281,6 @@ async function handleValidationError(
329281 }
330282 } catch ( statError ) {
331283 // If we can't even stat the file, proceed with conservative defaults
332- console . warn ( `[validateFileSizeForContext] Could not stat file: ${ statError } ` )
333284 }
334285
335286 if ( totalLines > 10000 ) {
@@ -362,54 +313,54 @@ export async function validateFileSizeForContext(
362313 // Get context information
363314 const contextInfo = await getContextInfo ( cline )
364315
365- // Special handling for single-line files (likely minified)
366- if ( totalLines === 1 ) {
316+ // Special handling for single-line files (likely minified) or effectively single-line files
317+ const isEffSingleLine = await isEffectivelySingleLine ( filePath , totalLines )
318+ if ( isEffSingleLine ) {
367319 const singleLineResult = await validateSingleLineFile ( filePath , cline , contextInfo )
368320 if ( singleLineResult ) {
369321 return singleLineResult
370322 }
371323 // Fall through to regular validation if single-line validation failed
372324 }
373325
374- // Phase 1: Read content up to estimated safe character limit
326+ // Read content up to estimated safe character limit
375327 const estimatedSafeChars = contextInfo . targetTokenLimit * CHARS_PER_TOKEN_ESTIMATE
376- const { content, lineCount, lineToCharMap } = await readFileInBatches ( filePath , totalLines , estimatedSafeChars )
377-
378- // Phase 2: Validate with actual API and cutback if needed
379- const { finalContent, finalLineCount } = await validateAndAdjustContent (
380- content ,
381- lineCount ,
382- lineToCharMap ,
383- contextInfo . targetTokenLimit ,
384- totalLines ,
385- cline ,
386- )
328+ console . log ( `[validateFileSizeForContext] Estimated safe chars for ${ filePath } : ${ estimatedSafeChars } ` )
387329
388- // Log final statistics
389- console . log ( `[validateFileSizeForContext] Final: ${ finalLineCount } lines, ${ finalContent . length } chars` )
330+ const { content , lineCount } = await readFileInBatches ( filePath , totalLines , estimatedSafeChars )
331+ console . log ( `[validateFileSizeForContext] Read ${ lineCount } lines ( ${ content . length } chars) from ${ filePath } ` )
390332
391- // Ensure we provide at least a minimum useful amount
392- const finalSafeMaxLines = Math . max ( MIN_USEFUL_LINES , finalLineCount )
393-
394- // If we read the entire file without exceeding the limit, no limitation needed
395- if ( finalLineCount >= totalLines ) {
333+ // If we read the entire file without hitting the character limit, no limitation needed
334+ if ( lineCount >= totalLines ) {
335+ console . log ( `[validateFileSizeForContext] Read entire file ${ filePath } without hitting limit` )
396336 return { shouldLimit : false , safeMaxLines : currentMaxReadFileLine }
397337 }
398338
339+ // We hit the character limit before reading all lines
340+ // Ensure we provide at least a minimum useful amount
341+ const finalSafeMaxLines = Math . max ( MIN_USEFUL_LINES , lineCount )
342+ console . log (
343+ `[validateFileSizeForContext] Hit character limit for ${ filePath } : lineCount=${ lineCount } , finalSafeMaxLines=${ finalSafeMaxLines } ` ,
344+ )
345+
399346 // If we couldn't read even the minimum useful lines
400- if ( finalLineCount < MIN_USEFUL_LINES ) {
401- return {
347+ if ( lineCount < MIN_USEFUL_LINES ) {
348+ const result = {
402349 shouldLimit : true ,
403350 safeMaxLines : finalSafeMaxLines ,
404- reason : `Very limited context space. Could only safely read ${ finalLineCount } lines before exceeding token limit. Context: ${ contextInfo . currentlyUsed } /${ contextInfo . contextWindow } tokens used (${ Math . round ( ( contextInfo . currentlyUsed / contextInfo . contextWindow ) * 100 ) } %). Limited to ${ finalSafeMaxLines } lines. Consider using search_files or line_range for specific sections.` ,
351+ reason : `Very limited context space. Could only safely read ${ lineCount } lines before exceeding token limit. Context: ${ contextInfo . currentlyUsed } /${ contextInfo . contextWindow } tokens used (${ Math . round ( ( contextInfo . currentlyUsed / contextInfo . contextWindow ) * 100 ) } %). Limited to ${ finalSafeMaxLines } lines. Consider using search_files or line_range for specific sections.` ,
405352 }
353+ console . log ( `[validateFileSizeForContext] Returning very limited context result for ${ filePath } :` , result )
354+ return result
406355 }
407356
408- return {
357+ const result = {
409358 shouldLimit : true ,
410359 safeMaxLines : finalSafeMaxLines ,
411360 reason : `File exceeds available context space. Safely read ${ finalSafeMaxLines } lines out of ${ totalLines } total lines. Context usage: ${ contextInfo . currentlyUsed } /${ contextInfo . contextWindow } tokens (${ Math . round ( ( contextInfo . currentlyUsed / contextInfo . contextWindow ) * 100 ) } %). Use line_range to read specific sections.` ,
412361 }
362+ console . log ( `[validateFileSizeForContext] Returning limited context result for ${ filePath } :` , result )
363+ return result
413364 } catch ( error ) {
414365 return handleValidationError ( filePath , totalLines , currentMaxReadFileLine , error )
415366 }
0 commit comments