RooCodeInc
diff --git a/src/core/tools/contextValidator.ts b/src/core/tools/contextValidator.ts
Lines changed: 111 additions & 160 deletions
@@ -1,5 +1,6 @@
 import { Task } from "../task/Task"
 import { readLines } from "../../integrations/misc/read-lines"
+import { readPartialSingleLineContent } from "../../integrations/misc/read-partial-content"
 import { getModelMaxOutputTokens, getFormatForProvider } from "../../shared/api"
 import * as fs from "fs/promises"
 
@@ -9,14 +10,8 @@ import * as fs from "fs/promises"
  * when reading files without affecting other context window calculations.
  */
 const FILE_READ_BUFFER_PERCENTAGE = 0.25 // 25% buffer for file reads
-
-/**
- * Constants for the 2-phase validation approach
- */
 const CHARS_PER_TOKEN_ESTIMATE = 3
-const CUTBACK_PERCENTAGE = 0.2 // 20% reduction when over limit
 const READ_BATCH_SIZE = 50 // Read 50 lines at a time for efficiency
-const MAX_API_CALLS = 5 // Safety limit to prevent infinite loops
 const MIN_USEFUL_LINES = 50 // Minimum lines to consider useful
 
 /**
@@ -27,7 +22,7 @@ const SMALL_FILE_SIZE = 100 * 1024 // 100KB - safe if context is mostly empty
 
 export interface ContextValidationResult {
 	shouldLimit: boolean
-	safeMaxLines: number
+	safeMaxLines: number // Line count for multi-line files; for single-line files, the number of characters that can be loaded (-1 when the whole line fits)
 	reason?: string
 }
 
@@ -79,7 +74,6 @@ async function shouldSkipValidation(filePath: string, totalLines: number, cline:
 		// Get file size
 		const stats = await fs.stat(filePath)
 		const fileSizeBytes = stats.size
-		const fileSizeMB = fileSizeBytes / (1024 * 1024)
 
 		// Very small files by size are definitely safe to skip validation
 		if (fileSizeBytes < TINY_FILE_SIZE) {
@@ -99,65 +93,100 @@ async function shouldSkipValidation(filePath: string, totalLines: number, cline:
 		// we can skip validation as there's plenty of room
 		if (contextUsagePercent < 0.5 && fileSizeBytes < SMALL_FILE_SIZE) {
 			console.log(
-				`[validateFileSizeForContext] Skipping validation for ${filePath} - context mostly empty (${Math.round(contextUsagePercent * 100)}% used) and file is moderate size (${fileSizeMB.toFixed(2)}MB)`,
+				`[shouldSkipValidation] Skipping validation for ${filePath} - context mostly empty (${Math.round(contextUsagePercent * 100)}% used) and file is moderate size`,
 			)
 			return true
 		}
 	} catch (error) {
 		// If we can't check file size or context state, don't skip validation
-		console.warn(`[validateFileSizeForContext] Could not check file size or context state: ${error}`)
+		console.warn(`[shouldSkipValidation] Could not check file size or context state: ${error}`)
 	}
 
 	return false
 }
 
+/**
+ * Decides whether a file should be treated as single-line: true for a 1-line file, or for a 2-5 line file of at least 100KB whose lines after the first are all blank (whitespace-only).
+ * The second case covers minified files that carry a few empty lines. Everything else yields false, including any error raised during the check (logged as a warning).
+ */
+async function isEffectivelySingleLine(filePath: string, totalLines: number): Promise<boolean> {
+	// Files with no lines or with more than 5 lines are never treated as single-line
+	if (totalLines < 1 || totalLines > 5) {
+		return false
+	}
+
+	// A genuine one-line file needs no further inspection
+	if (totalLines === 1) {
+		return true
+	}
+
+	try {
+		// For 2-5 line files: require a big file (at least 100KB) whose lines 2-5 are blank
+		const stats = await fs.stat(filePath)
+		const fileSizeBytes = stats.size
+
+		// Only apply this logic to big files; smaller ones are reported as not single-line
+		if (fileSizeBytes < 100 * 1024) {
+			// Less than 100KB
+			return false
+		}
+
+		// Read every line, the first one included (readLines arguments are presumably endLine, startLine - confirm), then split to inspect lines 2-5
+		const content = await readLines(filePath, totalLines - 1, 0)
+		const lines = content.split("\n")
+
+		// Lines 2-5 (indices 1-4) must contain nothing but whitespace; line 1 is not examined
+		let hasEmptyLines2to5 = true
+		for (let i = 1; i < Math.min(lines.length, 5); i++) {
+			if (lines[i].trim().length > 0) {
+				hasEmptyLines2to5 = false
+				break
+			}
+		}
+
+		console.log(
+			`[isEffectivelySingleLine] File ${filePath}: totalLines=${totalLines}, fileSize=${(fileSizeBytes / 1024).toFixed(1)}KB, hasEmptyLines2to5=${hasEmptyLines2to5}`,
+		)
+
+		return hasEmptyLines2to5
+	} catch (error) {
+		console.warn(`[isEffectivelySingleLine] Error checking file ${filePath}: ${error}`)
+		return false
+	}
+}
+
 /**
  * Validates a single-line file (likely minified) to see if it fits in context
- * Uses the same heuristic and backoff strategy as multi-line files
+ * Uses only heuristic estimation without actual token counting
  */
 async function validateSingleLineFile(
 	filePath: string,
 	cline: Task,
 	contextInfo: ContextInfo,
 ): Promise<ContextValidationResult | null> {
-	console.log(`[validateFileSizeForContext] Single-line file detected: ${filePath} - checking if it fits in context`)
-
 	try {
-		// Phase 1: Use char/3 heuristic to estimate safe content size
+		// Use char heuristic to estimate safe content size with additional safety margin
 		const estimatedSafeChars = contextInfo.targetTokenLimit * CHARS_PER_TOKEN_ESTIMATE
 
-		// Read the single line
-		const fullContent = await readLines(filePath, 0, 0)
+		// Read only up to the limited chars to avoid loading huge files into memory
+		const partialContent = await readPartialSingleLineContent(filePath, estimatedSafeChars)
 
-		// If the full content fits within our estimated safe chars, try it
-		let contentToValidate = fullContent
-		if (fullContent.length > estimatedSafeChars) {
-			// Content is too large, start with estimated safe portion
-			contentToValidate = fullContent.substring(0, estimatedSafeChars)
-			console.log(
-				`[validateFileSizeForContext] Single-line file exceeds estimated safe chars (${fullContent.length} > ${estimatedSafeChars}), starting with truncated content`,
-			)
-		}
-
-		// Phase 2: Use shared validation function with cutback
-		const { finalContent, actualTokens } = await validateAndCutbackContent(
-			contentToValidate,
-			contextInfo.targetTokenLimit,
-			cline,
-			true,
-		)
+		// Get the full file size to determine if we read the entire file
+		const stats = await fs.stat(filePath)
+		const fullFileSize = stats.size
+		const isPartialRead = partialContent.length < fullFileSize
 
-		// Determine the result based on what we could read
-		if (finalContent.length === fullContent.length) {
+		if (!isPartialRead) {
 			// The entire single line fits
 			return { shouldLimit: false, safeMaxLines: -1 }
-		} else if (finalContent.length > 0) {
+		} else if (partialContent.length > 0) {
 			// Only a portion of the line fits
-			const percentageRead = Math.round((finalContent.length / fullContent.length) * 100)
+			const percentageRead = Math.round((partialContent.length / fullFileSize) * 100)
+
 			return {
 				shouldLimit: true,
-				safeMaxLines: 1, // Still technically 1 line, but truncated
-				reason: `Large single-line file (likely minified) exceeds available context space. Only the first ${percentageRead}% (${finalContent.length} of ${fullContent.length} characters) can be loaded. The file contains ${actualTokens} tokens of the available ${contextInfo.targetTokenLimit} tokens. Context: ${contextInfo.currentlyUsed}/${contextInfo.contextWindow} tokens used (${Math.round((contextInfo.currentlyUsed / contextInfo.contextWindow) * 100)}%). This is a hard limit - no additional content from this file can be accessed.`,
+				safeMaxLines: partialContent.length, // Return actual character count for single-line files
+				reason: `Large single-line file (likely minified) exceeds available context space. Only the first ${percentageRead}% (${partialContent.length} of ${fullFileSize} characters) can be loaded. Context: ${contextInfo.currentlyUsed}/${contextInfo.contextWindow} tokens used (${Math.round((contextInfo.currentlyUsed / contextInfo.contextWindow) * 100)}%). This is a hard limit - no additional content from this file can be accessed.`,
 			}
 		} else {
 			// Can't fit any content
@@ -168,8 +197,24 @@ async function validateSingleLineFile(
 			}
 		}
 	} catch (error) {
-		console.warn(`[validateFileSizeForContext] Error processing single-line file: ${error}`)
-		return null // Fall through to regular validation
+		// Check for specific error types that indicate memory issues
+		if (error instanceof Error) {
+			const errorMessage = error.message.toLowerCase()
+			if (
+				errorMessage.includes("heap") ||
+				errorMessage.includes("memory") ||
+				errorMessage.includes("allocation")
+			) {
+				// Return a safe fallback instead of crashing
+				return {
+					shouldLimit: true,
+					safeMaxLines: 0,
+					reason: `File is too large to process due to memory constraints. Error: ${error.message}. This file cannot be accessed.`,
+				}
+			}
+		}
+
+		return null // Fall through to regular validation for other errors
 	}
 }
 
@@ -216,97 +261,6 @@ async function readFileInBatches(
 	return { content: accumulatedContent, lineCount: currentLine, lineToCharMap }
 }
 
-/**
- * Shared function to validate content with actual API and apply cutback if needed
- * Works for both single-line and multi-line content
- */
-async function validateAndCutbackContent(
-	content: string,
-	targetTokenLimit: number,
-	cline: Task,
-	isSingleLine: boolean = false,
-): Promise<{ finalContent: string; actualTokens: number; didCutback: boolean }> {
-	let finalContent = content
-	let apiCallCount = 0
-	let actualTokens = 0
-	let didCutback = false
-
-	while (apiCallCount < MAX_API_CALLS) {
-		apiCallCount++
-
-		// Make the actual API call to count tokens
-		actualTokens = await cline.api.countTokens([{ type: "text", text: finalContent }])
-
-		console.log(
-			`[validateFileSizeForContext] API call ${apiCallCount}: ${actualTokens} tokens for ${finalContent.length} chars${isSingleLine ? " (single-line)" : ""}`,
-		)
-
-		if (actualTokens <= targetTokenLimit) {
-			// We're under the limit, we're done!
-			break
-		}
-
-		// We're over the limit - cut back by CUTBACK_PERCENTAGE
-		const targetLength = Math.floor(finalContent.length * (1 - CUTBACK_PERCENTAGE))
-
-		// Safety check
-		if (targetLength === 0 || targetLength === finalContent.length) {
-			break
-		}
-
-		finalContent = finalContent.substring(0, targetLength)
-		didCutback = true
-	}
-
-	return { finalContent, actualTokens, didCutback }
-}
-
-/**
- * Validates content with actual API and cuts back if needed (for multi-line files)
- */
-async function validateAndAdjustContent(
-	accumulatedContent: string,
-	initialLineCount: number,
-	lineToCharMap: Map<number, number>,
-	targetTokenLimit: number,
-	totalLines: number,
-	cline: Task,
-): Promise<{ finalContent: string; finalLineCount: number }> {
-	// Use the shared validation function
-	const { finalContent, didCutback } = await validateAndCutbackContent(
-		accumulatedContent,
-		targetTokenLimit,
-		cline,
-		false,
-	)
-
-	// If no cutback was needed, return original line count
-	if (!didCutback) {
-		return { finalContent, finalLineCount: initialLineCount }
-	}
-
-	// Find the line that corresponds to the cut content length
-	let cutoffLine = 0
-	for (const [lineNum, charPos] of lineToCharMap.entries()) {
-		if (charPos > finalContent.length) {
-			break
-		}
-		cutoffLine = lineNum
-	}
-
-	// Ensure we don't cut back too far
-	if (cutoffLine < 10) {
-		console.warn(`[validateFileSizeForContext] Cutback resulted in too few lines (${cutoffLine}), using minimum`)
-		cutoffLine = Math.min(MIN_USEFUL_LINES, totalLines)
-	}
-
-	// Get the character position for the cutoff line
-	const cutoffCharPos = lineToCharMap.get(cutoffLine) || 0
-	const adjustedContent = accumulatedContent.substring(0, cutoffCharPos)
-
-	return { finalContent: adjustedContent, finalLineCount: cutoffLine }
-}
-
 /**
  * Handles error cases with conservative fallback
  */
@@ -316,8 +270,6 @@ async function handleValidationError(
 	currentMaxReadFileLine: number,
 	error: unknown,
 ): Promise<ContextValidationResult> {
-	console.warn(`[validateFileSizeForContext] Error accessing runtime state: ${error}`)
-
 	// In error cases, we can't check context state, so use simple file size heuristics
 	try {
 		const stats = await fs.stat(filePath)
@@ -329,7 +281,6 @@ async function handleValidationError(
 		}
 	} catch (statError) {
 		// If we can't even stat the file, proceed with conservative defaults
-		console.warn(`[validateFileSizeForContext] Could not stat file: ${statError}`)
 	}
 
 	if (totalLines > 10000) {
@@ -362,54 +313,54 @@ export async function validateFileSizeForContext(
 		// Get context information
 		const contextInfo = await getContextInfo(cline)
 
-		// Special handling for single-line files (likely minified)
-		if (totalLines === 1) {
+		// Special handling for single-line files (likely minified) or effectively single-line files
+		const isEffSingleLine = await isEffectivelySingleLine(filePath, totalLines)
+		if (isEffSingleLine) {
 			const singleLineResult = await validateSingleLineFile(filePath, cline, contextInfo)
 			if (singleLineResult) {
 				return singleLineResult
 			}
 			// Fall through to regular validation if single-line validation failed
 		}
 
-		// Phase 1: Read content up to estimated safe character limit
+		// Read content up to estimated safe character limit
 		const estimatedSafeChars = contextInfo.targetTokenLimit * CHARS_PER_TOKEN_ESTIMATE
-		const { content, lineCount, lineToCharMap } = await readFileInBatches(filePath, totalLines, estimatedSafeChars)
-
-		// Phase 2: Validate with actual API and cutback if needed
-		const { finalContent, finalLineCount } = await validateAndAdjustContent(
-			content,
-			lineCount,
-			lineToCharMap,
-			contextInfo.targetTokenLimit,
-			totalLines,
-			cline,
-		)
+		console.log(`[validateFileSizeForContext] Estimated safe chars for ${filePath}: ${estimatedSafeChars}`)
 
-		// Log final statistics
-		console.log(`[validateFileSizeForContext] Final: ${finalLineCount} lines, ${finalContent.length} chars`)
+		const { content, lineCount } = await readFileInBatches(filePath, totalLines, estimatedSafeChars)
+		console.log(`[validateFileSizeForContext] Read ${lineCount} lines (${content.length} chars) from ${filePath}`)
 
-		// Ensure we provide at least a minimum useful amount
-		const finalSafeMaxLines = Math.max(MIN_USEFUL_LINES, finalLineCount)
-
-		// If we read the entire file without exceeding the limit, no limitation needed
-		if (finalLineCount >= totalLines) {
+		// If we read the entire file without hitting the character limit, no limitation needed
+		if (lineCount >= totalLines) {
+			console.log(`[validateFileSizeForContext] Read entire file ${filePath} without hitting limit`)
 			return { shouldLimit: false, safeMaxLines: currentMaxReadFileLine }
 		}
 
+		// We hit the character limit before reading all lines
+		// Ensure we provide at least a minimum useful amount
+		const finalSafeMaxLines = Math.max(MIN_USEFUL_LINES, lineCount)
+		console.log(
+			`[validateFileSizeForContext] Hit character limit for ${filePath}: lineCount=${lineCount}, finalSafeMaxLines=${finalSafeMaxLines}`,
+		)
+
 		// If we couldn't read even the minimum useful lines
-		if (finalLineCount < MIN_USEFUL_LINES) {
-			return {
+		if (lineCount < MIN_USEFUL_LINES) {
+			const result = {
 				shouldLimit: true,
 				safeMaxLines: finalSafeMaxLines,
-				reason: `Very limited context space. Could only safely read ${finalLineCount} lines before exceeding token limit. Context: ${contextInfo.currentlyUsed}/${contextInfo.contextWindow} tokens used (${Math.round((contextInfo.currentlyUsed / contextInfo.contextWindow) * 100)}%). Limited to ${finalSafeMaxLines} lines. Consider using search_files or line_range for specific sections.`,
+				reason: `Very limited context space. Could only safely read ${lineCount} lines before exceeding token limit. Context: ${contextInfo.currentlyUsed}/${contextInfo.contextWindow} tokens used (${Math.round((contextInfo.currentlyUsed / contextInfo.contextWindow) * 100)}%). Limited to ${finalSafeMaxLines} lines. Consider using search_files or line_range for specific sections.`,
 			}
+			console.log(`[validateFileSizeForContext] Returning very limited context result for ${filePath}:`, result)
+			return result
 		}
 
-		return {
+		const result = {
 			shouldLimit: true,
 			safeMaxLines: finalSafeMaxLines,
 			reason: `File exceeds available context space. Safely read ${finalSafeMaxLines} lines out of ${totalLines} total lines. Context usage: ${contextInfo.currentlyUsed}/${contextInfo.contextWindow} tokens (${Math.round((contextInfo.currentlyUsed / contextInfo.contextWindow) * 100)}%). Use line_range to read specific sections.`,
 		}
+		console.log(`[validateFileSizeForContext] Returning limited context result for ${filePath}:`, result)
+		return result
 	} catch (error) {
 		return handleValidationError(filePath, totalLines, currentMaxReadFileLine, error)
 	}