Skip to content
Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 213 additions & 9 deletions src/core/tools/__tests__/readFileTool.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,20 @@ import * as path from "path"

import { countFileLines } from "../../../integrations/misc/line-counter"
import { readLines } from "../../../integrations/misc/read-lines"
import { extractTextFromFile } from "../../../integrations/misc/extract-text"
import { readPartialContent } from "../../../integrations/misc/read-partial-content"
import { extractTextFromFile, addLineNumbers, getSupportedBinaryFormats } from "../../../integrations/misc/extract-text"
import { parseSourceCodeDefinitionsForFile } from "../../../services/tree-sitter"
import { isBinaryFile } from "isbinaryfile"
import { ReadFileToolUse, ToolParamName, ToolResponse } from "../../../shared/tools"
import { readFileTool } from "../readFileTool"
import { formatResponse } from "../../prompts/responses"
import * as contextValidatorModule from "../contextValidator"
import { DEFAULT_MAX_IMAGE_FILE_SIZE_MB, DEFAULT_MAX_TOTAL_IMAGE_SIZE_MB } from "../helpers/imageHelpers"

vi.mock("../../../i18n", () => ({
t: vi.fn((key: string) => key),
}))

vi.mock("path", async () => {
const originalPath = await vi.importActual("path")
return {
Expand All @@ -26,11 +32,25 @@ vi.mock("path", async () => {
vi.mock("isbinaryfile")

vi.mock("../../../integrations/misc/line-counter")
vi.mock("../../../integrations/misc/read-lines")
vi.mock("../../../integrations/misc/read-lines", () => ({
readLines: vi.fn().mockResolvedValue("mocked line content"),
}))
vi.mock("../../../integrations/misc/read-partial-content", () => ({
readPartialSingleLineContent: vi.fn().mockResolvedValue("mocked partial content"),
readPartialContent: vi.fn().mockResolvedValue({
content: "mocked partial content",
charactersRead: 100,
totalCharacters: 1000,
linesRead: 5,
totalLines: 50,
lastLineRead: 5,
}),
}))
vi.mock("../contextValidator")

// Mock fs/promises readFile for image tests
const fsPromises = vi.hoisted(() => ({
readFile: vi.fn(),
readFile: vi.fn().mockResolvedValue(Buffer.from("mock file content")),
stat: vi.fn().mockResolvedValue({ size: 1024 }),
}))
vi.mock("fs/promises", () => fsPromises)
Expand Down Expand Up @@ -115,7 +135,7 @@ vi.mock("../../ignore/RooIgnoreController", () => ({
}))

vi.mock("../../../utils/fs", () => ({
fileExistsAtPath: vi.fn().mockReturnValue(true),
fileExistsAtPath: vi.fn().mockResolvedValue(true),
}))

// Global beforeEach to ensure clean mock state between all test suites
Expand Down Expand Up @@ -263,6 +283,12 @@ describe("read_file tool with maxReadFileLine setting", () => {
mockedPathResolve.mockReturnValue(absoluteFilePath)
mockedIsBinaryFile.mockResolvedValue(false)

// Default mock for validateFileSizeForContext - no limit
vi.mocked(contextValidatorModule.validateFileSizeForContext).mockResolvedValue({
shouldLimit: false,
safeContentLimit: -1,
})

mockInputContent = fileContent

// Setup the extractTextFromFile mock implementation with the current mockInputContent
Expand Down Expand Up @@ -382,8 +408,7 @@ describe("read_file tool with maxReadFileLine setting", () => {
expect(result).toContain(`<list_code_definition_names>`)

// Verify XML structure
expect(result).toContain("<notice>Showing only 0 of 5 total lines")
expect(result).toContain("</notice>")
expect(result).toContain("<notice>tools:readFile.showingOnlyLines</notice>")
expect(result).toContain("<list_code_definition_names>")
expect(result).toContain(sourceCodeDef.trim())
expect(result).toContain("</list_code_definition_names>")
Expand All @@ -409,7 +434,7 @@ describe("read_file tool with maxReadFileLine setting", () => {
expect(result).toContain(`<file><path>${testFilePath}</path>`)
expect(result).toContain(`<content lines="1-3">`)
expect(result).toContain(`<list_code_definition_names>`)
expect(result).toContain("<notice>Showing only 3 of 5 total lines")
expect(result).toContain("<notice>tools:readFile.showingOnlyLines</notice>")
})
})

Expand Down Expand Up @@ -523,6 +548,7 @@ describe("read_file tool XML output structure", () => {

mockedPathResolve.mockReturnValue(absoluteFilePath)
mockedIsBinaryFile.mockResolvedValue(false)
mockedCountFileLines.mockResolvedValue(5) // Default line count

// Set default implementation for extractTextFromFile
mockedExtractTextFromFile.mockImplementation((filePath) => {
Expand Down Expand Up @@ -1326,6 +1352,172 @@ describe("read_file tool XML output structure", () => {
)
})
})

describe("line range instructions", () => {
beforeEach(() => {
// Reset mocks
vi.clearAllMocks()

// Mock file system functions
vi.mocked(isBinaryFile).mockResolvedValue(false)
vi.mocked(countFileLines).mockResolvedValue(10000) // Large file
vi.mocked(readLines).mockResolvedValue("line content")
vi.mocked(extractTextFromFile).mockResolvedValue("file content")

// Mock addLineNumbers
vi.mocked(addLineNumbers).mockImplementation((content, start) => `${start || 1} | ${content}`)
})

it("should always include inline line_range instructions when shouldLimit is true", async () => {
// Mock a large file
vi.mocked(countFileLines).mockResolvedValue(10000)

// Mock contextValidator to return shouldLimit true
vi.mocked(contextValidatorModule.validateFileSizeForContext).mockResolvedValue({
shouldLimit: true,
safeContentLimit: 2000,
reason: "This is a partial read - the remaining content cannot be accessed due to context limitations.",
})

// Mock readPartialContent to return truncated content
vi.mocked(readPartialContent).mockResolvedValue({
content: "Line 1\nLine 2\n...truncated...",
charactersRead: 2000,
totalCharacters: 500000,
linesRead: 100,
totalLines: 10000,
lastLineRead: 100,
})

const result = await executeReadFileTool(
{ args: `<file><path>large-file.ts</path></file>` },
{ totalLines: 10000, maxReadFileLine: -1 },
)

// Verify the result contains the simplified partial read notice
expect(result).toContain("<notice>")
expect(result).toContain(
"This is a partial read - the remaining content cannot be accessed due to context limitations.",
)
})

it("should not show any special notice when file fits in context", async () => {
// Mock small file that fits in context
vi.mocked(countFileLines).mockResolvedValue(100)
vi.mocked(contextValidatorModule.validateFileSizeForContext).mockResolvedValue({
shouldLimit: false,
safeContentLimit: -1,
})

const result = await executeReadFileTool({ args: `<file><path>small-file.ts</path></file>` })

// Should have file content but no notice about limits
expect(result).toContain("<file>")
expect(result).toContain("<path>small-file.ts</path>")
expect(result).toContain("<content")
expect(result).not.toContain("Use line_range")
expect(result).not.toContain("File exceeds available context space")
})

it("should not include line_range instructions for single-line files", async () => {
// Mock a single-line file that exceeds context
vi.mocked(countFileLines).mockResolvedValue(1)

// Mock contextValidator to return shouldLimit true with single-line file message
vi.mocked(contextValidatorModule.validateFileSizeForContext).mockResolvedValue({
shouldLimit: true,
safeContentLimit: 5000,
reason: "This is a partial read - the remaining content cannot be accessed due to context limitations.",
})

// Mock readPartialContent to return truncated content for single-line file
vi.mocked(readPartialContent).mockResolvedValue({
content: "const a=1;const b=2;...truncated",
charactersRead: 5000,
totalCharacters: 10000,
linesRead: 1,
totalLines: 1,
lastLineRead: 1,
})

const result = await executeReadFileTool(
{ args: `<file><path>minified.js</path></file>` },
{ totalLines: 1, maxReadFileLine: -1 },
)

// Verify the result contains the simplified partial read notice
expect(result).toContain("<notice>")
expect(result).toContain(
"This is a partial read - the remaining content cannot be accessed due to context limitations.",
)
})

it("should include line_range instructions for multi-line files that exceed context", async () => {
// Mock a multi-line file that exceeds context
vi.mocked(countFileLines).mockResolvedValue(5000)

// Mock contextValidator to return shouldLimit true with multi-line file message
vi.mocked(contextValidatorModule.validateFileSizeForContext).mockResolvedValue({
shouldLimit: true,
safeContentLimit: 50000,
reason: "This is a partial read - the remaining content cannot be accessed due to context limitations.",
})

// Mock readPartialContent to return truncated content
vi.mocked(readPartialContent).mockResolvedValue({
content: "Line 1\nLine 2\n...truncated...",
charactersRead: 50000,
totalCharacters: 250000,
linesRead: 1000,
totalLines: 5000,
lastLineRead: 1000,
})

const result = await executeReadFileTool(
{ args: `<file><path>large-file.ts</path></file>` },
{ totalLines: 5000, maxReadFileLine: -1 },
)

// Verify the result contains the simplified partial read notice
expect(result).toContain("<notice>")
expect(result).toContain(
"This is a partial read - the remaining content cannot be accessed due to context limitations.",
)
})

it("should handle normal file read section for single-line files with validation notice", async () => {
// Mock a single-line file that has shouldLimit true but fits after truncation
vi.mocked(countFileLines).mockResolvedValue(1)

// Mock contextValidator to return shouldLimit true with a single-line file notice
vi.mocked(contextValidatorModule.validateFileSizeForContext).mockResolvedValue({
shouldLimit: true,
safeContentLimit: 8000,
reason: "This is a partial read - the remaining content cannot be accessed due to context limitations.",
})

// Mock readPartialContent for single-line file
vi.mocked(readPartialContent).mockResolvedValue({
content: "const a=1;const b=2;const c=3;",
charactersRead: 8000,
totalCharacters: 10000,
linesRead: 1,
totalLines: 1,
lastLineRead: 1,
})

const result = await executeReadFileTool(
{ args: `<file><path>semi-large.js</path></file>` },
{ totalLines: 1, maxReadFileLine: -1 },
)

// Verify the result contains the simplified partial read notice
expect(result).toContain("<notice>")
expect(result).toContain(
"This is a partial read - the remaining content cannot be accessed due to context limitations.",
)
})
})
})

describe("read_file tool with image support", () => {
Expand Down Expand Up @@ -1591,12 +1783,24 @@ describe("read_file tool with image support", () => {
mockedPathResolve.mockReturnValue(absolutePath)
mockedExtractTextFromFile.mockResolvedValue("PDF content extracted")

// Ensure the file is treated as binary and PDF is in supported formats
mockedIsBinaryFile.mockResolvedValue(true)
mockedCountFileLines.mockResolvedValue(0)
vi.mocked(getSupportedBinaryFormats).mockReturnValue([".pdf", ".docx", ".ipynb"])

// Mock contextValidator to not interfere with PDF processing
vi.mocked(contextValidatorModule.validateFileSizeForContext).mockResolvedValue({
shouldLimit: false,
safeContentLimit: -1,
})

// Execute
const result = await executeReadImageTool(binaryPath)

// Verify it uses extractTextFromFile instead
// Verify it doesn't treat the PDF as an image
expect(result).not.toContain("<image_data>")
// Make the test platform-agnostic by checking the call was made (path normalization can vary)

// Should call extractTextFromFile for PDF processing
expect(mockedExtractTextFromFile).toHaveBeenCalledTimes(1)
const callArgs = mockedExtractTextFromFile.mock.calls[0]
expect(callArgs[0]).toMatch(/[\\\/]test[\\\/]document\.pdf$/)
Expand Down
100 changes: 100 additions & 0 deletions src/core/tools/contextValidator.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { Task } from "../task/Task"
import * as fs from "fs/promises"

/**
* Conservative buffer percentage for file reading.
* We use a very conservative estimate to ensure files fit in context.
*/
const FILE_READ_BUFFER_PERCENTAGE = 0.4 // 40% buffer for safety
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the 40% buffer intentionally this conservative? It might be worth making this configurable or adjusting based on model capabilities. Some models might handle closer-to-limit content better than others.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now yes.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like we shouldn’t need to be so conservative here if the rest of the logic is working right

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah sorry-- I think I just picked a big number for the simple version


/**
* Very conservative character to token ratio
* Using 2.5 chars per token instead of 3-4 to be extra safe
*/
const CHARS_PER_TOKEN_CONSERVATIVE = 2.5

/**
* File size thresholds
*/
const TINY_FILE_SIZE = 10 * 1024 // 10KB - always safe
const SMALL_FILE_SIZE = 50 * 1024 // 50KB - safe if context is mostly empty
const MEDIUM_FILE_SIZE = 500 * 1024 // 500KB - needs validation
const LARGE_FILE_SIZE = 1024 * 1024 // 1MB - always limit

export interface ContextValidationResult {
shouldLimit: boolean
safeContentLimit: number // Character count limit
reason?: string
}

/**
* Simple validation based on file size and available context.
* Uses very conservative estimates to avoid context overflow.
*/
export async function validateFileSizeForContext(
filePath: string,
totalLines: number,
currentMaxReadFileLine: number,
cline: Task,
): Promise<ContextValidationResult> {
try {
// Get file size
const stats = await fs.stat(filePath)
const fileSizeBytes = stats.size

// Tiny files are always safe
if (fileSizeBytes < TINY_FILE_SIZE) {
return { shouldLimit: false, safeContentLimit: -1 }
}

// Get context information
const modelInfo = cline.api.getModel().info
const { contextTokens: currentContextTokens } = cline.getTokenUsage()
const contextWindow = modelInfo.contextWindow
const currentlyUsed = currentContextTokens || 0

// Calculate available space with conservative buffer
const remainingTokens = contextWindow - currentlyUsed
const usableTokens = Math.floor(remainingTokens * (1 - FILE_READ_BUFFER_PERCENTAGE))

// Reserve space for response (use 25% of remaining or 4096, whichever is smaller)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should use the common logic for this

const responseReserve = Math.min(Math.floor(usableTokens * 0.25), 4096)
const availableForFile = usableTokens - responseReserve

// Convert to conservative character estimate
const safeCharLimit = Math.floor(availableForFile * CHARS_PER_TOKEN_CONSERVATIVE)

// For small files with mostly empty context, allow full read
const contextUsagePercent = currentlyUsed / contextWindow
if (fileSizeBytes < SMALL_FILE_SIZE && contextUsagePercent < 0.3) {
return { shouldLimit: false, safeContentLimit: -1 }
}

// For medium files, check if they fit within safe limit
if (fileSizeBytes < MEDIUM_FILE_SIZE && fileSizeBytes <= safeCharLimit) {
return { shouldLimit: false, safeContentLimit: -1 }
}

// For large files or when approaching limits, always limit
if (fileSizeBytes > safeCharLimit || fileSizeBytes > LARGE_FILE_SIZE) {
// Use a very conservative limit
const finalLimit = Math.min(safeCharLimit, 100000) // Cap at 100K chars
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this might annoy people who are trying to use a model with a large context window to read large files

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the plan for this PR was to do something stupid to fix the temporary error-- basically never reading big files

This PR has the full implementation and doesn't have that limit
#6319


return {
shouldLimit: true,
safeContentLimit: finalLimit,
reason: "This is a partial read - the remaining content cannot be accessed due to context limitations.",
}
}

return { shouldLimit: false, safeContentLimit: -1 }
} catch (error) {
// On any error, use ultra-conservative defaults
console.warn(`[validateFileSizeForContext] Error during validation: ${error}`)
return {
shouldLimit: true,
safeContentLimit: 50000, // 50K chars as safe fallback
reason: "This is a partial read - the remaining content cannot be accessed due to context limitations.",
}
}
}
Loading
Loading