Skip to content

Commit 9aa04a3

Browse files
authored
Add limit for files Cline reads into context (RooCodeInc#2199)
* Revert "Merge pull request RooCodeInc#1964 from cline/ocasta181/ENG-209"
  This reverts commit ca17a06, reversing changes made to 6b9c2a1.
* Set hard 300KB limit on file size Cline reads into context
* Add changeset
1 parent ca17a06 commit 9aa04a3

File tree

10 files changed

+64
-391
lines changed

10 files changed

+64
-391
lines changed

.changeset/late-cougars-hear.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"claude-dev": patch
3+
---
4+
5+
Add hard limit for file size Cline reads into context

.changeset/seven-flowers-lay.md

Lines changed: 0 additions & 5 deletions
This file was deleted.

src/core/Cline.ts

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ import { ClineHandler } from "../api/providers/cline"
6262
import { ClineProvider, GlobalFileNames } from "./webview/ClineProvider"
6363
import { DEFAULT_LANGUAGE_SETTINGS, getLanguageKey, LanguageDisplay, LanguageKey } from "../shared/Languages"
6464
import { telemetryService } from "../services/telemetry/TelemetryService"
65-
import { getMaxAllowedSize } from "../utils/content-size"
6665

6766
const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ?? path.join(os.homedir(), "Desktop") // may or may not exist but fs checking existence would immediately ask for permission which would be bad UX, need to come up with a better solution
6867

@@ -1354,8 +1353,25 @@ export class Cline {
13541353
if (previousRequest && previousRequest.text) {
13551354
const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
13561355
const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
1357-
let contextWindow = this.api.getModel().info.contextWindow || 64_000 // minimum context (Deepseek)
1358-
const maxAllowedSize = getMaxAllowedSize(contextWindow)
1356+
let contextWindow = this.api.getModel().info.contextWindow || 128_000
1357+
// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
1358+
if (this.api instanceof OpenAiHandler && this.api.getModel().id.toLowerCase().includes("deepseek")) {
1359+
contextWindow = 64_000
1360+
}
1361+
let maxAllowedSize: number
1362+
switch (contextWindow) {
1363+
case 64_000: // deepseek models
1364+
maxAllowedSize = contextWindow - 27_000
1365+
break
1366+
case 128_000: // most models
1367+
maxAllowedSize = contextWindow - 30_000
1368+
break
1369+
case 200_000: // claude models
1370+
maxAllowedSize = contextWindow - 40_000
1371+
break
1372+
default:
1373+
maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
1374+
}
13591375

13601376
// This is the most reliable way to know when we're close to hitting the context window.
13611377
if (totalTokens >= maxAllowedSize) {
@@ -1999,17 +2015,15 @@ export class Cline {
19992015
}
20002016
telemetryService.captureToolUsage(this.taskId, block.name, false, true)
20012017
}
2002-
// Get context window and used context from API model
2003-
const contextWindow = this.api.getModel().info.contextWindow
2004-
2005-
// Pass the raw context window size - extractTextFromFile will calculate the appropriate limit
2006-
const content = await extractTextFromFile(absolutePath, contextWindow)
2018+
// now execute the tool like normal
2019+
const content = await extractTextFromFile(absolutePath)
20072020
pushToolResult(content)
20082021

20092022
break
20102023
}
20112024
} catch (error) {
20122025
await handleError("reading file", error)
2026+
20132027
break
20142028
}
20152029
}
@@ -3376,10 +3390,9 @@ export class Cline {
33763390
block.text.includes("<task>") ||
33773391
block.text.includes("<user_message>")
33783392
) {
3379-
let contextWindow = this.api.getModel().info.contextWindow
33803393
return {
33813394
...block,
3382-
text: await parseMentions(block.text, cwd, this.urlContentFetcher, contextWindow),
3395+
text: await parseMentions(block.text, cwd, this.urlContentFetcher),
33833396
}
33843397
}
33853398
}
@@ -3484,12 +3497,11 @@ export class Cline {
34843497
}
34853498
}
34863499
}
3487-
34883500
// only show inactive terminals if there's output to show
34893501
if (inactiveTerminals.length > 0) {
34903502
const inactiveTerminalOutputs = new Map<number, string>()
34913503
for (const inactiveTerminal of inactiveTerminals) {
3492-
const newOutput = await this.terminalManager.getUnretrievedOutput(inactiveTerminal.id)
3504+
const newOutput = this.terminalManager.getUnretrievedOutput(inactiveTerminal.id)
34933505
if (newOutput) {
34943506
inactiveTerminalOutputs.set(inactiveTerminal.id, newOutput)
34953507
}

src/core/mentions/index.ts

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,7 @@ export function openMention(mention?: string): void {
3838
}
3939
}
4040

41-
export async function parseMentions(
42-
text: string,
43-
cwd: string,
44-
urlContentFetcher: UrlContentFetcher,
45-
contextWindow?: number,
46-
): Promise<string> {
41+
export async function parseMentions(text: string, cwd: string, urlContentFetcher: UrlContentFetcher): Promise<string> {
4742
const mentions: Set<string> = new Set()
4843
let parsedText = text.replace(mentionRegexGlobal, (match, mention) => {
4944
mentions.add(mention)
@@ -95,7 +90,7 @@ export async function parseMentions(
9590
} else if (mention.startsWith("/")) {
9691
const mentionPath = mention.slice(1)
9792
try {
98-
const content = await getFileOrFolderContent(mentionPath, cwd, contextWindow)
93+
const content = await getFileOrFolderContent(mentionPath, cwd)
9994
if (mention.endsWith("/")) {
10095
parsedText += `\n\n<folder_content path="${mentionPath}">\n${content}\n</folder_content>`
10196
} else {
@@ -150,7 +145,7 @@ export async function parseMentions(
150145
return parsedText
151146
}
152147

153-
async function getFileOrFolderContent(mentionPath: string, cwd: string, contextWindow?: number): Promise<string> {
148+
async function getFileOrFolderContent(mentionPath: string, cwd: string): Promise<string> {
154149
const absPath = path.resolve(cwd, mentionPath)
155150

156151
try {
@@ -161,7 +156,7 @@ async function getFileOrFolderContent(mentionPath: string, cwd: string, contextW
161156
if (isBinary) {
162157
return "(Binary file, unable to display content)"
163158
}
164-
const content = await extractTextFromFile(absPath, contextWindow)
159+
const content = await extractTextFromFile(absPath)
165160
return content
166161
} else if (stats.isDirectory()) {
167162
const entries = await fs.readdir(absPath, { withFileTypes: true })
@@ -182,7 +177,7 @@ async function getFileOrFolderContent(mentionPath: string, cwd: string, contextW
182177
if (isBinary) {
183178
return undefined
184179
}
185-
const content = await extractTextFromFile(absoluteFilePath, contextWindow)
180+
const content = await extractTextFromFile(absoluteFilePath)
186181
return `<file_content path="${filePath.toPosix()}">\n${content}\n</file_content>`
187182
} catch (error) {
188183
return undefined

src/integrations/misc/extract-text.test.ts

Lines changed: 0 additions & 67 deletions
This file was deleted.

src/integrations/misc/extract-text.ts

Lines changed: 11 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -4,102 +4,35 @@ import pdf from "pdf-parse/lib/pdf-parse"
44
import mammoth from "mammoth"
55
import fs from "fs/promises"
66
import { isBinaryFile } from "isbinaryfile"
7-
import { estimateContentSize, estimateFileSize, wouldExceedSizeLimit, getMaxAllowedSize } from "../../utils/content-size"
8-
import { ContentTooLargeError } from "../../shared/errors"
7+
import { getFileSizeInKB } from "../../utils/fs"
98

10-
/**
11-
* Checks if terminal output would exceed size limits and returns the content if safe
12-
* @param content The terminal output content to check
13-
* @param contextWindow Context window limit in tokens
14-
* @param command The command that generated this output (for error reporting)
15-
* @returns The validated content
16-
* @throws ContentTooLargeError if content exceeds size limit
17-
*/
18-
export async function extractTextFromTerminal(content: string | Buffer, contextWindow: number, command: string): Promise<string> {
19-
console.debug(`[TERMINAL_SIZE_CHECK] Checking size for command output: ${command}`)
20-
21-
// Convert to string but don't trim yet
22-
const rawContent = content.toString()
23-
console.debug(`[TERMINAL_SIZE_CHECK] Raw content length: ${rawContent.length}`)
24-
25-
// Check size before trimming
26-
const sizeEstimate = estimateContentSize(rawContent, contextWindow)
27-
console.debug(`[TERMINAL_SIZE_CHECK] Content size: ${sizeEstimate.bytes} bytes`)
28-
console.debug(`[TERMINAL_SIZE_CHECK] Estimated tokens: ${sizeEstimate.estimatedTokens}`)
29-
console.debug(`[TERMINAL_SIZE_CHECK] Context window: ${contextWindow}`)
30-
31-
if (sizeEstimate.wouldExceedLimit) {
32-
console.debug(`[TERMINAL_SIZE_CHECK] Output exceeds size limit`)
33-
throw new ContentTooLargeError({
34-
type: "terminal",
35-
command,
36-
size: sizeEstimate,
37-
})
38-
}
39-
40-
// Only trim after size check passes
41-
const cleanContent = rawContent.trim()
42-
console.debug(`[TERMINAL_SIZE_CHECK] Clean content length: ${cleanContent.length}`)
43-
console.debug(`[TERMINAL_SIZE_CHECK] Size check passed`)
44-
return cleanContent
45-
}
46-
47-
export async function extractTextFromFile(
48-
filePath: string,
49-
contextWindow: number = 64_000 /* minimum context (Deepseek) */,
50-
): Promise<string> {
9+
export async function extractTextFromFile(filePath: string): Promise<string> {
5110
try {
5211
await fs.access(filePath)
5312
} catch (error) {
5413
throw new Error(`File not found: ${filePath}`)
5514
}
56-
57-
console.debug(`[FILE_READ_CHECK] Checking size for file: ${filePath}`)
58-
59-
// Get file stats to check size
60-
const stats = await fs.stat(filePath)
61-
console.debug(`[FILE_SIZE_CHECK] File size: ${stats.size} bytes`)
62-
63-
// Calculate max allowed size from context window
64-
const maxAllowedSize = getMaxAllowedSize(contextWindow)
65-
console.debug(`[FILE_SIZE_CHECK] Max allowed size: ${maxAllowedSize} tokens`)
66-
67-
// Check if file size would exceed limit before attempting to read
68-
// This is more efficient than creating a full SizeEstimate object when we just need a boolean check
69-
if (wouldExceedSizeLimit(stats.size, contextWindow)) {
70-
console.debug(`[FILE_SIZE_CHECK] File exceeds size limit`)
71-
// Only create the full size estimate when we need it for the error
72-
const sizeEstimate = await estimateFileSize(filePath, maxAllowedSize)
73-
throw new ContentTooLargeError({
74-
type: "file",
75-
path: filePath,
76-
size: sizeEstimate,
77-
})
78-
}
79-
console.debug(`[FILE_SIZE_CHECK] File size check passed`)
8015
const fileExtension = path.extname(filePath).toLowerCase()
81-
console.debug(`[FILE_READ] Reading file: ${filePath}`)
82-
let content: string
8316
switch (fileExtension) {
8417
case ".pdf":
85-
content = await extractTextFromPDF(filePath)
86-
break
18+
return extractTextFromPDF(filePath)
8719
case ".docx":
88-
content = await extractTextFromDOCX(filePath)
89-
break
20+
return extractTextFromDOCX(filePath)
9021
case ".ipynb":
91-
content = await extractTextFromIPYNB(filePath)
92-
break
22+
return extractTextFromIPYNB(filePath)
9323
default:
9424
const isBinary = await isBinaryFile(filePath).catch(() => false)
9525
if (!isBinary) {
96-
content = await fs.readFile(filePath, "utf8")
26+
// If file is over 300KB, throw an error
27+
const fileSizeInKB = await getFileSizeInKB(filePath)
28+
if (fileSizeInKB > 300) {
29+
throw new Error(`File is too large to read into context.`)
30+
}
31+
return await fs.readFile(filePath, "utf8")
9732
} else {
9833
throw new Error(`Cannot read text for file type: ${fileExtension}`)
9934
}
10035
}
101-
console.debug(`[FILE_READ_COMPLETE] File read complete. Content length: ${content.length} chars`)
102-
return content
10336
}
10437

10538
async function extractTextFromPDF(filePath: string): Promise<string> {

src/shared/errors.ts

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)