Skip to content

Commit b381358

Browse files
committed
feat: implement provider-aware large file reading with context validation
- Add contextValidator module for intelligent context window management
- Implement dynamic token calculation based on model capabilities
- Support multiple file handling strategies (truncate, chunk, fail)
- Integrate context validation into readFileTool
- Add clear user guidance when files exceed context limits
- Implement chunked file reading for large files
- Add comprehensive error messages for better UX

This addresses issue #8038 by preventing context window exhaustion when reading large or multiple files.
1 parent 2263d86 commit b381358

File tree

3 files changed

+790
-3
lines changed

3 files changed

+790
-3
lines changed
Lines changed: 333 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,333 @@
1+
import { describe, it, expect, vi, beforeEach } from "vitest"
2+
import * as fs from "fs/promises"
3+
import {
4+
validateFileContext,
5+
validateMultipleFiles,
6+
calculateAvailableTokens,
7+
readFileInChunks,
8+
FileReadingConfig,
9+
} from "../contextValidator"
10+
import type { ModelInfo } from "@roo-code/types"
11+
12+
// Local declaration of the options object that these tests pass as the second
// argument to validateFileContext and validateMultipleFiles. It is declared here
// instead of being imported because the type is internal to contextValidator.
// NOTE(review): presumably this must be kept in sync with the real options type — confirm.
13+
interface ValidationOptions {
14+
// Model capabilities; the fixtures in this file set contextWindow and maxTokens.
model: ModelInfo
15+
// Provider/API settings; the tests pass an empty object here.
// NOTE(review): typed as `any` — consider a narrower type once the real shape is confirmed.
apiConfiguration: any
16+
// Tokens already consumed by the conversation (0 in the default fixture).
currentTokenUsage: number
17+
// File-reading behaviour (handling strategy, safety buffer, chunk size).
config: FileReadingConfig
18+
// When false, the tests expect large files to be rejected with a
// "Partial reads are disabled" message.
partialReadsEnabled: boolean
19+
}
20+
21+
// Mock fs module
22+
vi.mock("fs/promises")
23+
24+
describe("contextValidator", () => {
25+
// Baseline model fixture shared by every test in this suite.
// The round numbers (10000-token window, 4000-token max output) keep the
// expected values in the calculateAvailableTokens tests easy to check by hand.
const mockModelInfo: ModelInfo = {
26+
contextWindow: 10000,
27+
maxTokens: 4000,
28+
supportsImages: false,
29+
supportsPromptCache: false,
30+
// Pricing fields are all zero in this fixture; no test in this file asserts on them.
inputPrice: 0,
31+
outputPrice: 0,
32+
cacheWritesPrice: 0,
33+
cacheReadsPrice: 0,
34+
description: "Test model",
35+
}
36+
37+
// Default file-reading configuration used by the tests. Individual tests
// spread this object and override a single field to exercise other modes.
const defaultConfig: FileReadingConfig = {
38+
// Strategy for oversized files; tests also override this with "fail" and "chunk".
largeFileHandling: "truncate",
39+
// Safety buffer as a percentage; one edge-case test raises it to 90.
safetyBufferPercent: 25,
40+
maxChunkLines: 1000,
41+
showDefinitionsOnTruncate: true,
42+
}
43+
44+
// Default options passed to validateFileContext / validateMultipleFiles:
// the baseline model, an empty API configuration, no tokens used yet,
// the default file-reading config, and partial reads allowed.
const defaultOptions: ValidationOptions = {
45+
model: mockModelInfo,
46+
apiConfiguration: {},
47+
currentTokenUsage: 0,
48+
config: defaultConfig,
49+
partialReadsEnabled: true,
50+
}
51+
52+
// Clear recorded mock state before every test.
// NOTE(review): per the Vitest docs, clearAllMocks clears call history but keeps
// stubbed implementations, so mockResolvedValue / mockRejectedValue set on fs.stat
// or fs.readFile in one test may still be active in a later test that does not
// re-stub them. Confirm whether vi.resetAllMocks() is what is intended here.
beforeEach(() => {
53+
vi.clearAllMocks()
54+
})
55+
56+
// Tests for calculateAvailableTokens. It is called here with four positional
// arguments: the model info, an API configuration object, the current token
// usage, and the safety buffer percentage (parameter names are not visible
// from this file). These tests touch no mocks.
describe("calculateAvailableTokens", () => {
57+
// Baseline case: output reservation comes from the model's maxTokens.
it("should calculate available tokens with safety buffer", () => {
58+
const result = calculateAvailableTokens(mockModelInfo, {}, 2000, 25)
59+
// Context window: 10000
60+
// Max output: 4000
61+
// Usable: 10000 - 4000 = 6000
62+
// Current usage: 2000
63+
// Available before buffer: 6000 - 2000 = 4000
64+
// With 25% buffer: 4000 * 0.75 = 3000
65+
expect(result).toBe(3000)
66+
})
67+
68+
// Without maxTokens the expected value assumes a fallback reservation of
// 20% of the context window, as spelled out in the arithmetic below.
it("should handle models without maxTokens", () => {
69+
const modelWithoutMax = { ...mockModelInfo, maxTokens: undefined }
70+
const result = calculateAvailableTokens(modelWithoutMax, {}, 2000, 25)
71+
// Context window: 10000
72+
// No max output, use 20% of context: 2000
73+
// Usable: 10000 - 2000 = 8000
74+
// Current usage: 2000
75+
// Available before buffer: 8000 - 2000 = 6000
76+
// With 25% buffer: 6000 * 0.75 = 4500
77+
expect(result).toBe(4500)
78+
})
79+
80+
// Usage (8000) is larger than the usable 6000 from the baseline arithmetic,
// so the result is expected to be 0 rather than a negative number.
it("should return 0 when context is exhausted", () => {
81+
const result = calculateAvailableTokens(mockModelInfo, {}, 8000, 25)
82+
expect(result).toBe(0)
83+
})
84+
85+
// NOTE(review): the arithmetic below treats apiConfig.maxTokens as the whole
// usable budget (2000 - 1000), whereas the tests above subtract maxTokens
// from the context window as an output reservation. Confirm which
// interpretation calculateAvailableTokens actually implements.
it("should handle API configuration overrides", () => {
86+
const apiConfig = { maxTokens: 2000 }
87+
const result = calculateAvailableTokens(mockModelInfo, apiConfig, 1000, 25)
88+
// API override: 2000
89+
// Current usage: 1000
90+
// Available before buffer: 2000 - 1000 = 1000
91+
// With 25% buffer: 1000 * 0.75 = 750
92+
expect(result).toBe(750)
93+
})
94+
})
95+
96+
// Tests for validateFileContext(filePath, options). Each test stubs the mocked
// fs.stat (for the size) and fs.readFile (for the content), then checks
// the fields of the returned validation result.
describe("validateFileContext", () => {
97+
// A three-line file is expected to be readable in full.
it("should validate small file successfully", async () => {
98+
const fileContent = "Line 1\nLine 2\nLine 3"
99+
vi.mocked(fs.stat).mockResolvedValue({ size: fileContent.length } as any)
100+
vi.mocked(fs.readFile).mockResolvedValue(fileContent)
101+
102+
const result = await validateFileContext("/test/file.txt", defaultOptions)
103+
104+
expect(result.canRead).toBe(true)
105+
expect(result.estimatedTokens).toBeGreaterThan(0)
106+
expect(result.suggestedAction).toBe("read_full")
107+
})
108+
109+
// Default config uses largeFileHandling "truncate": a 10000-line file should
// be readable only partially, with a line cap below the file's length.
it("should suggest partial read for large files when truncate is enabled", async () => {
110+
const largeContent = Array(10000).fill("This is a long line of text").join("\n")
111+
vi.mocked(fs.stat).mockResolvedValue({ size: largeContent.length } as any)
112+
vi.mocked(fs.readFile).mockResolvedValue(largeContent)
113+
114+
const result = await validateFileContext("/test/large.txt", defaultOptions)
115+
116+
expect(result.canRead).toBe(true)
117+
expect(result.suggestedAction).toBe("read_partial")
118+
expect(result.maxSafeLines).toBeLessThan(10000)
119+
expect(result.message).toContain("truncated")
120+
})
121+
122+
// Same large file, but with the "fail" strategy the read must be refused.
it('should fail for large files when largeFileHandling is "fail"', async () => {
123+
const largeContent = Array(10000).fill("This is a long line of text").join("\n")
124+
vi.mocked(fs.stat).mockResolvedValue({ size: largeContent.length } as any)
125+
vi.mocked(fs.readFile).mockResolvedValue(largeContent)
126+
127+
const failOptions = {
128+
...defaultOptions,
129+
// `as const` keeps the literal type so it matches the config's strategy union.
config: { ...defaultConfig, largeFileHandling: "fail" as const },
130+
}
131+
132+
const result = await validateFileContext("/test/large.txt", failOptions)
133+
134+
expect(result.canRead).toBe(false)
135+
expect(result.message).toContain("exceeds available context")
136+
})
137+
138+
// Same large file with the "chunk" strategy: reading is allowed, in chunks.
it("should suggest chunked reading when enabled", async () => {
139+
const largeContent = Array(10000).fill("This is a long line of text").join("\n")
140+
vi.mocked(fs.stat).mockResolvedValue({ size: largeContent.length } as any)
141+
vi.mocked(fs.readFile).mockResolvedValue(largeContent)
142+
143+
const chunkOptions = {
144+
...defaultOptions,
145+
config: { ...defaultConfig, largeFileHandling: "chunk" as const },
146+
}
147+
148+
const result = await validateFileContext("/test/large.txt", chunkOptions)
149+
150+
expect(result.canRead).toBe(true)
151+
expect(result.suggestedAction).toBe("read_chunks")
152+
expect(result.message).toContain("chunks")
153+
})
154+
155+
// A rejected readFile is used as a stand-in for a binary file; the result
// is expected to be flagged isBinary and unreadable.
it("should handle binary files", async () => {
156+
vi.mocked(fs.stat).mockResolvedValue({ size: 1000000 } as any)
157+
// Simulate binary file by throwing encoding error
158+
vi.mocked(fs.readFile).mockRejectedValue(new Error("Invalid UTF-8"))
159+
160+
const result = await validateFileContext("/test/binary.bin", defaultOptions)
161+
162+
expect(result.canRead).toBe(false)
163+
expect(result.isBinary).toBe(true)
164+
expect(result.message).toContain("binary file")
165+
})
166+
167+
// One 100000-character line with no newlines: expected to be reported as
// minified and offered as a partial read.
it("should handle minified files with very long lines", async () => {
168+
const minifiedContent = "a".repeat(100000) // Single very long line
169+
vi.mocked(fs.stat).mockResolvedValue({ size: minifiedContent.length } as any)
170+
vi.mocked(fs.readFile).mockResolvedValue(minifiedContent)
171+
172+
const result = await validateFileContext("/test/minified.js", defaultOptions)
173+
174+
expect(result.canRead).toBe(true)
175+
expect(result.suggestedAction).toBe("read_partial")
176+
expect(result.message).toContain("minified")
177+
})
178+
179+
// With partial reads turned off, a large file cannot be truncated and must be refused.
it("should respect partialReadsEnabled flag", async () => {
180+
const largeContent = Array(10000).fill("This is a long line of text").join("\n")
181+
vi.mocked(fs.stat).mockResolvedValue({ size: largeContent.length } as any)
182+
vi.mocked(fs.readFile).mockResolvedValue(largeContent)
183+
184+
const noPartialOptions = {
185+
...defaultOptions,
186+
partialReadsEnabled: false,
187+
}
188+
189+
const result = await validateFileContext("/test/large.txt", noPartialOptions)
190+
191+
expect(result.canRead).toBe(false)
192+
expect(result.message).toContain("Partial reads are disabled")
193+
})
194+
})
195+
196+
// Tests for validateMultipleFiles(filePaths, options). The result is used as a
// Map-like collection (size, get, values) keyed by file path.
describe("validateMultipleFiles", () => {
197+
// Two tiny files: both should get an entry and both should be readable.
// NOTE(review): despite the title, nothing here asserts on cumulative token usage.
it("should validate multiple files and track cumulative token usage", async () => {
198+
const file1Content = "Small file 1"
199+
const file2Content = "Small file 2"
200+
201+
// Queue one stat result per file, in call order.
vi.mocked(fs.stat)
202+
.mockResolvedValueOnce({ size: file1Content.length } as any)
203+
.mockResolvedValueOnce({ size: file2Content.length } as any)
204+
205+
vi.mocked(fs.readFile).mockResolvedValueOnce(file1Content).mockResolvedValueOnce(file2Content)
206+
207+
const result = await validateMultipleFiles(["/test/file1.txt", "/test/file2.txt"], defaultOptions)
208+
209+
expect(result.size).toBe(2)
210+
const validation1 = result.get("/test/file1.txt")
211+
const validation2 = result.get("/test/file2.txt")
212+
213+
expect(validation1?.canRead).toBe(true)
214+
expect(validation2?.canRead).toBe(true)
215+
})
216+
217+
// Three identical 2000-line files validated together.
// NOTE(review): each file is about 56,000 characters, while the arithmetic in
// the calculateAvailableTokens tests gives (10000 - 4000) * 0.75 = 4500
// available tokens for this model. A single file may therefore already exceed
// the budget, in which case this test passes without exercising any
// cumulative tracking. Confirm against the token estimator.
it("should handle when combined files exceed context", async () => {
218+
// Create files that individually fit but together exceed context
219+
const largeContent = Array(2000).fill("This is a long line of text").join("\n")
220+
221+
vi.mocked(fs.stat).mockResolvedValue({ size: largeContent.length } as any)
222+
vi.mocked(fs.readFile).mockResolvedValue(largeContent)
223+
224+
const result = await validateMultipleFiles(
225+
["/test/file1.txt", "/test/file2.txt", "/test/file3.txt"],
226+
defaultOptions,
227+
)
228+
229+
// At least one file should be marked for truncation or failure
230+
const validations = Array.from(result.values())
231+
const hasPartialReads = validations.some((v) => v.suggestedAction === "read_partial")
232+
const hasFailures = validations.some((v) => !v.canRead)
233+
234+
expect(hasPartialReads || hasFailures).toBe(true)
235+
})
236+
})
237+
238+
// Tests for readFileInChunks, consumed here with `for await`, so it is an async
// iterable. It is called with a path and two numbers.
// NOTE(review): judging by the expectations below, the second argument looks
// like the chunk size in lines and the third like the file's total line count —
// confirm against the implementation.
describe("readFileInChunks", () => {
239+
// 100 lines with the second argument set to 30: expect several chunks, the first
// covering lines 1-30 and the final one flagged isLastChunk.
it("should read file in chunks", async () => {
240+
const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`)
241+
const content = lines.join("\n")
242+
243+
vi.mocked(fs.readFile).mockResolvedValue(content)
244+
245+
// NOTE(review): typed as any[] because the chunk type is not imported here.
const chunks: any[] = []
246+
for await (const chunk of readFileInChunks("/test/file.txt", 30, 100)) {
247+
chunks.push(chunk)
248+
}
249+
250+
expect(chunks.length).toBeGreaterThan(1)
251+
expect(chunks[0].startLine).toBe(1)
252+
expect(chunks[0].endLine).toBe(30)
253+
expect(chunks[chunks.length - 1].isLastChunk).toBe(true)
254+
})
255+
256+
// 10 lines with the second argument still 30: a single chunk spanning lines 1-10.
it("should handle files smaller than chunk size", async () => {
257+
const lines = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`)
258+
const content = lines.join("\n")
259+
260+
vi.mocked(fs.readFile).mockResolvedValue(content)
261+
262+
const chunks: any[] = []
263+
for await (const chunk of readFileInChunks("/test/file.txt", 30, 10)) {
264+
chunks.push(chunk)
265+
}
266+
267+
expect(chunks.length).toBe(1)
268+
expect(chunks[0].startLine).toBe(1)
269+
expect(chunks[0].endLine).toBe(10)
270+
expect(chunks[0].isLastChunk).toBe(true)
271+
})
272+
273+
// Empty content with the third argument set to 0: the iterator should yield nothing.
it("should handle empty files", async () => {
274+
vi.mocked(fs.readFile).mockResolvedValue("")
275+
276+
const chunks: any[] = []
277+
for await (const chunk of readFileInChunks("/test/empty.txt", 30, 0)) {
278+
chunks.push(chunk)
279+
}
280+
281+
expect(chunks.length).toBe(0)
282+
})
283+
})
284+
285+
// Edge cases for validateFileContext: I/O failure, an extreme safety buffer,
// and a model with a very small context window.
describe("edge cases", () => {
286+
// A rejected fs.stat should produce a non-readable result with an error
// message instead of a thrown exception.
it("should handle file read errors gracefully", async () => {
287+
vi.mocked(fs.stat).mockRejectedValue(new Error("File not found"))
288+
289+
const result = await validateFileContext("/test/nonexistent.txt", defaultOptions)
290+
291+
expect(result.canRead).toBe(false)
292+
expect(result.message).toContain("Error reading file")
293+
})
294+
295+
// NOTE(review): this only checks that a token estimate is produced; it does
// not assert canRead either way, so it does not pin how a 90% buffer
// affects the decision.
it("should handle extremely large safety buffers", async () => {
296+
const content = "Small file"
297+
vi.mocked(fs.stat).mockResolvedValue({ size: content.length } as any)
298+
vi.mocked(fs.readFile).mockResolvedValue(content)
299+
300+
const highBufferOptions = {
301+
...defaultOptions,
302+
config: { ...defaultConfig, safetyBufferPercent: 90 },
303+
}
304+
305+
const result = await validateFileContext("/test/file.txt", highBufferOptions)
306+
307+
// Even small files might not fit with 90% buffer
308+
expect(result.estimatedTokens).toBeGreaterThan(0)
309+
})
310+
311+
// NOTE(review): the assertion depends on which branch the implementation
// takes, so the test accepts either outcome. If the result is readable it
// must be "read_partial", which means a "read_full" outcome would fail.
// Consider pinning the expected branch once the behaviour is confirmed.
it("should handle models with very small context windows", async () => {
312+
const smallModel = { ...mockModelInfo, contextWindow: 100, maxTokens: 20 }
313+
const content = "This is a test file with some content"
314+
315+
vi.mocked(fs.stat).mockResolvedValue({ size: content.length } as any)
316+
vi.mocked(fs.readFile).mockResolvedValue(content)
317+
318+
const smallModelOptions = {
319+
...defaultOptions,
320+
model: smallModel,
321+
}
322+
323+
const result = await validateFileContext("/test/file.txt", smallModelOptions)
324+
325+
// File might not fit in very small context
326+
if (!result.canRead) {
327+
expect(result.message).toContain("exceeds")
328+
} else {
329+
expect(result.suggestedAction).toBe("read_partial")
330+
}
331+
})
332+
})
333+
})

0 commit comments

Comments
 (0)