diff --git a/packages/types/src/file-limits.ts b/packages/types/src/file-limits.ts new file mode 100644 index 0000000000..c215148c94 --- /dev/null +++ b/packages/types/src/file-limits.ts @@ -0,0 +1,39 @@ +/** + * File size and limit constants used across the application + */ + +/** + * Files larger than this threshold will be checked for token count + * to prevent consuming too much of the context window + */ +export const LARGE_FILE_SIZE_THRESHOLD = 100 * 1024 // 100KB + +/** + * Files larger than this size will have the safeguard applied automatically + * without token counting + */ +export const VERY_LARGE_FILE_SIZE = 1024 * 1024 // 1MB + +/** + * Default number of lines to read when applying the large file safeguard + */ +export const FALLBACK_MAX_LINES = 2000 + +/** + * Maximum character count for file reading when safeguard is applied. + * Based on typical token-to-character ratio (1 token ≈ 4 characters), + * this ensures we don't consume too much of the context window. + * For a 100k token context window at 50%, this would be ~200k characters. 
+ */ +export const MAX_CHAR_LIMIT = 200_000 // 200k characters + +/** + * Percentage of the context window to use as the maximum token threshold + * for file reading operations + */ +export const CONTEXT_WINDOW_PERCENTAGE = 0.5 // 50% + +/** + * Average characters per token ratio used for estimation + */ +export const CHARS_PER_TOKEN_RATIO = 4 diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index 44937da235..952ad1fd0f 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -4,6 +4,7 @@ export * from "./api.js" export * from "./codebase-index.js" export * from "./cloud.js" export * from "./experiment.js" +export * from "./file-limits.js" export * from "./followup.js" export * from "./global-settings.js" export * from "./history.js" diff --git a/src/core/tools/__tests__/readFileTool.spec.ts b/src/core/tools/__tests__/readFileTool.spec.ts index 44be1d3b92..5f7a7a74d4 100644 --- a/src/core/tools/__tests__/readFileTool.spec.ts +++ b/src/core/tools/__tests__/readFileTool.spec.ts @@ -1,15 +1,18 @@ // npx vitest src/core/tools/__tests__/readFileTool.spec.ts import * as path from "path" +import { stat } from "fs/promises" import { countFileLines } from "../../../integrations/misc/line-counter" import { readLines } from "../../../integrations/misc/read-lines" +import { readLinesWithCharLimit } from "../../../integrations/misc/read-lines-char-limit" import { extractTextFromFile } from "../../../integrations/misc/extract-text" import { parseSourceCodeDefinitionsForFile } from "../../../services/tree-sitter" import { isBinaryFile } from "isbinaryfile" import { ReadFileToolUse, ToolParamName, ToolResponse } from "../../../shared/tools" import { readFileTool } from "../readFileTool" import { formatResponse } from "../../prompts/responses" +import { tiktoken } from "../../../utils/tiktoken" vi.mock("path", async () => { const originalPath = await vi.importActual("path") @@ -24,29 +27,40 @@ vi.mock("fs/promises", () => ({ mkdir: 
vi.fn().mockResolvedValue(undefined), writeFile: vi.fn().mockResolvedValue(undefined), readFile: vi.fn().mockResolvedValue("{}"), + stat: vi.fn().mockResolvedValue({ size: 1024 }), // Default 1KB file })) vi.mock("isbinaryfile") vi.mock("../../../integrations/misc/line-counter") vi.mock("../../../integrations/misc/read-lines") +vi.mock("../../../integrations/misc/read-lines-char-limit") // Mock input content for tests let mockInputContent = "" // First create all the mocks -vi.mock("../../../integrations/misc/extract-text") +vi.mock("../../../integrations/misc/extract-text", () => ({ + extractTextFromFile: vi.fn(), + addLineNumbers: vi.fn(), + getSupportedBinaryFormats: vi.fn(() => [".pdf", ".docx", ".ipynb"]), +})) vi.mock("../../../services/tree-sitter") +vi.mock("../../../utils/tiktoken") + +// Import the mocked functions +import { addLineNumbers, getSupportedBinaryFormats } from "../../../integrations/misc/extract-text" // Then create the mock functions -const addLineNumbersMock = vi.fn().mockImplementation((text, startLine = 1) => { +const addLineNumbersMock = vi.mocked(addLineNumbers) +addLineNumbersMock.mockImplementation((text: string, startLine = 1) => { if (!text) return "" const lines = typeof text === "string" ? 
text.split("\n") : [text] - return lines.map((line, i) => `${startLine + i} | ${line}`).join("\n") + return lines.map((line: string, i: number) => `${startLine + i} | ${line}`).join("\n") }) -const extractTextFromFileMock = vi.fn() -const getSupportedBinaryFormatsMock = vi.fn(() => [".pdf", ".docx", ".ipynb"]) +const extractTextFromFileMock = vi.mocked(extractTextFromFile) +const getSupportedBinaryFormatsMock = vi.mocked(getSupportedBinaryFormats) vi.mock("../../ignore/RooIgnoreController", () => ({ RooIgnoreController: class { @@ -127,6 +141,15 @@ describe("read_file tool with maxReadFileLine setting", () => { mockCline.recordToolUsage = vi.fn().mockReturnValue(undefined) mockCline.recordToolError = vi.fn().mockReturnValue(undefined) + // Add default api mock + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + toolResult = undefined }) @@ -383,6 +406,15 @@ describe("read_file tool XML output structure", () => { mockCline.recordToolError = vi.fn().mockReturnValue(undefined) mockCline.didRejectTool = false + // Add default api mock + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + toolResult = undefined }) @@ -520,3 +552,439 @@ describe("read_file tool XML output structure", () => { }) }) }) + +describe("read_file tool with large file safeguard", () => { + // Test data + const testFilePath = "test/largefile.txt" + const absoluteFilePath = "/test/largefile.txt" + + // Mocked functions + const mockedCountFileLines = vi.mocked(countFileLines) + const mockedReadLines = vi.mocked(readLines) + const mockedExtractTextFromFile = vi.mocked(extractTextFromFile) + const mockedIsBinaryFile = vi.mocked(isBinaryFile) + const mockedPathResolve = vi.mocked(path.resolve) + const mockedTiktoken = vi.mocked(tiktoken) + const mockedStat = vi.mocked(stat) + + const mockCline: any = {} + let mockProvider: any + let toolResult: ToolResponse | undefined + + beforeEach(() => 
{ + vi.clearAllMocks() + + mockedPathResolve.mockReturnValue(absoluteFilePath) + mockedIsBinaryFile.mockResolvedValue(false) + + mockProvider = { + getState: vi.fn(), + deref: vi.fn().mockReturnThis(), + } + + mockCline.cwd = "/" + mockCline.task = "Test" + mockCline.providerRef = mockProvider + mockCline.rooIgnoreController = { + validateAccess: vi.fn().mockReturnValue(true), + } + mockCline.say = vi.fn().mockResolvedValue(undefined) + mockCline.ask = vi.fn().mockResolvedValue({ response: "yesButtonClicked" }) + mockCline.fileContextTracker = { + trackFileContext: vi.fn().mockResolvedValue(undefined), + } + mockCline.recordToolUsage = vi.fn().mockReturnValue(undefined) + mockCline.recordToolError = vi.fn().mockReturnValue(undefined) + + // Add default api mock + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + toolResult = undefined + }) + + async function executeReadFileTool( + params: Partial = {}, + options: { + maxReadFileLine?: number + totalLines?: number + tokenCount?: number + fileSize?: number + } = {}, + ): Promise { + const maxReadFileLine = options.maxReadFileLine ?? -1 + const totalLines = options.totalLines ?? 5 + const tokenCount = options.tokenCount ?? 100 + const fileSize = options.fileSize ?? 1024 // Default 1KB + + mockProvider.getState.mockResolvedValue({ maxReadFileLine }) + mockedCountFileLines.mockResolvedValue(totalLines) + mockedTiktoken.mockResolvedValue(tokenCount) + mockedStat.mockResolvedValue({ size: fileSize } as any) + + const argsContent = `${testFilePath}` + + const toolUse: ReadFileToolUse = { + type: "tool_use", + name: "read_file", + params: { args: argsContent, ...params }, + partial: false, + } + + await readFileTool( + mockCline, + toolUse, + mockCline.ask, + vi.fn(), + (result: ToolResponse) => { + toolResult = result + }, + (_: ToolParamName, content?: string) => content ?? 
"", + ) + + return toolResult + } + + describe("when file has large size and high token count", () => { + it("should apply safeguard and read only first 2000 lines", async () => { + // Setup - large file with high token count + const largeFileContent = Array(15000).fill("This is a line of text").join("\n") + const partialContent = Array(2000).fill("This is a line of text").join("\n") + + mockedExtractTextFromFile.mockResolvedValue(largeFileContent) + + // Mock readLinesWithCharLimit + const mockedReadLinesWithCharLimit = vi.mocked(readLinesWithCharLimit) + mockedReadLinesWithCharLimit.mockResolvedValue({ + content: partialContent, + linesRead: 2000, + charactersRead: partialContent.length, + wasTruncated: true, + }) + + // Setup addLineNumbers mock for this test + addLineNumbersMock.mockImplementation((text: string) => { + const lines = text.split("\n") + return lines.map((line: string, i: number) => `${i + 1} | ${line}`).join("\n") + }) + + // Mock the api.getModel() to return a model with context window + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + // Execute with large file size and high token count + const result = await executeReadFileTool( + {}, + { + maxReadFileLine: -1, + totalLines: 15000, + tokenCount: 60000, // Above threshold + fileSize: 200 * 1024, // 200KB - above threshold + }, + ) + + // Verify safeguard was applied + expect(mockedTiktoken).toHaveBeenCalled() + expect(mockedReadLinesWithCharLimit).toHaveBeenCalled() + + // Verify the result contains the safeguard notice + expect(result).toContain("readFile.safeguardNotice") + expect(result).toContain(``) + }) + + it("should not apply safeguard when token count is below threshold", async () => { + // Setup - large file but with low token count + const fileContent = Array(15000).fill("Short").join("\n") + const numberedContent = fileContent + .split("\n") + .map((line, i) => `${i + 1} | ${line}`) + .join("\n") + + 
mockedExtractTextFromFile.mockImplementation(() => Promise.resolve(numberedContent)) + + // Mock the api.getModel() to return a model with context window + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + // Execute with large file size but low token count + const result = await executeReadFileTool( + {}, + { + maxReadFileLine: -1, + totalLines: 15000, + tokenCount: 30000, // Below threshold + fileSize: 200 * 1024, // 200KB - above threshold + }, + ) + + // Verify safeguard was NOT applied + expect(mockedTiktoken).toHaveBeenCalled() + const mockedReadLinesWithCharLimit = vi.mocked(readLinesWithCharLimit) + expect(mockedReadLinesWithCharLimit).not.toHaveBeenCalled() + expect(mockedExtractTextFromFile).toHaveBeenCalled() + + // Verify no safeguard notice + expect(result).not.toContain("preserve context space") + expect(result).toContain(``) + }) + + it("should not apply safeguard for small files", async () => { + // Setup - small file + const fileContent = Array(999).fill("This is a line of text").join("\n") + const numberedContent = fileContent + .split("\n") + .map((line, i) => `${i + 1} | ${line}`) + .join("\n") + + mockedExtractTextFromFile.mockImplementation(() => Promise.resolve(numberedContent)) + + // Mock the api.getModel() to return a model with context window + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + // Execute with small file size + const result = await executeReadFileTool( + {}, + { + maxReadFileLine: -1, + totalLines: 999, + tokenCount: 100000, // Even with high token count + fileSize: 50 * 1024, // 50KB - below threshold + }, + ) + + // Verify tiktoken was NOT called (optimization) + expect(mockedTiktoken).not.toHaveBeenCalled() + const mockedReadLinesWithCharLimit = vi.mocked(readLinesWithCharLimit) + expect(mockedReadLinesWithCharLimit).not.toHaveBeenCalled() + expect(mockedExtractTextFromFile).toHaveBeenCalled() + + // 
Verify no safeguard notice + expect(result).not.toContain("preserve context space") + expect(result).toContain(``) + }) + + it("should apply safeguard for very large files even if token counting fails", async () => { + // Setup - very large file and token counting fails + const partialContent = Array(2000).fill("This is a line of text").join("\n") + + mockedExtractTextFromFile.mockResolvedValue("Large content") + + // Mock readLinesWithCharLimit + const mockedReadLinesWithCharLimit = vi.mocked(readLinesWithCharLimit) + mockedReadLinesWithCharLimit.mockResolvedValue({ + content: partialContent, + linesRead: 2000, + charactersRead: partialContent.length, + wasTruncated: true, + }) + + // Setup addLineNumbers mock for partial content + addLineNumbersMock.mockImplementation((text: string) => { + const lines = text.split("\n") + return lines.map((line: string, i: number) => `${i + 1} | ${line}`).join("\n") + }) + + // Mock the api.getModel() to return a model with context window + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + // Set up the provider state + mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 }) + mockedCountFileLines.mockResolvedValue(6000) + mockedStat.mockResolvedValue({ size: 2 * 1024 * 1024 } as any) // 2MB file + + // IMPORTANT: Set up tiktoken to reject AFTER other mocks are set + mockedTiktoken.mockRejectedValue(new Error("Token counting failed")) + + const argsContent = `${testFilePath}` + + const toolUse: ReadFileToolUse = { + type: "tool_use", + name: "read_file", + params: { args: argsContent }, + partial: false, + } + + await readFileTool( + mockCline, + toolUse, + mockCline.ask, + vi.fn(), + (result: ToolResponse) => { + toolResult = result + }, + (_: ToolParamName, content?: string) => content ?? 
"", + ) + + // Verify safeguard was applied despite token counting failure + expect(mockedTiktoken).toHaveBeenCalled() + expect(mockedReadLinesWithCharLimit).toHaveBeenCalled() + + // Verify the result contains the safeguard notice + expect(toolResult).toContain("readFile.safeguardNotice") + expect(toolResult).toContain(``) + }) + + it("should not apply safeguard when maxReadFileLine is not -1", async () => { + // Setup + const fileContent = Array(20000).fill("This is a line of text").join("\n") + mockedExtractTextFromFile.mockResolvedValue(fileContent) + + // Mock the api.getModel() to return a model with context window + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + // Execute with maxReadFileLine = 500 (not -1) + const result = await executeReadFileTool( + {}, + { + maxReadFileLine: 500, + totalLines: 20000, + tokenCount: 100000, + fileSize: 2 * 1024 * 1024, // 2MB + }, + ) + + // Verify tiktoken was NOT called + expect(mockedTiktoken).not.toHaveBeenCalled() + + // The normal maxReadFileLine logic should apply (using readLines, not readLinesWithCharLimit) + expect(mockedReadLines).toHaveBeenCalled() + const mockedReadLinesWithCharLimit = vi.mocked(readLinesWithCharLimit) + expect(mockedReadLinesWithCharLimit).not.toHaveBeenCalled() + }) + + it("should handle line ranges correctly with safeguard", async () => { + // When line ranges are specified, safeguard should not apply + const rangeContent = "Line 100\nLine 101\nLine 102" + mockedReadLines.mockResolvedValue(rangeContent) + + // Mock the api.getModel() to return a model with context window + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + const argsContent = `${testFilePath}100-102` + + const toolUse: ReadFileToolUse = { + type: "tool_use", + name: "read_file", + params: { args: argsContent }, + partial: false, + } + + mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 }) + 
mockedCountFileLines.mockResolvedValue(10000) + mockedStat.mockResolvedValue({ size: 10 * 1024 * 1024 } as any) // 10MB file + + await readFileTool( + mockCline, + toolUse, + mockCline.ask, + vi.fn(), + (result: ToolResponse) => { + toolResult = result + }, + (_: ToolParamName, content?: string) => content ?? "", + ) + + // Verify tiktoken was NOT called for range reads + expect(mockedTiktoken).not.toHaveBeenCalled() + expect(toolResult).toContain(``) + expect(toolResult).not.toContain("preserve context space") + }) + }) + + describe("safeguard thresholds", () => { + it("should use correct thresholds for file size and token count", async () => { + // Mock the api.getModel() to return a model with context window + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + + // Test boundary conditions + + // Just below size threshold - no token check + await executeReadFileTool({}, { fileSize: 100 * 1024 - 1, maxReadFileLine: -1 }) // Just under 100KB + expect(mockedTiktoken).not.toHaveBeenCalled() + + // Just above size threshold - token check performed + vi.clearAllMocks() + // Re-mock the api.getModel() after clearAllMocks + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + mockedExtractTextFromFile.mockResolvedValue("content") + await executeReadFileTool({}, { fileSize: 100 * 1024 + 1, maxReadFileLine: -1, tokenCount: 40000 }) // Just over 100KB + expect(mockedTiktoken).toHaveBeenCalled() + + // Token count just below threshold - no safeguard + expect(toolResult).not.toContain("preserve context space") + + // Token count just above threshold - safeguard applied + vi.clearAllMocks() + // Re-mock the api.getModel() after clearAllMocks + mockCline.api = { + getModel: vi.fn().mockReturnValue({ + info: { + contextWindow: 100000, + }, + }), + } + mockedExtractTextFromFile.mockResolvedValue("content") + const mockedReadLinesWithCharLimit = 
vi.mocked(readLinesWithCharLimit) + mockedReadLinesWithCharLimit.mockResolvedValue({ + content: "partial content", + linesRead: 2000, + charactersRead: 50000, + wasTruncated: true, + }) + await executeReadFileTool({}, { fileSize: 100 * 1024 + 1, maxReadFileLine: -1, tokenCount: 50001 }) + expect(mockedReadLinesWithCharLimit).toHaveBeenCalled() + expect(toolResult).toContain("readFile.safeguardNotice") + }) + }) +}) diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts index 6de8dd5642..dc7328c24f 100644 --- a/src/core/tools/readFileTool.ts +++ b/src/core/tools/readFileTool.ts @@ -1,5 +1,6 @@ import path from "path" import { isBinaryFile } from "isbinaryfile" +import { stat } from "fs/promises" import { Task } from "../task/Task" import { ClineSayTool } from "../../shared/ExtensionMessage" @@ -14,6 +15,16 @@ import { readLines } from "../../integrations/misc/read-lines" import { extractTextFromFile, addLineNumbers, getSupportedBinaryFormats } from "../../integrations/misc/extract-text" import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter" import { parseXml } from "../../utils/xml" +import { tiktoken } from "../../utils/tiktoken" +import { + LARGE_FILE_SIZE_THRESHOLD, + VERY_LARGE_FILE_SIZE, + FALLBACK_MAX_LINES, + CONTEXT_WINDOW_PERCENTAGE, + MAX_CHAR_LIMIT, + CHARS_PER_TOKEN_RATIO, +} from "@roo-code/types" +import { readLinesWithCharLimit } from "../../integrations/misc/read-lines-char-limit" export function getReadFileToolDescription(blockName: string, blockParams: any): string { // Handle both single path and multiple files via args @@ -516,13 +527,86 @@ export async function readFileTool( continue } - // Handle normal file read - const content = await extractTextFromFile(fullPath) - const lineRangeAttr = ` lines="1-${totalLines}"` + // Handle normal file read with safeguard for large files + // Get the actual context window size from the model + const contextWindow = cline.api.getModel().info.contextWindow || 
100000 // Default to 100k if not available + const MAX_TOKEN_THRESHOLD = Math.floor(contextWindow * CONTEXT_WINDOW_PERCENTAGE) + const MAX_CHAR_THRESHOLD = MAX_TOKEN_THRESHOLD * CHARS_PER_TOKEN_RATIO + + // Check if we should apply the safeguard + let shouldApplySafeguard = false + let safeguardNotice = "" + let fullContent: string | null = null + let actualLinesRead = totalLines + + if (maxReadFileLine === -1) { + // Get file size + const fileStats = await stat(fullPath) + const fileSizeKB = Math.round(fileStats.size / 1024) + + if (fileStats.size > LARGE_FILE_SIZE_THRESHOLD) { + // File is large enough to warrant token count check + try { + // Read the full content once + fullContent = await extractTextFromFile(fullPath) + const tokenCount = await tiktoken([{ type: "text", text: fullContent }]) + + if (tokenCount > MAX_TOKEN_THRESHOLD) { + shouldApplySafeguard = true + // Clear fullContent to avoid using it when we need partial content + fullContent = null + } + // If tokenCount <= MAX_TOKEN_THRESHOLD, we keep fullContent to reuse it + } catch (error) { + // If token counting fails, apply safeguard based on file size alone + console.warn(`Failed to count tokens for large file ${relPath}:`, error) + if (fileStats.size > VERY_LARGE_FILE_SIZE) { + // For very large files (>1MB), apply safeguard anyway + shouldApplySafeguard = true + } + } + } + } + + let content: string + let lineRangeAttr: string + + if (shouldApplySafeguard) { + // Read partial file with character-based safeguard + // Use the smaller of MAX_CHAR_LIMIT or the calculated character threshold + const charLimit = Math.min(MAX_CHAR_LIMIT, MAX_CHAR_THRESHOLD) + const result = await readLinesWithCharLimit(fullPath, charLimit) + + content = addLineNumbers(result.content, 1) + actualLinesRead = result.linesRead + lineRangeAttr = ` lines="1-${actualLinesRead}"` + + const fileStats = await stat(fullPath) + const fileSizeKB = Math.round(fileStats.size / 1024) + + if (result.wasTruncated) { + safeguardNotice = 
`${t("tools:readFile.safeguardNotice", { + fileSizeKB, + actualLinesRead, + charactersRead: result.charactersRead.toLocaleString(), + })}\n` + } + } else { + // Read full file - reuse fullContent if we already have it + if (fullContent !== null) { + content = fullContent + } else { + content = await extractTextFromFile(fullPath) + } + lineRangeAttr = ` lines="1-${totalLines}"` + } + let xmlInfo = totalLines > 0 ? `\n${content}\n` : `` if (totalLines === 0) { xmlInfo += `File is empty\n` + } else if (safeguardNotice) { + xmlInfo += safeguardNotice } // Track file read diff --git a/src/i18n/locales/ca/tools.json b/src/i18n/locales/ca/tools.json index 5b3a228bde..2916c75502 100644 --- a/src/i18n/locales/ca/tools.json +++ b/src/i18n/locales/ca/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (línies {{start}}-{{end}})", "definitionsOnly": " (només definicions)", - "maxLines": " (màxim {{max}} línies)" + "maxLines": " (màxim {{max}} línies)", + "safeguardNotice": "Aquest fitxer té {{fileSizeKB}}KB i consumiria una part significativa de la finestra de context. Mostrant només les primeres {{actualLinesRead}} línies completes ({{charactersRead}} caràcters) per preservar l'espai de context. Utilitza line_range si necessites llegir seccions específiques." }, "toolRepetitionLimitReached": "Roo sembla estar atrapat en un bucle, intentant la mateixa acció ({{toolName}}) repetidament. Això podria indicar un problema amb la seva estratègia actual. 
Considera reformular la tasca, proporcionar instruccions més específiques o guiar-lo cap a un enfocament diferent.", "codebaseSearch": { diff --git a/src/i18n/locales/de/tools.json b/src/i18n/locales/de/tools.json index eb1afbc082..eebc218397 100644 --- a/src/i18n/locales/de/tools.json +++ b/src/i18n/locales/de/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (Zeilen {{start}}-{{end}})", "definitionsOnly": " (nur Definitionen)", - "maxLines": " (maximal {{max}} Zeilen)" + "maxLines": " (maximal {{max}} Zeilen)", + "safeguardNotice": "Diese Datei ist {{fileSizeKB}}KB groß und würde einen erheblichen Teil des Kontextfensters verbrauchen. Es werden nur die ersten {{actualLinesRead}} vollständigen Zeilen ({{charactersRead}} Zeichen) angezeigt, um Kontextplatz zu sparen. Verwenden Sie line_range, wenn Sie bestimmte Abschnitte lesen müssen." }, "toolRepetitionLimitReached": "Roo scheint in einer Schleife festzustecken und versucht wiederholt dieselbe Aktion ({{toolName}}). Dies könnte auf ein Problem mit der aktuellen Strategie hindeuten. Überlege dir, die Aufgabe umzuformulieren, genauere Anweisungen zu geben oder Roo zu einem anderen Ansatz zu führen.", "codebaseSearch": { diff --git a/src/i18n/locales/en/tools.json b/src/i18n/locales/en/tools.json index 0265a84398..e6a9940794 100644 --- a/src/i18n/locales/en/tools.json +++ b/src/i18n/locales/en/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (lines {{start}}-{{end}})", "definitionsOnly": " (definitions only)", - "maxLines": " (max {{max}} lines)" + "maxLines": " (max {{max}} lines)", + "safeguardNotice": "This file is {{fileSizeKB}}KB and would consume a significant portion of the context window. Showing only the first {{actualLinesRead}} complete lines ({{charactersRead}} characters) to preserve context space. Use line_range if you need to read specific sections." }, "toolRepetitionLimitReached": "Roo appears to be stuck in a loop, attempting the same action ({{toolName}}) repeatedly. 
This might indicate a problem with its current strategy. Consider rephrasing the task, providing more specific instructions, or guiding it towards a different approach.", "codebaseSearch": { diff --git a/src/i18n/locales/es/tools.json b/src/i18n/locales/es/tools.json index 303f5365ed..a01df99e30 100644 --- a/src/i18n/locales/es/tools.json +++ b/src/i18n/locales/es/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (líneas {{start}}-{{end}})", "definitionsOnly": " (solo definiciones)", - "maxLines": " (máximo {{max}} líneas)" + "maxLines": " (máximo {{max}} líneas)", + "safeguardNotice": "Este archivo tiene {{fileSizeKB}}KB y consumiría una parte significativa de la ventana de contexto. Mostrando solo las primeras {{actualLinesRead}} líneas completas ({{charactersRead}} caracteres) para preservar el espacio de contexto. Usa line_range si necesitas leer secciones específicas." }, "toolRepetitionLimitReached": "Roo parece estar atrapado en un bucle, intentando la misma acción ({{toolName}}) repetidamente. Esto podría indicar un problema con su estrategia actual. Considera reformular la tarea, proporcionar instrucciones más específicas o guiarlo hacia un enfoque diferente.", "codebaseSearch": { diff --git a/src/i18n/locales/fr/tools.json b/src/i18n/locales/fr/tools.json index a6c71aca33..113a7cdb27 100644 --- a/src/i18n/locales/fr/tools.json +++ b/src/i18n/locales/fr/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (lignes {{start}}-{{end}})", "definitionsOnly": " (définitions uniquement)", - "maxLines": " (max {{max}} lignes)" + "maxLines": " (max {{max}} lignes)", + "safeguardNotice": "Ce fichier fait {{fileSizeKB}}KB et consommerait une partie importante de la fenêtre de contexte. Affichage uniquement des {{actualLinesRead}} premières lignes complètes ({{charactersRead}} caractères) pour préserver l'espace de contexte. Utilisez line_range si vous devez lire des sections spécifiques." 
}, "toolRepetitionLimitReached": "Roo semble être bloqué dans une boucle, tentant la même action ({{toolName}}) de façon répétée. Cela pourrait indiquer un problème avec sa stratégie actuelle. Envisage de reformuler la tâche, de fournir des instructions plus spécifiques ou de le guider vers une approche différente.", "codebaseSearch": { diff --git a/src/i18n/locales/hi/tools.json b/src/i18n/locales/hi/tools.json index 0cb4aeb14e..f5260b6f54 100644 --- a/src/i18n/locales/hi/tools.json +++ b/src/i18n/locales/hi/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (पंक्तियाँ {{start}}-{{end}})", "definitionsOnly": " (केवल परिभाषाएँ)", - "maxLines": " (अधिकतम {{max}} पंक्तियाँ)" + "maxLines": " (अधिकतम {{max}} पंक्तियाँ)", + "safeguardNotice": "यह फ़ाइल {{fileSizeKB}}KB की है और संदर्भ विंडो का एक महत्वपूर्ण हिस्सा उपभोग करेगी। संदर्भ स्थान को संरक्षित करने के लिए केवल पहली {{actualLinesRead}} पूर्ण पंक्तियाँ ({{charactersRead}} वर्ण) दिखाई जा रही हैं। यदि आपको विशिष्ट अनुभाग पढ़ने की आवश्यकता है तो line_range का उपयोग करें।" }, "toolRepetitionLimitReached": "Roo एक लूप में फंसा हुआ लगता है, बार-बार एक ही क्रिया ({{toolName}}) को दोहरा रहा है। यह उसकी वर्तमान रणनीति में किसी समस्या का संकेत हो सकता है। कार्य को पुनः परिभाषित करने, अधिक विशिष्ट निर्देश देने, या उसे एक अलग दृष्टिकोण की ओर मार्गदर्शित करने पर विचार करें।", "codebaseSearch": { diff --git a/src/i18n/locales/id/tools.json b/src/i18n/locales/id/tools.json index 2e3c4f0c22..3941d295c8 100644 --- a/src/i18n/locales/id/tools.json +++ b/src/i18n/locales/id/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (baris {{start}}-{{end}})", "definitionsOnly": " (hanya definisi)", - "maxLines": " (maks {{max}} baris)" + "maxLines": " (maks {{max}} baris)", + "safeguardNotice": "File ini berukuran {{fileSizeKB}}KB dan akan menggunakan sebagian besar jendela konteks. Hanya menampilkan {{actualLinesRead}} baris lengkap pertama ({{charactersRead}} karakter) untuk menjaga ruang konteks. 
Gunakan line_range jika Anda perlu membaca bagian tertentu." }, "toolRepetitionLimitReached": "Roo tampaknya terjebak dalam loop, mencoba aksi yang sama ({{toolName}}) berulang kali. Ini mungkin menunjukkan masalah dengan strategi saat ini. Pertimbangkan untuk mengubah frasa tugas, memberikan instruksi yang lebih spesifik, atau mengarahkannya ke pendekatan yang berbeda.", "codebaseSearch": { diff --git a/src/i18n/locales/it/tools.json b/src/i18n/locales/it/tools.json index ffae474f1d..f0cbfc5571 100644 --- a/src/i18n/locales/it/tools.json +++ b/src/i18n/locales/it/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (righe {{start}}-{{end}})", "definitionsOnly": " (solo definizioni)", - "maxLines": " (max {{max}} righe)" + "maxLines": " (max {{max}} righe)", + "safeguardNotice": "Questo file è di {{fileSizeKB}}KB e consumerebbe una parte significativa della finestra di contesto. Vengono mostrate solo le prime {{actualLinesRead}} righe complete ({{charactersRead}} caratteri) per preservare lo spazio di contesto. Usa line_range se devi leggere sezioni specifiche." }, "toolRepetitionLimitReached": "Roo sembra essere bloccato in un ciclo, tentando ripetutamente la stessa azione ({{toolName}}). Questo potrebbe indicare un problema con la sua strategia attuale. 
Considera di riformulare l'attività, fornire istruzioni più specifiche o guidarlo verso un approccio diverso.", "codebaseSearch": { diff --git a/src/i18n/locales/ja/tools.json b/src/i18n/locales/ja/tools.json index 04a5fcc085..54c38eb514 100644 --- a/src/i18n/locales/ja/tools.json +++ b/src/i18n/locales/ja/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " ({{start}}-{{end}}行目)", "definitionsOnly": " (定義のみ)", - "maxLines": " (最大{{max}}行)" + "maxLines": " (最大{{max}}行)", + "safeguardNotice": "このファイルは{{fileSizeKB}}KBで、コンテキストウィンドウの大部分を消費します。コンテキストスペースを保持するため、最初の{{actualLinesRead}}行({{charactersRead}}文字)のみを表示しています。特定のセクションを読む必要がある場合は、line_rangeを使用してください。" }, "toolRepetitionLimitReached": "Rooが同じ操作({{toolName}})を繰り返し試みるループに陥っているようです。これは現在の方法に問題がある可能性を示しています。タスクの言い換え、より具体的な指示の提供、または別のアプローチへの誘導を検討してください。", "codebaseSearch": { diff --git a/src/i18n/locales/ko/tools.json b/src/i18n/locales/ko/tools.json index e43a541794..941bf0bdd8 100644 --- a/src/i18n/locales/ko/tools.json +++ b/src/i18n/locales/ko/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " ({{start}}-{{end}}행)", "definitionsOnly": " (정의만)", - "maxLines": " (최대 {{max}}행)" + "maxLines": " (최대 {{max}}행)", + "safeguardNotice": "이 파일은 {{fileSizeKB}}KB로 컨텍스트 창의 상당 부분을 차지합니다. 컨텍스트 공간을 보존하기 위해 처음 {{actualLinesRead}}개의 완전한 줄({{charactersRead}}자)만 표시합니다. 특정 섹션을 읽어야 하는 경우 line_range를 사용하세요." }, "toolRepetitionLimitReached": "Roo가 같은 동작({{toolName}})을 반복적으로 시도하면서 루프에 갇힌 것 같습니다. 이는 현재 전략에 문제가 있을 수 있음을 나타냅니다. 
작업을 다시 표현하거나, 더 구체적인 지침을 제공하거나, 다른 접근 방식으로 안내해 보세요.", "codebaseSearch": { diff --git a/src/i18n/locales/nl/tools.json b/src/i18n/locales/nl/tools.json index 56a8cdbc46..f300f8ba1b 100644 --- a/src/i18n/locales/nl/tools.json +++ b/src/i18n/locales/nl/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (regels {{start}}-{{end}})", "definitionsOnly": " (alleen definities)", - "maxLines": " (max {{max}} regels)" + "maxLines": " (max {{max}} regels)", + "safeguardNotice": "Dit bestand is {{fileSizeKB}}KB en zou een aanzienlijk deel van het contextvenster gebruiken. Er worden alleen de eerste {{actualLinesRead}} volledige regels ({{charactersRead}} tekens) weergegeven om contextruimte te behouden. Gebruik line_range als u specifieke secties moet lezen." }, "toolRepetitionLimitReached": "Roo lijkt vast te zitten in een lus, waarbij hij herhaaldelijk dezelfde actie ({{toolName}}) probeert. Dit kan duiden op een probleem met de huidige strategie. Overweeg de taak te herformuleren, specifiekere instructies te geven of Roo naar een andere aanpak te leiden.", "codebaseSearch": { diff --git a/src/i18n/locales/pl/tools.json b/src/i18n/locales/pl/tools.json index 62568826aa..57eaea3d2e 100644 --- a/src/i18n/locales/pl/tools.json +++ b/src/i18n/locales/pl/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (linie {{start}}-{{end}})", "definitionsOnly": " (tylko definicje)", - "maxLines": " (maks. {{max}} linii)" + "maxLines": " (maks. {{max}} linii)", + "safeguardNotice": "Ten plik ma {{fileSizeKB}}KB i zużyłby znaczną część okna kontekstu. Wyświetlane jest tylko pierwsze {{actualLinesRead}} pełnych linii ({{charactersRead}} znaków), aby zachować przestrzeń kontekstu. Użyj line_range, jeśli musisz przeczytać określone sekcje." }, "toolRepetitionLimitReached": "Wygląda na to, że Roo utknął w pętli, wielokrotnie próbując wykonać tę samą akcję ({{toolName}}). Może to wskazywać na problem z jego obecną strategią. 
Rozważ przeformułowanie zadania, podanie bardziej szczegółowych instrukcji lub nakierowanie go na inne podejście.", "codebaseSearch": { diff --git a/src/i18n/locales/pt-BR/tools.json b/src/i18n/locales/pt-BR/tools.json index f74e0f8196..aafbe71b41 100644 --- a/src/i18n/locales/pt-BR/tools.json +++ b/src/i18n/locales/pt-BR/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (linhas {{start}}-{{end}})", "definitionsOnly": " (apenas definições)", - "maxLines": " (máx. {{max}} linhas)" + "maxLines": " (máx. {{max}} linhas)", + "safeguardNotice": "Este arquivo tem {{fileSizeKB}}KB e consumiria uma parte significativa da janela de contexto. Mostrando apenas as primeiras {{actualLinesRead}} linhas completas ({{charactersRead}} caracteres) para preservar o espaço de contexto. Use line_range se precisar ler seções específicas." }, "toolRepetitionLimitReached": "Roo parece estar preso em um loop, tentando a mesma ação ({{toolName}}) repetidamente. Isso pode indicar um problema com sua estratégia atual. Considere reformular a tarefa, fornecer instruções mais específicas ou guiá-lo para uma abordagem diferente.", "codebaseSearch": { diff --git a/src/i18n/locales/ru/tools.json b/src/i18n/locales/ru/tools.json index 1e59d10499..e6a61f689a 100644 --- a/src/i18n/locales/ru/tools.json +++ b/src/i18n/locales/ru/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (строки {{start}}-{{end}})", "definitionsOnly": " (только определения)", - "maxLines": " (макс. {{max}} строк)" + "maxLines": " (макс. {{max}} строк)", + "safeguardNotice": "Этот файл размером {{fileSizeKB}}КБ займет значительную часть контекстного окна. Показаны только первые {{actualLinesRead}} полных строк ({{charactersRead}} символов) для сохранения контекстного пространства. Используйте line_range, если нужно прочитать определенные разделы." }, "toolRepetitionLimitReached": "Похоже, что Roo застрял в цикле, многократно пытаясь выполнить одно и то же действие ({{toolName}}). 
Это может указывать на проблему с его текущей стратегией. Попробуйте переформулировать задачу, предоставить более конкретные инструкции или направить его к другому подходу.", "codebaseSearch": { diff --git a/src/i18n/locales/tr/tools.json b/src/i18n/locales/tr/tools.json index e4c73cdc4b..d133826c56 100644 --- a/src/i18n/locales/tr/tools.json +++ b/src/i18n/locales/tr/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (satır {{start}}-{{end}})", "definitionsOnly": " (sadece tanımlar)", - "maxLines": " (maks. {{max}} satır)" + "maxLines": " (maks. {{max}} satır)", + "safeguardNotice": "Bu dosya {{fileSizeKB}}KB boyutunda ve bağlam penceresinin önemli bir kısmını tüketecektir. Bağlam alanını korumak için yalnızca ilk {{actualLinesRead}} tam satır ({{charactersRead}} karakter) gösteriliyor. Belirli bölümleri okumanız gerekiyorsa line_range kullanın." }, "toolRepetitionLimitReached": "Roo bir döngüye takılmış gibi görünüyor, aynı eylemi ({{toolName}}) tekrar tekrar deniyor. Bu, mevcut stratejisinde bir sorun olduğunu gösterebilir. Görevi yeniden ifade etmeyi, daha spesifik talimatlar vermeyi veya onu farklı bir yaklaşıma yönlendirmeyi düşünün.", "codebaseSearch": { diff --git a/src/i18n/locales/vi/tools.json b/src/i18n/locales/vi/tools.json index 9811ee12c9..00f67fb19a 100644 --- a/src/i18n/locales/vi/tools.json +++ b/src/i18n/locales/vi/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (dòng {{start}}-{{end}})", "definitionsOnly": " (chỉ định nghĩa)", - "maxLines": " (tối đa {{max}} dòng)" + "maxLines": " (tối đa {{max}} dòng)", + "safeguardNotice": "Tệp này có kích thước {{fileSizeKB}}KB và sẽ chiếm một phần đáng kể của cửa sổ ngữ cảnh. Chỉ hiển thị {{actualLinesRead}} dòng đầu tiên hoàn chỉnh ({{charactersRead}} ký tự) để bảo toàn không gian ngữ cảnh. Sử dụng line_range nếu bạn cần đọc các phần cụ thể." }, "toolRepetitionLimitReached": "Roo dường như đang bị mắc kẹt trong một vòng lặp, liên tục cố gắng thực hiện cùng một hành động ({{toolName}}). 
Điều này có thể cho thấy vấn đề với chiến lược hiện tại. Hãy cân nhắc việc diễn đạt lại nhiệm vụ, cung cấp hướng dẫn cụ thể hơn, hoặc hướng Roo theo một cách tiếp cận khác.", "codebaseSearch": { diff --git a/src/i18n/locales/zh-CN/tools.json b/src/i18n/locales/zh-CN/tools.json index 13641b8d43..55bb52f36e 100644 --- a/src/i18n/locales/zh-CN/tools.json +++ b/src/i18n/locales/zh-CN/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (第 {{start}}-{{end}} 行)", "definitionsOnly": " (仅定义)", - "maxLines": " (最多 {{max}} 行)" + "maxLines": " (最多 {{max}} 行)", + "safeguardNotice": "此文件大小为 {{fileSizeKB}}KB,会占用上下文窗口的大部分空间。为了保留上下文空间,仅显示前 {{actualLinesRead}} 行完整内容({{charactersRead}} 个字符)。如需读取特定部分,请使用 line_range。" }, "toolRepetitionLimitReached": "Roo 似乎陷入循环,反复尝试同一操作 ({{toolName}})。这可能表明当前策略存在问题。请考虑重新描述任务、提供更具体的指示或引导其尝试不同的方法。", "codebaseSearch": { diff --git a/src/i18n/locales/zh-TW/tools.json b/src/i18n/locales/zh-TW/tools.json index a726e3c919..43b079d32b 100644 --- a/src/i18n/locales/zh-TW/tools.json +++ b/src/i18n/locales/zh-TW/tools.json @@ -2,7 +2,8 @@ "readFile": { "linesRange": " (第 {{start}}-{{end}} 行)", "definitionsOnly": " (僅定義)", - "maxLines": " (最多 {{max}} 行)" + "maxLines": " (最多 {{max}} 行)", + "safeguardNotice": "此檔案大小為 {{fileSizeKB}}KB,會佔用上下文視窗的大部分空間。為了保留上下文空間,僅顯示前 {{actualLinesRead}} 行完整內容({{charactersRead}} 個字元)。如需讀取特定部分,請使用 line_range。" }, "toolRepetitionLimitReached": "Roo 似乎陷入循環,反覆嘗試同一操作 ({{toolName}})。這可能表明目前策略存在問題。請考慮重新描述工作、提供更具體的指示或引導其嘗試不同的方法。", "codebaseSearch": { diff --git a/src/integrations/misc/__tests__/read-lines-char-limit.spec.ts b/src/integrations/misc/__tests__/read-lines-char-limit.spec.ts new file mode 100644 index 0000000000..a983bad705 --- /dev/null +++ b/src/integrations/misc/__tests__/read-lines-char-limit.spec.ts @@ -0,0 +1,224 @@ +import { promises as fs } from "fs" +import path from "path" +import { readLinesWithCharLimit } from "../read-lines-char-limit" + +describe("readLinesWithCharLimit", () => { + const testDir = 
path.join(__dirname, "test-files")
	const testFile = path.join(testDir, "char-limit-test.txt")
	const longLineFile = path.join(testDir, "long-lines.txt")
	const mixedFile = path.join(testDir, "mixed-content.txt")

	beforeAll(async () => {
		// Set up a scratch directory holding fixture files of known sizes.
		await fs.mkdir(testDir, { recursive: true })

		// 20 numbered lines: "Line 1".."Line 9" cost 7 chars each incl. newline,
		// "Line 10".."Line 19" cost 8, and "Line 20" (no trailing newline) costs 7.
		const numbered = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`).join("\n")
		await fs.writeFile(testFile, numbered)

		// Five lines of 1000 'A's each (1001 chars per line incl. newline).
		const row = "A".repeat(1000)
		await fs.writeFile(longLineFile, new Array(5).fill(row).join("\n"))

		// Lines of assorted lengths for boundary-condition tests.
		const assorted = [
			"Short", // 5 chars
			"Medium length line", // 18 chars
			"A".repeat(100), // 100 chars
			"Another short", // 13 chars
			"B".repeat(200), // 200 chars
		].join("\n")
		await fs.writeFile(mixedFile, assorted)
	})

	afterAll(async () => {
		// Remove every fixture created above.
		await fs.rm(testDir, { recursive: true, force: true })
	})

	describe("basic functionality", () => {
		it("should read complete file when char limit is not exceeded", async () => {
			const res = await readLinesWithCharLimit(testFile, 1000)

			expect(res.wasTruncated).toBe(false)
			expect(res.linesRead).toBe(20)
			// 9 * 7 + 10 * 8 + 7 (final line, no newline) = 150 characters total.
			expect(res.charactersRead).toBe(150)
			expect(res.content).toContain("Line 1")
			expect(res.content).toContain("Line 20")
		})

		it("should truncate at line boundary when char limit is exceeded", async () => {
			// A 50-char budget fits exactly 7 complete 7-char lines (49 chars).
			const res = await readLinesWithCharLimit(testFile, 50)

			expect(res.wasTruncated).toBe(true)
			expect(res.linesRead).toBe(7)
			expect(res.charactersRead).toBe(49)
			expect(res.content).toContain("Line 1")
			expect(res.content).toContain("Line 7")
			expect(res.content).not.toContain("Line 8")
		})

		it("should handle startLine parameter correctly", async () => {
			// startLine is 0-based, so 4 means begin reading at "Line 5".
			const res = await readLinesWithCharLimit(testFile, 50, 4)

			expect(res.wasTruncated).toBe(true)
			// Lines 5-9 cost 7 chars each (35 total); "Line 10" adds 8 more (43).
			// Adding "Line 11" would reach 51, past the 50-char budget, so the
			// result holds exactly 6 lines.
			expect(res.linesRead).toBe(6)
			expect(res.content).toContain("Line 5")
			expect(res.content).toContain("Line 10")
			expect(res.content).not.toContain("Line 4")
			expect(res.content).not.toContain("Line 11")
		})
	})

	describe("edge cases", () => {
		it("should handle empty files", async () => {
			const emptyFile = path.join(testDir, "empty.txt")
			await fs.writeFile(emptyFile, "")

			const res = await readLinesWithCharLimit(emptyFile, 100)

			expect(res.wasTruncated).toBe(false)
			expect(res.linesRead).toBe(0)
			expect(res.charactersRead).toBe(0)
			expect(res.content).toBe("")
		})

		it("should handle single character limit", async () => {
			const res = await readLinesWithCharLimit(testFile, 1)

			expect(res.wasTruncated).toBe(true)
			// Not even the first 7-char line fits inside a 1-char budget.
			expect(res.linesRead).toBe(0)
			expect(res.charactersRead).toBe(0)
			expect(res.content).toBe("")
		})

		it("should handle file with no newline at end", async () => {
			const noNewlineFile = path.join(testDir, "no-newline.txt")
			await fs.writeFile(noNewlineFile, "Line without newline")

			const res = await readLinesWithCharLimit(noNewlineFile, 100)

			expect(res.wasTruncated).toBe(false)
			expect(res.linesRead).toBe(1)
			expect(res.charactersRead).toBe(20)
			expect(res.content).toBe("Line without newline")
		})

		it("should reject negative maxChars", async () => {
			await expect(readLinesWithCharLimit(testFile, -1)).rejects.toThrow("maxChars must be positive")
		})

		it("should reject negative startLine", async () => {
			await expect(readLinesWithCharLimit(testFile, 100, -1)).rejects.toThrow("startLine must be non-negative")
		})
	})

	describe("long lines handling", () => {
		it("should not include partial lines when they exceed char limit", async () => {
			// Each line costs 1001 chars (1000 'A's plus newline); a 1500-char
			// budget therefore fits only the first complete line.
			const res = await readLinesWithCharLimit(longLineFile, 1500)

			expect(res.wasTruncated).toBe(true)
			expect(res.linesRead).toBe(1)
			expect(res.charactersRead).toBe(1001)
			expect(res.content).toMatch(/^A{1000}\n$/)
		})

		it("should handle case where first line exceeds limit", async () => {
			// A budget smaller than even one line yields an empty result.
			const res = await readLinesWithCharLimit(longLineFile, 500)

			expect(res.wasTruncated).toBe(true)
			expect(res.linesRead).toBe(0)
			expect(res.charactersRead).toBe(0)
			expect(res.content).toBe("")
		})
	})

	describe("mixed content handling", () => {
		it("should correctly count characters with mixed line lengths", async () => {
			// "Short\n" (6) + "Medium length line\n" (19) + 100 'A's + "\n" (101) = 126.
			const res = await readLinesWithCharLimit(mixedFile, 130)

			expect(res.wasTruncated).toBe(true)
			expect(res.linesRead).toBe(3)
			expect(res.charactersRead).toBe(126)
			expect(res.content).toContain("Short")
			expect(res.content).toContain("Medium length line")
			expect(res.content).toContain("A".repeat(100))
			expect(res.content).not.toContain("Another short")
		})

		it("should handle exact character boundary", async () => {
			// 25 chars is precisely the first two lines including their newlines.
			const res = await readLinesWithCharLimit(mixedFile, 25)

			expect(res.wasTruncated).toBe(true)
			expect(res.linesRead).toBe(2)
			expect(res.charactersRead).toBe(25)
			expect(res.content).toBe("Short\nMedium length line\n")
		})
	})

	describe("unicode handling", () => {
		it("should handle unicode characters correctly", async () => {
			const unicodeFile = path.join(testDir, "unicode.txt")
			const unicodeContent = [
				"Hello 👋", // 8 chars (emoji counts as 2)
				"世界", // 2 chars
				"🌍🌎🌏", // 6 chars (3 emojis)
			].join("\n")
			await fs.writeFile(unicodeFile, unicodeContent)

			const res = await readLinesWithCharLimit(unicodeFile, 20)

			expect(res.wasTruncated).toBe(false)
			expect(res.linesRead).toBe(3)
			// Counting uses JavaScript string length, i.e. UTF-16 code units.
			expect(res.content).toContain("Hello 👋")
			expect(res.content).toContain("世界")
			expect(res.content).toContain("🌍🌎🌏")
		})
	})

	describe("performance considerations", () => {
		it("should handle large files efficiently", async () => {
			const largeFile = path.join(testDir, "large.txt")
			// Roughly 10MB: 10000 lines of 1000 'A's plus a newline each.
			const chunk = "A".repeat(1000) + "\n"
			await fs.writeFile(largeFile, chunk.repeat(10000))

			const startTime = Date.now()
			const res = await readLinesWithCharLimit(largeFile, 10000)
			const duration = Date.now() - startTime

			expect(res.wasTruncated).toBe(true)
			expect(res.linesRead).toBe(9) // 9 complete lines
			expect(res.charactersRead).toBe(9009) // 9 * 1001
			expect(duration).toBeLessThan(100) // early exit must keep this fast
		})
	})

	describe("file not found handling", () => {
		it("should reject when file does not exist", async () => {
			const nonExistentFile = path.join(testDir, "does-not-exist.txt")

			await expect(readLinesWithCharLimit(nonExistentFile, 100)).rejects.toThrow()
		})
	})
})
b/src/integrations/misc/read-lines-char-limit.ts new file mode 100644 index 0000000000..850354956f --- /dev/null +++ b/src/integrations/misc/read-lines-char-limit.ts @@ -0,0 +1,117 @@ +import { createReadStream } from "fs" + +/** + * Result of reading lines with character limit + */ +export interface ReadLinesCharLimitResult { + /** The content that was read */ + content: string + /** The number of complete lines that were read */ + linesRead: number + /** Whether the file was truncated due to character limit */ + wasTruncated: boolean + /** Total number of characters read (excluding any incomplete final line) */ + charactersRead: number +} + +/** + * Reads lines from a file up to a maximum character count, ensuring we don't + * break in the middle of a line. + * + * @param filepath - Path to the file to read + * @param maxChars - Maximum number of characters to read + * @param startLine - Optional. The line number to start reading from (0-based, inclusive) + * @returns Promise resolving to the read result with content and metadata + */ +export function readLinesWithCharLimit( + filepath: string, + maxChars: number, + startLine: number = 0, +): Promise { + return new Promise((resolve, reject) => { + // Validate inputs + if (maxChars <= 0) { + return reject(new RangeError(`maxChars must be positive, got ${maxChars}`)) + } + if (startLine < 0) { + return reject(new RangeError(`startLine must be non-negative, got ${startLine}`)) + } + + const input = createReadStream(filepath, { encoding: "utf8" }) + let buffer = "" + let currentLineNumber = 0 + let result = "" + let charactersRead = 0 + let linesIncluded = 0 + let wasTruncated = false + + // Handle errors + input.on("error", reject) + + // Process data chunks + input.on("data", (chunk) => { + buffer += chunk.toString() + + let pos = 0 + let nextNewline = buffer.indexOf("\n", pos) + + // Process complete lines in the buffer + while (nextNewline !== -1) { + const lineWithNewline = buffer.substring(pos, nextNewline + 1) 
+ + // Check if we're past the start line + if (currentLineNumber >= startLine) { + // Check if adding this line would exceed the character limit + if (charactersRead + lineWithNewline.length > maxChars) { + // We've hit the limit, stop reading + wasTruncated = true + input.destroy() + resolve({ + content: result, + linesRead: linesIncluded, + wasTruncated, + charactersRead, + }) + return + } + + // Add the line to the result + result += lineWithNewline + charactersRead += lineWithNewline.length + linesIncluded++ + } + + // Move to next line + pos = nextNewline + 1 + currentLineNumber++ + nextNewline = buffer.indexOf("\n", pos) + } + + // Keep the incomplete line in the buffer + buffer = buffer.substring(pos) + }) + + // Handle end of file + input.on("end", () => { + // Process any remaining data in buffer (last line without newline) + if (buffer.length > 0 && currentLineNumber >= startLine) { + // Check if adding this final line would exceed the limit + if (charactersRead + buffer.length <= maxChars) { + result += buffer + charactersRead += buffer.length + linesIncluded++ + } else { + // Mark as truncated if we couldn't include the last line + wasTruncated = true + } + } + + resolve({ + content: result, + linesRead: linesIncluded, + wasTruncated, + charactersRead, + }) + }) + }) +}