Skip to content

Commit 37776ba

Browse files
committed
feat: enhance extractTextFromFile to support line limit validation and documentation
1 parent 83df51a commit 37776ba

File tree

2 files changed

+365
-2
lines changed

2 files changed

+365
-2
lines changed
Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
// npx vitest core/mentions/__tests__/processUserContentMentions.spec.ts
2+
3+
import { describe, it, expect, vi, beforeEach } from "vitest"
4+
import { processUserContentMentions } from "../processUserContentMentions"
5+
import { parseMentions } from "../index"
6+
import { UrlContentFetcher } from "../../../services/browser/UrlContentFetcher"
7+
import { FileContextTracker } from "../../context-tracking/FileContextTracker"
8+
9+
// Mock the parseMentions function
10+
vi.mock("../index", () => ({
11+
parseMentions: vi.fn(),
12+
}))
13+
14+
describe("processUserContentMentions", () => {
15+
let mockUrlContentFetcher: UrlContentFetcher
16+
let mockFileContextTracker: FileContextTracker
17+
let mockRooIgnoreController: any
18+
19+
beforeEach(() => {
20+
vi.clearAllMocks()
21+
22+
mockUrlContentFetcher = {} as UrlContentFetcher
23+
mockFileContextTracker = {} as FileContextTracker
24+
mockRooIgnoreController = {}
25+
26+
// Default mock implementation
27+
vi.mocked(parseMentions).mockImplementation(async (text) => `parsed: ${text}`)
28+
})
29+
30+
describe("maxReadFileLine parameter", () => {
31+
it("should pass maxReadFileLine to parseMentions when provided", async () => {
32+
const userContent = [
33+
{
34+
type: "text" as const,
35+
text: "<task>Read file with limit</task>",
36+
},
37+
]
38+
39+
await processUserContentMentions({
40+
userContent,
41+
cwd: "/test",
42+
urlContentFetcher: mockUrlContentFetcher,
43+
fileContextTracker: mockFileContextTracker,
44+
rooIgnoreController: mockRooIgnoreController,
45+
maxReadFileLine: 100,
46+
})
47+
48+
expect(parseMentions).toHaveBeenCalledWith(
49+
"<task>Read file with limit</task>",
50+
"/test",
51+
mockUrlContentFetcher,
52+
mockFileContextTracker,
53+
mockRooIgnoreController,
54+
true,
55+
100,
56+
)
57+
})
58+
59+
it("should pass undefined maxReadFileLine when not provided", async () => {
60+
const userContent = [
61+
{
62+
type: "text" as const,
63+
text: "<task>Read file without limit</task>",
64+
},
65+
]
66+
67+
await processUserContentMentions({
68+
userContent,
69+
cwd: "/test",
70+
urlContentFetcher: mockUrlContentFetcher,
71+
fileContextTracker: mockFileContextTracker,
72+
rooIgnoreController: mockRooIgnoreController,
73+
})
74+
75+
expect(parseMentions).toHaveBeenCalledWith(
76+
"<task>Read file without limit</task>",
77+
"/test",
78+
mockUrlContentFetcher,
79+
mockFileContextTracker,
80+
mockRooIgnoreController,
81+
true,
82+
undefined,
83+
)
84+
})
85+
86+
it("should handle UNLIMITED_LINES constant correctly", async () => {
87+
const userContent = [
88+
{
89+
type: "text" as const,
90+
text: "<task>Read unlimited lines</task>",
91+
},
92+
]
93+
94+
await processUserContentMentions({
95+
userContent,
96+
cwd: "/test",
97+
urlContentFetcher: mockUrlContentFetcher,
98+
fileContextTracker: mockFileContextTracker,
99+
rooIgnoreController: mockRooIgnoreController,
100+
maxReadFileLine: -1,
101+
})
102+
103+
expect(parseMentions).toHaveBeenCalledWith(
104+
"<task>Read unlimited lines</task>",
105+
"/test",
106+
mockUrlContentFetcher,
107+
mockFileContextTracker,
108+
mockRooIgnoreController,
109+
true,
110+
-1,
111+
)
112+
})
113+
})
114+
115+
describe("content processing", () => {
116+
it("should process text blocks with <task> tags", async () => {
117+
const userContent = [
118+
{
119+
type: "text" as const,
120+
text: "<task>Do something</task>",
121+
},
122+
]
123+
124+
const result = await processUserContentMentions({
125+
userContent,
126+
cwd: "/test",
127+
urlContentFetcher: mockUrlContentFetcher,
128+
fileContextTracker: mockFileContextTracker,
129+
})
130+
131+
expect(parseMentions).toHaveBeenCalled()
132+
expect(result[0]).toEqual({
133+
type: "text",
134+
text: "parsed: <task>Do something</task>",
135+
})
136+
})
137+
138+
it("should process text blocks with <feedback> tags", async () => {
139+
const userContent = [
140+
{
141+
type: "text" as const,
142+
text: "<feedback>Fix this issue</feedback>",
143+
},
144+
]
145+
146+
const result = await processUserContentMentions({
147+
userContent,
148+
cwd: "/test",
149+
urlContentFetcher: mockUrlContentFetcher,
150+
fileContextTracker: mockFileContextTracker,
151+
})
152+
153+
expect(parseMentions).toHaveBeenCalled()
154+
expect(result[0]).toEqual({
155+
type: "text",
156+
text: "parsed: <feedback>Fix this issue</feedback>",
157+
})
158+
})
159+
160+
it("should not process text blocks without task or feedback tags", async () => {
161+
const userContent = [
162+
{
163+
type: "text" as const,
164+
text: "Regular text without special tags",
165+
},
166+
]
167+
168+
const result = await processUserContentMentions({
169+
userContent,
170+
cwd: "/test",
171+
urlContentFetcher: mockUrlContentFetcher,
172+
fileContextTracker: mockFileContextTracker,
173+
})
174+
175+
expect(parseMentions).not.toHaveBeenCalled()
176+
expect(result[0]).toEqual(userContent[0])
177+
})
178+
179+
it("should process tool_result blocks with string content", async () => {
180+
const userContent = [
181+
{
182+
type: "tool_result" as const,
183+
tool_use_id: "123",
184+
content: "<feedback>Tool feedback</feedback>",
185+
},
186+
]
187+
188+
const result = await processUserContentMentions({
189+
userContent,
190+
cwd: "/test",
191+
urlContentFetcher: mockUrlContentFetcher,
192+
fileContextTracker: mockFileContextTracker,
193+
})
194+
195+
expect(parseMentions).toHaveBeenCalled()
196+
expect(result[0]).toEqual({
197+
type: "tool_result",
198+
tool_use_id: "123",
199+
content: "parsed: <feedback>Tool feedback</feedback>",
200+
})
201+
})
202+
203+
it("should process tool_result blocks with array content", async () => {
204+
const userContent = [
205+
{
206+
type: "tool_result" as const,
207+
tool_use_id: "123",
208+
content: [
209+
{
210+
type: "text" as const,
211+
text: "<task>Array task</task>",
212+
},
213+
{
214+
type: "text" as const,
215+
text: "Regular text",
216+
},
217+
],
218+
},
219+
]
220+
221+
const result = await processUserContentMentions({
222+
userContent,
223+
cwd: "/test",
224+
urlContentFetcher: mockUrlContentFetcher,
225+
fileContextTracker: mockFileContextTracker,
226+
})
227+
228+
expect(parseMentions).toHaveBeenCalledTimes(1)
229+
expect(result[0]).toEqual({
230+
type: "tool_result",
231+
tool_use_id: "123",
232+
content: [
233+
{
234+
type: "text",
235+
text: "parsed: <task>Array task</task>",
236+
},
237+
{
238+
type: "text",
239+
text: "Regular text",
240+
},
241+
],
242+
})
243+
})
244+
245+
it("should handle mixed content types", async () => {
246+
const userContent = [
247+
{
248+
type: "text" as const,
249+
text: "<task>First task</task>",
250+
},
251+
{
252+
type: "image" as const,
253+
source: {
254+
type: "base64" as const,
255+
media_type: "image/png" as const,
256+
data: "base64data",
257+
},
258+
},
259+
{
260+
type: "tool_result" as const,
261+
tool_use_id: "456",
262+
content: "<feedback>Feedback</feedback>",
263+
},
264+
]
265+
266+
const result = await processUserContentMentions({
267+
userContent,
268+
cwd: "/test",
269+
urlContentFetcher: mockUrlContentFetcher,
270+
fileContextTracker: mockFileContextTracker,
271+
maxReadFileLine: 50,
272+
})
273+
274+
expect(parseMentions).toHaveBeenCalledTimes(2)
275+
expect(result).toHaveLength(3)
276+
expect(result[0]).toEqual({
277+
type: "text",
278+
text: "parsed: <task>First task</task>",
279+
})
280+
expect(result[1]).toEqual(userContent[1]) // Image block unchanged
281+
expect(result[2]).toEqual({
282+
type: "tool_result",
283+
tool_use_id: "456",
284+
content: "parsed: <feedback>Feedback</feedback>",
285+
})
286+
})
287+
})
288+
289+
describe("showRooIgnoredFiles parameter", () => {
290+
it("should default showRooIgnoredFiles to true", async () => {
291+
const userContent = [
292+
{
293+
type: "text" as const,
294+
text: "<task>Test default</task>",
295+
},
296+
]
297+
298+
await processUserContentMentions({
299+
userContent,
300+
cwd: "/test",
301+
urlContentFetcher: mockUrlContentFetcher,
302+
fileContextTracker: mockFileContextTracker,
303+
})
304+
305+
expect(parseMentions).toHaveBeenCalledWith(
306+
"<task>Test default</task>",
307+
"/test",
308+
mockUrlContentFetcher,
309+
mockFileContextTracker,
310+
undefined,
311+
true, // showRooIgnoredFiles should default to true
312+
undefined,
313+
)
314+
})
315+
316+
it("should respect showRooIgnoredFiles when explicitly set to false", async () => {
317+
const userContent = [
318+
{
319+
type: "text" as const,
320+
text: "<task>Test explicit false</task>",
321+
},
322+
]
323+
324+
await processUserContentMentions({
325+
userContent,
326+
cwd: "/test",
327+
urlContentFetcher: mockUrlContentFetcher,
328+
fileContextTracker: mockFileContextTracker,
329+
showRooIgnoredFiles: false,
330+
})
331+
332+
expect(parseMentions).toHaveBeenCalledWith(
333+
"<task>Test explicit false</task>",
334+
"/test",
335+
mockUrlContentFetcher,
336+
mockFileContextTracker,
337+
undefined,
338+
false,
339+
undefined,
340+
)
341+
})
342+
})
343+
})

src/integrations/misc/extract-text.ts

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,27 @@ export function getSupportedBinaryFormats(): string[] {
5050
return Object.keys(SUPPORTED_BINARY_FORMATS)
5151
}
5252

53+
/**
54+
* Extracts text content from a file, with support for various formats including PDF, DOCX, XLSX, and plain text.
55+
* For large text files, can limit the number of lines read to prevent context exhaustion.
56+
*
57+
* @param filePath - Path to the file to extract text from
58+
* @param maxReadFileLine - Maximum number of lines to read from text files.
59+
* Use UNLIMITED_LINES (-1) or undefined for no limit.
60+
* Must be a positive integer or UNLIMITED_LINES.
61+
* @returns Promise resolving to the extracted text content with line numbers
62+
* @throws {Error} If file not found, unsupported format, or invalid parameters
63+
*/
5364
export async function extractTextFromFile(filePath: string, maxReadFileLine?: number): Promise<string> {
65+
// Validate maxReadFileLine parameter
66+
if (maxReadFileLine !== undefined && maxReadFileLine !== -1) {
67+
if (!Number.isInteger(maxReadFileLine) || maxReadFileLine < 1) {
68+
throw new Error(
69+
`Invalid maxReadFileLine: ${maxReadFileLine}. Must be a positive integer or -1 for unlimited.`,
70+
)
71+
}
72+
}
73+
5474
try {
5575
await fs.access(filePath)
5676
} catch (error) {
@@ -70,10 +90,10 @@ export async function extractTextFromFile(filePath: string, maxReadFileLine?: nu
7090

7191
if (!isBinary) {
7292
// Check if we need to apply line limit
73-
if (maxReadFileLine && maxReadFileLine > 0) {
93+
if (maxReadFileLine !== undefined && maxReadFileLine !== -1) {
7494
const totalLines = await countFileLines(filePath)
7595
if (totalLines > maxReadFileLine) {
76-
// Read only up to maxReadFileLine
96+
// Read only up to maxReadFileLine (endLine is 0-based and inclusive)
7797
const content = await readLines(filePath, maxReadFileLine - 1, 0)
7898
const numberedContent = addLineNumbers(content)
7999
return (

0 commit comments

Comments
 (0)