Skip to content

Commit 3c61106

Browse files
committed
🤖 Add image reading support to file_read tool
- Detect MIME types using mime-types package
- Read images as base64 binary data
- Use toModelOutput to send images as media content to AI models
- Update UI to display image previews
- Add comprehensive tests for image reading and conversion
1 parent 88daab3 commit 3c61106

File tree

6 files changed

+259
-12
lines changed

6 files changed

+259
-12
lines changed

bun.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"lru-cache": "^11.2.2",
2222
"markdown-it": "^14.1.0",
2323
"mermaid": "^11.12.0",
24+
"mime-types": "^3.0.1",
2425
"minimist": "^1.2.8",
2526
"posthog-js": "^1.276.0",
2627
"react": "^18.2.0",
@@ -61,6 +62,7 @@
6162
"@types/jest": "^30.0.0",
6263
"@types/katex": "^0.16.7",
6364
"@types/markdown-it": "^14.1.2",
65+
"@types/mime-types": "^3.0.1",
6466
"@types/minimist": "^1.2.5",
6567
"@types/react": "^18.2.0",
6668
"@types/react-dom": "^18.2.0",
@@ -744,6 +746,8 @@
744746

745747
"@types/mime": ["@types/mime@1.3.5", "", {}, "sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w=="],
746748

749+
"@types/mime-types": ["@types/mime-types@3.0.1", "", {}, "sha512-xRMsfuQbnRq1Ef+C+RKaENOxXX87Ygl38W1vDfPHRku02TgQr+Qd8iivLtAMcR0KF5/29xlnFihkTlbqFrGOVQ=="],
750+
747751
"@types/minimist": ["@types/minimist@1.2.5", "", {}, "sha512-hov8bUuiLiyFPGyFPE1lwWhmzYbirOXQNNo40+y3zow8aFVTeyn3VWL0VFFfdNddA8S4Vf0Tc062rzyNr7Paag=="],
748752

749753
"@types/ms": ["@types/ms@2.1.0", "", {}, "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA=="],

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
"lru-cache": "^11.2.2",
5151
"markdown-it": "^14.1.0",
5252
"mermaid": "^11.12.0",
53+
"mime-types": "^3.0.1",
5354
"minimist": "^1.2.8",
5455
"posthog-js": "^1.276.0",
5556
"react": "^18.2.0",
@@ -90,6 +91,7 @@
9091
"@types/jest": "^30.0.0",
9192
"@types/katex": "^0.16.7",
9293
"@types/markdown-it": "^14.1.2",
94+
"@types/mime-types": "^3.0.1",
9395
"@types/minimist": "^1.2.5",
9496
"@types/react": "^18.2.0",
9597
"@types/react-dom": "^18.2.0",

src/components/tools/FileReadToolCall.tsx

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,14 @@ const InfoValue = styled.span`
101101
word-break: break-all;
102102
`;
103103

104+
const ImagePreview = styled.img`
105+
max-width: 100%;
106+
max-height: 400px;
107+
border-radius: 3px;
108+
display: block;
109+
margin: 8px 0;
110+
`;
111+
104112
interface FileReadToolCallProps {
105113
args: FileReadToolArgs;
106114
result?: FileReadToolResult;
@@ -170,7 +178,10 @@ export const FileReadToolCall: React.FC<FileReadToolCallProps> = ({
170178
<Tooltip>file_read</Tooltip>
171179
</TooltipWrapper>
172180
<FilePathText>{filePath}</FilePathText>
173-
{result && result.success && parsedContent && (
181+
{result && result.success && result.mime_type?.startsWith("image/") && (
182+
<MetadataText>{result.mime_type}</MetadataText>
183+
)}
184+
{result && result.success && parsedContent && !result.mime_type?.startsWith("image/") && (
174185
<MetadataText>
175186
read {formatBytes(parsedContent.actualBytes)} of {formatBytes(result.file_size)}
176187
</MetadataText>
@@ -210,19 +221,32 @@ export const FileReadToolCall: React.FC<FileReadToolCallProps> = ({
210221
</DetailSection>
211222
)}
212223

213-
{result.success && result.content && parsedContent && (
224+
{result.success && result.mime_type?.startsWith("image/") && (
214225
<DetailSection>
215-
<DetailLabel>Content</DetailLabel>
216-
<ContentBlock>
217-
<LineNumbers>
218-
{parsedContent.lineNumbers.map((lineNum, i) => (
219-
<div key={i}>{lineNum}</div>
220-
))}
221-
</LineNumbers>
222-
<ContentText>{parsedContent.actualContent}</ContentText>
223-
</ContentBlock>
226+
<DetailLabel>Image Preview</DetailLabel>
227+
<ImagePreview
228+
src={`data:${result.mime_type};base64,${result.content}`}
229+
alt={args.filePath}
230+
/>
224231
</DetailSection>
225232
)}
233+
234+
{result.success &&
235+
result.content &&
236+
!result.mime_type?.startsWith("image/") &&
237+
parsedContent && (
238+
<DetailSection>
239+
<DetailLabel>Content</DetailLabel>
240+
<ContentBlock>
241+
<LineNumbers>
242+
{parsedContent.lineNumbers.map((lineNum, i) => (
243+
<div key={i}>{lineNum}</div>
244+
))}
245+
</LineNumbers>
246+
<ContentText>{parsedContent.actualContent}</ContentText>
247+
</ContentBlock>
248+
</DetailSection>
249+
)}
226250
</>
227251
)}
228252

src/services/tools/file_read.test.ts

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,4 +388,180 @@ describe("file_read tool", () => {
388388
expect(result.content).toContain("content in subdir");
389389
}
390390
});
391+
392+
it("should read image files and return base64 content with mime type", async () => {
393+
// Setup - create a simple 1x1 PNG image (smallest valid PNG)
394+
const pngBuffer = Buffer.from([
395+
0x89,
396+
0x50,
397+
0x4e,
398+
0x47,
399+
0x0d,
400+
0x0a,
401+
0x1a,
402+
0x0a, // PNG signature
403+
0x00,
404+
0x00,
405+
0x00,
406+
0x0d,
407+
0x49,
408+
0x48,
409+
0x44,
410+
0x52, // IHDR chunk
411+
0x00,
412+
0x00,
413+
0x00,
414+
0x01,
415+
0x00,
416+
0x00,
417+
0x00,
418+
0x01, // 1x1 dimensions
419+
0x08,
420+
0x06,
421+
0x00,
422+
0x00,
423+
0x00,
424+
0x1f,
425+
0x15,
426+
0xc4,
427+
0x89,
428+
0x00,
429+
0x00,
430+
0x00,
431+
0x0a,
432+
0x49,
433+
0x44,
434+
0x41,
435+
0x54,
436+
0x78,
437+
0x9c,
438+
0x63,
439+
0x00,
440+
0x01,
441+
0x00,
442+
0x00,
443+
0x05,
444+
0x00,
445+
0x01,
446+
0x0d,
447+
0x0a,
448+
0x2d,
449+
0xb4,
450+
0x00,
451+
0x00,
452+
0x00,
453+
0x00,
454+
0x49,
455+
0x45,
456+
0x4e,
457+
0x44,
458+
0xae,
459+
0x42,
460+
0x60,
461+
0x82,
462+
]);
463+
const imagePath = path.join(testDir, "test.png");
464+
await fs.writeFile(imagePath, pngBuffer);
465+
466+
using testEnv = createTestFileReadTool({ cwd: testDir });
467+
const tool = testEnv.tool;
468+
const args: FileReadToolArgs = {
469+
filePath: imagePath,
470+
};
471+
472+
// Execute
473+
const result = (await tool.execute!(args, mockToolCallOptions)) as FileReadToolResult;
474+
475+
// Assert
476+
expect(result.success).toBe(true);
477+
if (result.success) {
478+
expect(result.mime_type).toBe("image/png");
479+
expect(result.lines_read).toBe(0); // Images don't have lines
480+
expect(result.content).toBe(pngBuffer.toString("base64"));
481+
expect(result.file_size).toBe(pngBuffer.length);
482+
}
483+
});
484+
485+
it("should return media content for images via toModelOutput", async () => {
486+
// Setup - create a simple image
487+
const jpegBuffer = Buffer.from([
488+
0xff,
489+
0xd8,
490+
0xff,
491+
0xe0,
492+
0x00,
493+
0x10,
494+
0x4a,
495+
0x46, // JPEG header
496+
0x49,
497+
0x46,
498+
0x00,
499+
0x01,
500+
0x01,
501+
0x00,
502+
0x00,
503+
0x01,
504+
0x00,
505+
0x01,
506+
0x00,
507+
0x00,
508+
0xff,
509+
0xd9, // End of image
510+
]);
511+
const imagePath = path.join(testDir, "test.jpg");
512+
await fs.writeFile(imagePath, jpegBuffer);
513+
514+
using testEnv = createTestFileReadTool({ cwd: testDir });
515+
const tool = testEnv.tool;
516+
const args: FileReadToolArgs = {
517+
filePath: imagePath,
518+
};
519+
520+
// Execute
521+
const result = (await tool.execute!(args, mockToolCallOptions)) as FileReadToolResult;
522+
523+
// Assert execute result
524+
expect(result.success).toBe(true);
525+
if (result.success) {
526+
expect(result.mime_type).toBe("image/jpeg");
527+
528+
// Test toModelOutput transformation
529+
const modelOutput = tool.toModelOutput!(result);
530+
expect(modelOutput.type).toBe("content");
531+
if (modelOutput.type === "content") {
532+
expect(modelOutput.value).toHaveLength(1);
533+
expect(modelOutput.value[0].type).toBe("media");
534+
if (modelOutput.value[0].type === "media") {
535+
expect(modelOutput.value[0].mediaType).toBe("image/jpeg");
536+
expect(modelOutput.value[0].data).toBe(jpegBuffer.toString("base64"));
537+
}
538+
}
539+
}
540+
});
541+
542+
it("should return json for text files via toModelOutput", async () => {
543+
// Setup
544+
const content = "line one\nline two";
545+
await fs.writeFile(testFilePath, content);
546+
547+
using testEnv = createTestFileReadTool({ cwd: testDir });
548+
const tool = testEnv.tool;
549+
const args: FileReadToolArgs = {
550+
filePath: testFilePath,
551+
};
552+
553+
// Execute
554+
const result = (await tool.execute!(args, mockToolCallOptions)) as FileReadToolResult;
555+
556+
// Assert
557+
expect(result.success).toBe(true);
558+
if (result.success) {
559+
// Test toModelOutput transformation
560+
const modelOutput = tool.toModelOutput!(result);
561+
expect(modelOutput.type).toBe("json");
562+
if (modelOutput.type === "json") {
563+
expect(modelOutput.value).toEqual(result);
564+
}
565+
}
566+
});
391567
});

src/services/tools/file_read.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { tool } from "ai";
22
import * as fs from "fs/promises";
33
import * as path from "path";
4+
import * as mime from "mime-types";
45
import type { FileReadToolResult } from "@/types/tools";
56
import type { ToolConfiguration, ToolFactory } from "@/utils/tools/tools";
67
import { TOOL_DEFINITIONS } from "@/utils/tools/toolDefinitions";
@@ -15,6 +16,26 @@ export const createFileReadTool: ToolFactory = (config: ToolConfiguration) => {
1516
return tool({
1617
description: TOOL_DEFINITIONS.file_read.description,
1718
inputSchema: TOOL_DEFINITIONS.file_read.schema,
19+
toModelOutput: (output: FileReadToolResult) => {
20+
// If this is an image file with a mime type, return it as media content
21+
if (output.success && output.mime_type && output.mime_type.startsWith("image/")) {
22+
return {
23+
type: "content",
24+
value: [
25+
{
26+
type: "media",
27+
data: output.content,
28+
mediaType: output.mime_type,
29+
},
30+
],
31+
};
32+
}
33+
// Otherwise return as JSON (text files)
34+
return {
35+
type: "json",
36+
value: output,
37+
};
38+
},
1839
execute: async (
1940
{ filePath, offset, limit },
2041
{ abortSignal: _abortSignal }
@@ -53,7 +74,26 @@ export const createFileReadTool: ToolFactory = (config: ToolConfiguration) => {
5374
};
5475
}
5576

56-
// Read full file content
77+
// Detect MIME type
78+
const mimeType = mime.lookup(resolvedPath) || undefined;
79+
80+
// Check if this is a binary image file
81+
if (mimeType && mimeType.startsWith("image/")) {
82+
// Read as binary and encode as base64 for images
83+
const buffer = await fs.readFile(resolvedPath);
84+
const base64Content = buffer.toString("base64");
85+
86+
return {
87+
success: true,
88+
file_size: stats.size,
89+
modifiedTime: stats.mtime.toISOString(),
90+
lines_read: 0, // Images don't have lines
91+
content: base64Content,
92+
mime_type: mimeType,
93+
};
94+
}
95+
96+
// Read full file content as text for non-image files
5797
const fullContent = await fs.readFile(resolvedPath, { encoding: "utf-8" });
5898

5999
const startLineNumber = offset ?? 1;

src/types/tools.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ export type FileReadToolResult =
5050
modifiedTime: string;
5151
lines_read: number;
5252
content: string;
53+
mime_type?: string;
5354
}
5455
| {
5556
success: false;

0 commit comments

Comments
 (0)