Skip to content

Commit 225adc4

Browse files
authored
feat: allow read tool to handle images (#3052)
1 parent eb4b572 commit 225adc4

File tree

7 files changed

+159
-83
lines changed

7 files changed

+159
-83
lines changed

packages/opencode/src/provider/models.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,12 @@ export namespace ModelsDev {
2828
context: z.number(),
2929
output: z.number(),
3030
}),
31+
modalities: z
32+
.object({
33+
input: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
34+
output: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
35+
})
36+
.optional(),
3137
experimental: z.boolean().optional(),
3238
options: z.record(z.string(), z.any()),
3339
provider: z.object({ npm: z.string() }).optional(),

packages/opencode/src/provider/provider.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,11 @@ export namespace Provider {
279279
context: 0,
280280
output: 0,
281281
},
282+
modalities: model.modalities ??
283+
existing?.modalities ?? {
284+
input: ["text"],
285+
output: ["text"],
286+
},
282287
provider: model.provider ?? existing?.provider,
283288
}
284289
parsed.models[modelID] = parsedModel

packages/opencode/src/session/message-v2.ts

Lines changed: 97 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -17,71 +17,6 @@ export namespace MessageV2 {
1717
}),
1818
)
1919

20-
export const ToolStatePending = z
21-
.object({
22-
status: z.literal("pending"),
23-
})
24-
.meta({
25-
ref: "ToolStatePending",
26-
})
27-
28-
export type ToolStatePending = z.infer<typeof ToolStatePending>
29-
30-
export const ToolStateRunning = z
31-
.object({
32-
status: z.literal("running"),
33-
input: z.any(),
34-
title: z.string().optional(),
35-
metadata: z.record(z.string(), z.any()).optional(),
36-
time: z.object({
37-
start: z.number(),
38-
}),
39-
})
40-
.meta({
41-
ref: "ToolStateRunning",
42-
})
43-
export type ToolStateRunning = z.infer<typeof ToolStateRunning>
44-
45-
export const ToolStateCompleted = z
46-
.object({
47-
status: z.literal("completed"),
48-
input: z.record(z.string(), z.any()),
49-
output: z.string(),
50-
title: z.string(),
51-
metadata: z.record(z.string(), z.any()),
52-
time: z.object({
53-
start: z.number(),
54-
end: z.number(),
55-
compacted: z.number().optional(),
56-
}),
57-
})
58-
.meta({
59-
ref: "ToolStateCompleted",
60-
})
61-
export type ToolStateCompleted = z.infer<typeof ToolStateCompleted>
62-
63-
export const ToolStateError = z
64-
.object({
65-
status: z.literal("error"),
66-
input: z.record(z.string(), z.any()),
67-
error: z.string(),
68-
metadata: z.record(z.string(), z.any()).optional(),
69-
time: z.object({
70-
start: z.number(),
71-
end: z.number(),
72-
}),
73-
})
74-
.meta({
75-
ref: "ToolStateError",
76-
})
77-
export type ToolStateError = z.infer<typeof ToolStateError>
78-
79-
export const ToolState = z
80-
.discriminatedUnion("status", [ToolStatePending, ToolStateRunning, ToolStateCompleted, ToolStateError])
81-
.meta({
82-
ref: "ToolState",
83-
})
84-
8520
const PartBase = z.object({
8621
id: z.string(),
8722
sessionID: z.string(),
@@ -134,17 +69,6 @@ export namespace MessageV2 {
13469
})
13570
export type ReasoningPart = z.infer<typeof ReasoningPart>
13671

137-
export const ToolPart = PartBase.extend({
138-
type: z.literal("tool"),
139-
callID: z.string(),
140-
tool: z.string(),
141-
state: ToolState,
142-
metadata: z.record(z.string(), z.any()).optional(),
143-
}).meta({
144-
ref: "ToolPart",
145-
})
146-
export type ToolPart = z.infer<typeof ToolPart>
147-
14872
const FilePartSourceBase = z.object({
14973
text: z
15074
.object({
@@ -228,6 +152,83 @@ export namespace MessageV2 {
228152
})
229153
export type StepFinishPart = z.infer<typeof StepFinishPart>
230154

155+
export const ToolStatePending = z
156+
.object({
157+
status: z.literal("pending"),
158+
})
159+
.meta({
160+
ref: "ToolStatePending",
161+
})
162+
163+
export type ToolStatePending = z.infer<typeof ToolStatePending>
164+
165+
export const ToolStateRunning = z
166+
.object({
167+
status: z.literal("running"),
168+
input: z.any(),
169+
title: z.string().optional(),
170+
metadata: z.record(z.string(), z.any()).optional(),
171+
time: z.object({
172+
start: z.number(),
173+
}),
174+
})
175+
.meta({
176+
ref: "ToolStateRunning",
177+
})
178+
export type ToolStateRunning = z.infer<typeof ToolStateRunning>
179+
180+
export const ToolStateCompleted = z
181+
.object({
182+
status: z.literal("completed"),
183+
input: z.record(z.string(), z.any()),
184+
output: z.string(),
185+
title: z.string(),
186+
metadata: z.record(z.string(), z.any()),
187+
time: z.object({
188+
start: z.number(),
189+
end: z.number(),
190+
compacted: z.number().optional(),
191+
}),
192+
attachments: FilePart.array().optional(),
193+
})
194+
.meta({
195+
ref: "ToolStateCompleted",
196+
})
197+
export type ToolStateCompleted = z.infer<typeof ToolStateCompleted>
198+
199+
export const ToolStateError = z
200+
.object({
201+
status: z.literal("error"),
202+
input: z.record(z.string(), z.any()),
203+
error: z.string(),
204+
metadata: z.record(z.string(), z.any()).optional(),
205+
time: z.object({
206+
start: z.number(),
207+
end: z.number(),
208+
}),
209+
})
210+
.meta({
211+
ref: "ToolStateError",
212+
})
213+
export type ToolStateError = z.infer<typeof ToolStateError>
214+
215+
export const ToolState = z
216+
.discriminatedUnion("status", [ToolStatePending, ToolStateRunning, ToolStateCompleted, ToolStateError])
217+
.meta({
218+
ref: "ToolState",
219+
})
220+
221+
export const ToolPart = PartBase.extend({
222+
type: z.literal("tool"),
223+
callID: z.string(),
224+
tool: z.string(),
225+
state: ToolState,
226+
metadata: z.record(z.string(), z.any()).optional(),
227+
}).meta({
228+
ref: "ToolPart",
229+
})
230+
export type ToolPart = z.infer<typeof ToolPart>
231+
231232
const Base = z.object({
232233
id: z.string(),
233234
sessionID: z.string(),
@@ -531,7 +532,25 @@ export namespace MessageV2 {
531532
},
532533
]
533534
if (part.type === "tool") {
534-
if (part.state.status === "completed")
535+
if (part.state.status === "completed") {
536+
if (part.state.attachments?.length) {
537+
result.push({
538+
id: Identifier.ascending("message"),
539+
role: "user",
540+
parts: [
541+
{
542+
type: "text",
543+
text: `Tool ${part.tool} returned an attachment:`,
544+
},
545+
...part.state.attachments.map((attachment) => ({
546+
type: "file" as const,
547+
url: attachment.url,
548+
mediaType: attachment.mime,
549+
filename: attachment.filename,
550+
})),
551+
],
552+
})
553+
}
535554
return [
536555
{
537556
type: ("tool-" + part.tool) as `tool-${string}`,
@@ -542,6 +561,7 @@ export namespace MessageV2 {
542561
callProviderMetadata: part.metadata,
543562
},
544563
]
564+
}
545565
if (part.state.status === "error")
546566
return [
547567
{

packages/opencode/src/session/prompt.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -457,6 +457,10 @@ export namespace SessionPrompt {
457457
abort: options.abortSignal!,
458458
messageID: input.processor.message.id,
459459
callID: options.toolCallId,
460+
extra: {
461+
modelID: input.modelID,
462+
providerID: input.providerID,
463+
},
460464
agent: input.agent.name,
461465
metadata: async (val) => {
462466
const match = input.processor.partFromToolCall(options.toolCallId)
@@ -989,6 +993,7 @@ export namespace SessionPrompt {
989993
start: match.state.time.start,
990994
end: Date.now(),
991995
},
996+
attachments: value.output.attachments,
992997
},
993998
})
994999
delete toolcalls[value.toolCallId]

packages/opencode/src/tool/read.ts

Lines changed: 41 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@ import { FileTime } from "../file/time"
77
import DESCRIPTION from "./read.txt"
88
import { Filesystem } from "../util/filesystem"
99
import { Instance } from "../project/instance"
10+
import { Provider } from "../provider/provider"
11+
import { Identifier } from "../id/id"
1012

1113
const DEFAULT_READ_LIMIT = 2000
1214
const MAX_LINE_LENGTH = 2000
@@ -23,6 +25,8 @@ export const ReadTool = Tool.define("read", {
2325
if (!path.isAbsolute(filepath)) {
2426
filepath = path.join(process.cwd(), filepath)
2527
}
28+
const title = path.relative(Instance.worktree, filepath)
29+
2630
if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
2731
throw new Error(`File ${filepath} is not in the current working directory`)
2832
}
@@ -48,12 +52,45 @@ export const ReadTool = Tool.define("read", {
4852
throw new Error(`File not found: ${filepath}`)
4953
}
5054

51-
const limit = params.limit ?? DEFAULT_READ_LIMIT
52-
const offset = params.offset || 0
5355
const isImage = isImageFile(filepath)
54-
if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`)
56+
const supportsImages = await (async () => {
57+
if (!ctx.extra?.["providerID"] || !ctx.extra?.["modelID"]) return false
58+
const providerID = ctx.extra["providerID"] as string
59+
const modelID = ctx.extra["modelID"] as string
60+
const model = await Provider.getModel(providerID, modelID).catch(() => undefined)
61+
if (!model) return false
62+
return model.info.modalities?.input?.includes("image") ?? false
63+
})()
64+
if (isImage) {
65+
if (!supportsImages) {
66+
throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
67+
}
68+
const mime = file.type
69+
const msg = "Image read successfully"
70+
return {
71+
title,
72+
output: msg,
73+
metadata: {
74+
preview: msg,
75+
},
76+
attachments: [
77+
{
78+
id: Identifier.ascending("part"),
79+
sessionID: ctx.sessionID,
80+
messageID: ctx.messageID,
81+
type: "file",
82+
mime,
83+
url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
84+
},
85+
],
86+
}
87+
}
88+
5589
const isBinary = await isBinaryFile(filepath, file)
5690
if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
91+
92+
const limit = params.limit ?? DEFAULT_READ_LIMIT
93+
const offset = params.offset || 0
5794
const lines = await file.text().then((text) => text.split("\n"))
5895
const raw = lines.slice(offset, offset + limit).map((line) => {
5996
return line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line
@@ -76,7 +113,7 @@ export const ReadTool = Tool.define("read", {
76113
FileTime.read(ctx.sessionID, filepath)
77114

78115
return {
79-
title: path.relative(Instance.worktree, filepath),
116+
title,
80117
output,
81118
metadata: {
82119
preview,

packages/opencode/src/tool/read.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,6 @@ Usage:
77
- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters
88
- Any lines longer than 2000 characters will be truncated
99
- Results are returned using cat -n format, with line numbers starting at 1
10-
- This tool cannot read binary files, including images
11-
- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful.
10+
- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful.
1211
- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
12+
- You can read image files using this tool.

packages/opencode/src/tool/tool.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
import z from "zod/v4"
2+
import type { MessageV2 } from "../session/message-v2"
23

34
export namespace Tool {
45
interface Metadata {
56
[key: string]: any
67
}
8+
79
export type Context<M extends Metadata = Metadata> = {
810
sessionID: string
911
messageID: string
@@ -25,6 +27,7 @@ export namespace Tool {
2527
title: string
2628
metadata: M
2729
output: string
30+
attachments?: MessageV2.FilePart[]
2831
}>
2932
}>
3033
}

0 commit comments

Comments (0)