Skip to content

Commit a3bb4a3

Browse files
pdf support in read tool (sst#5222)
Co-authored-by: ammi1378 <[email protected]>
1 parent 06ba1f7 commit a3bb4a3

File tree

3 files changed

+40
-169
lines changed

3 files changed

+40
-169
lines changed

packages/opencode/src/provider/transform.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,17 @@ import type { APICallError, ModelMessage } from "ai"
22
import { unique } from "remeda"
33
import type { JSONSchema } from "zod/v4/core"
44
import type { Provider } from "./provider"
5+
import type { ModelsDev } from "./models"
6+
7+
// One input-modality identifier: the element type of the `input` list on `ModelsDev.Model["modalities"]`.
type Modality = NonNullable<ModelsDev.Model["modalities"]>["input"][number]
8+
9+
/**
 * Maps a MIME type to the input modality it belongs to.
 *
 * Media types are case-insensitive and may carry parameters
 * (e.g. `Application/PDF; charset=binary`), so only the lower-cased
 * `type/subtype` essence is matched.
 *
 * @param mime MIME type string, optionally followed by `;`-separated parameters.
 * @returns "image", "audio", "video" or "pdf"; `undefined` for any other type.
 */
function mimeToModality(mime: string): Modality | undefined {
  // Drop any parameters and normalize case/whitespace before matching.
  const paramStart = mime.indexOf(";")
  const essence = (paramStart === -1 ? mime : mime.slice(0, paramStart)).trim().toLowerCase()

  if (essence.startsWith("image/")) return "image"
  if (essence.startsWith("audio/")) return "audio"
  if (essence.startsWith("video/")) return "video"
  if (essence === "application/pdf") return "pdf"
  return undefined
}
516

617
export namespace ProviderTransform {
718
function normalizeMessages(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
@@ -148,7 +159,32 @@ export namespace ProviderTransform {
148159
return msgs
149160
}
150161

162+
/**
 * Replaces attachments the target model cannot accept with an explanatory text part.
 *
 * Only user messages with array content are inspected; all other messages are
 * returned unchanged. A `file` or `image` part is replaced when its modality is
 * known and `model.capabilities.input[modality]` is falsy. Parts with an
 * unrecognised MIME type are passed through untouched.
 *
 * @param msgs Messages about to be sent to the provider.
 * @param model Model whose `capabilities.input` flags decide what is allowed.
 * @returns A new message array; the input messages are not mutated.
 */
function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
  return msgs.map((msg) => {
    if (msg.role !== "user" || !Array.isArray(msg.content)) return msg

    const filtered = msg.content.map((part) => {
      if (part.type !== "file" && part.type !== "image") return part

      let mime: string
      if (part.type === "image") {
        // Only string and URL payloads can hold a `data:` URL; stringifying a
        // binary buffer would be costly and never yields a MIME type.
        const source = part.image
        const raw = typeof source === "string" ? source : source instanceof URL ? source.href : ""
        // Prefer the media type declared on the part over one parsed from the payload.
        mime = part.mediaType ?? raw.split(";")[0].replace("data:", "")
      } else {
        mime = part.mediaType
      }
      const filename = part.type === "file" ? part.filename : undefined

      // An image part given as a plain URL, raw base64 or binary data has no
      // parseable MIME type, but it is still image input.
      const modality = mimeToModality(mime) ?? (part.type === "image" ? "image" : undefined)
      if (!modality) return part
      if (model.capabilities.input[modality]) return part

      const name = filename ? `"${filename}"` : modality
      return {
        type: "text" as const,
        text: `ERROR: Cannot read ${name} (this model does not support ${modality} input). Inform the user.`,
      }
    })

    return { ...msg, content: filtered }
  })
}
185+
151186
export function message(msgs: ModelMessage[], model: Provider.Model) {
187+
msgs = unsupportedParts(msgs, model)
152188
msgs = normalizeMessages(msgs, model)
153189
if (model.providerID === "anthropic" || model.api.id.includes("anthropic") || model.api.id.includes("claude")) {
154190
msgs = applyCaching(msgs, model.providerID)

packages/opencode/src/session/message-v2.ts

Lines changed: 0 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -411,147 +411,6 @@ export namespace MessageV2 {
411411
})
412412
export type WithParts = z.infer<typeof WithParts>
413413

414-
/**
 * Upgrades a legacy (v1) message to the v2 `{ info, parts }` representation.
 *
 * Assistant messages keep their text, step-start and tool-invocation parts;
 * user messages keep their text and file parts. Any other part type is dropped.
 *
 * @throws Error "unknown message type" when the role is neither "assistant" nor "user".
 * @throws Error "unknown tool invocation state" for a tool state other than
 *   "partial-call", "call" or "result".
 */
export function fromV1(v1: Message.Info) {
  // Called once for every v1 part, including parts that end up dropped, so
  // part ids are requested in the same order as the source parts.
  const partBase = () => ({
    id: Identifier.ascending("part"),
    messageID: v1.id,
    sessionID: v1.metadata.sessionID,
  })

  switch (v1.role) {
    case "assistant": {
      const assistant = v1.metadata.assistant!
      const info: Assistant = {
        id: v1.id,
        parentID: "",
        sessionID: v1.metadata.sessionID,
        role: "assistant",
        time: {
          created: v1.metadata.time.created,
          completed: v1.metadata.time.completed,
        },
        cost: assistant.cost,
        path: assistant.path,
        summary: assistant.summary,
        tokens: assistant.tokens,
        modelID: assistant.modelID,
        providerID: assistant.providerID,
        mode: "build",
        error: v1.metadata.error,
      }

      const parts = v1.parts.flatMap((part): Part[] => {
        const base = partBase()
        switch (part.type) {
          case "text":
            return [{ ...base, type: "text", text: part.text }]
          case "step-start":
            return [{ ...base, type: "step-start" }]
          case "tool-invocation": {
            const invocation = part.toolInvocation
            return [
              {
                ...base,
                type: "tool",
                callID: invocation.toolCallId,
                tool: invocation.toolName,
                state: (() => {
                  // A partial call has no recorded tool metadata to consult yet.
                  if (invocation.state === "partial-call") {
                    return { status: "pending", input: {}, raw: "" }
                  }

                  const { title, time, ...metadata } = v1.metadata.tool[invocation.toolCallId] ?? {}
                  switch (invocation.state) {
                    case "call":
                      return {
                        status: "running",
                        input: invocation.args,
                        time: { start: time?.start },
                      }
                    case "result":
                      return {
                        status: "completed",
                        input: invocation.args,
                        output: invocation.result,
                        title,
                        time,
                        metadata,
                      }
                    default:
                      throw new Error("unknown tool invocation state")
                  }
                })(),
              },
            ]
          }
          default:
            return []
        }
      })

      return { info, parts }
    }

    case "user": {
      const info: User = {
        id: v1.id,
        sessionID: v1.metadata.sessionID,
        role: "user",
        time: {
          created: v1.metadata.time.created,
        },
        agent: "build",
        model: {
          providerID: "opencode",
          modelID: "opencode",
        },
      }

      const parts = v1.parts.flatMap((part): Part[] => {
        const base = partBase()
        switch (part.type) {
          case "text":
            return [{ ...base, type: "text", text: part.text }]
          case "file":
            return [
              {
                ...base,
                type: "file",
                mime: part.mediaType,
                filename: part.filename,
                url: part.url,
              },
            ]
          default:
            return []
        }
      })

      return { info, parts }
    }

    default:
      throw new Error("unknown message type")
  }
}
554-
555414
export function toModelMessage(
556415
input: {
557416
info: Info

packages/opencode/src/tool/read.ts

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import { FileTime } from "../file/time"
77
import DESCRIPTION from "./read.txt"
88
import { Filesystem } from "../util/filesystem"
99
import { Instance } from "../project/instance"
10-
import { Provider } from "../provider/provider"
1110
import { Identifier } from "../id/id"
1211
import { Permission } from "../permission"
1312
import { Agent } from "@/agent/agent"
@@ -94,15 +93,11 @@ export const ReadTool = Tool.define("read", {
9493
throw new Error(`File not found: ${filepath}`)
9594
}
9695

97-
const isImage = isImageFile(filepath)
98-
const model = ctx.extra?.model as Provider.Model | undefined
99-
const supportsImages = model?.capabilities.input.image ?? false
100-
if (isImage) {
101-
if (!supportsImages) {
102-
throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
103-
}
96+
const isImage = file.type.startsWith("image/")
97+
const isPdf = file.type === "application/pdf"
98+
if (isImage || isPdf) {
10499
const mime = file.type
105-
const msg = "Image read successfully"
100+
const msg = `${isImage ? "Image" : "PDF"} read successfully`
106101
return {
107102
title,
108103
output: msg,
@@ -164,25 +159,6 @@ export const ReadTool = Tool.define("read", {
164159
},
165160
})
166161

167-
/** Recognised image extensions (lower-case, leading dot) mapped to their format label. */
const IMAGE_FORMAT_BY_EXTENSION = new Map<string, string>([
  [".jpg", "JPEG"],
  [".jpeg", "JPEG"],
  [".png", "PNG"],
  [".gif", "GIF"],
  [".bmp", "BMP"],
  [".webp", "WebP"],
])

/**
 * Identifies an image file by its extension, ignoring case.
 *
 * @param filePath Path whose extension is inspected; the file is not opened.
 * @returns The format label ("JPEG", "PNG", "GIF", "BMP" or "WebP"), or `false`
 *   when the extension is not a recognised image type.
 */
function isImageFile(filePath: string): string | false {
  const extension = path.extname(filePath).toLowerCase()
  return IMAGE_FORMAT_BY_EXTENSION.get(extension) ?? false
}
185-
186162
async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
187163
const ext = path.extname(filepath).toLowerCase()
188164
// binary check for common non-text extensions

0 commit comments

Comments
 (0)