Skip to content

Commit e7531e5

Browse files
committed
fix: complete PR #8225 - add missing webview URI to base64 conversion
- Add normalizeImageRefsToDataUrls() function to convert webview URIs to base64 data URLs
- Add formatImagesIntoBlocksAsync() for async image processing in backend
- Update Task.ts to use async conversion when storing images in backend messages
- Backend now stores base64 (for API calls), frontend displays webview URIs (memory efficient)
- Fixes OpenRouter and other providers not being able to see attached images
- Maintains PR goals: webview memory efficiency + working image functionality
1 parent f34243e commit e7531e5

File tree

4 files changed

+182
-2
lines changed

4 files changed

+182
-2
lines changed

src/core/prompts/responses.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import * as path from "path"
33
import * as diff from "diff"
44
import { RooIgnoreController, LOCK_TEXT_SYMBOL } from "../ignore/RooIgnoreController"
55
import { RooProtectedController } from "../protect/RooProtectedController"
6+
import { normalizeImageRefsToDataUrls } from "../../integrations/misc/imageDataUrl"
67

78
export const formatResponse = {
89
toolDenied: () => `The user denied this operation.`,
@@ -200,6 +201,23 @@ const formatImagesIntoBlocks = (images?: string[]): Anthropic.ImageBlockParam[]
200201
: []
201202
}
202203

204+
/**
 * Asynchronous counterpart of `formatImagesIntoBlocks` for image references that may
 * still be webview URIs rather than base64 data URLs.
 *
 * The references are first run through `normalizeImageRefsToDataUrls`, and the resulting
 * data URLs are then handed to the synchronous `formatImagesIntoBlocks`. This lets the
 * frontend keep using webview URIs while the backend stores base64 for API calls.
 *
 * @param images - Image references (data URLs and/or webview URIs); may be omitted.
 * @returns The image blocks, or an empty array when no images were supplied.
 */
export const formatImagesIntoBlocksAsync = async (images?: string[]): Promise<Anthropic.ImageBlockParam[]> => {
	if (images?.length) {
		// Turn every non-data-URL reference into a base64 data URL first...
		const normalized = await normalizeImageRefsToDataUrls(images)

		// ...then reuse the synchronous formatter to build the blocks.
		return formatImagesIntoBlocks(normalized)
	}

	// Nothing was attached.
	return []
}
220+
203221
const toolUseInstructionsReminder = `# Reminder: Instructions for Tool Use
204222
205223
Tool uses are formatted using XML-style tags. The tool name itself becomes the XML tag name. Each parameter is enclosed within its own set of tags. Here's the structure:

src/core/task/Task.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,7 +1215,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
12151215
await this.say("text", task, images)
12161216
this.isInitialized = true
12171217

1218-
let imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(images)
1218+
// Convert webview URIs to base64 for backend storage
1219+
const { formatImagesIntoBlocksAsync } = await import("../prompts/responses")
1220+
let imageBlocks: Anthropic.ImageBlockParam[] = await formatImagesIntoBlocksAsync(images)
12191221

12201222
// Task starting
12211223

@@ -1480,7 +1482,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
14801482
}
14811483

14821484
if (responseImages && responseImages.length > 0) {
1483-
newUserContent.push(...formatResponse.imageBlocks(responseImages))
1485+
// Convert webview URIs to base64 for backend storage
1486+
const { formatImagesIntoBlocksAsync } = await import("../prompts/responses")
1487+
const responseImageBlocks = await formatImagesIntoBlocksAsync(responseImages)
1488+
newUserContent.push(...responseImageBlocks)
14841489
}
14851490

14861491
// Ensure we have at least some content to send to the API.
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { describe, it, expect, vi, beforeEach } from "vitest"
2+
import { normalizeImageRefsToDataUrls } from "../imageDataUrl"
3+
import * as fs from "fs/promises"
4+
5+
// Mock fs module
6+
vi.mock("fs/promises")
7+
8+
describe("normalizeImageRefsToDataUrls", () => {
	// Shared fixtures. A file:// URI stands in for a webview URI in these tests.
	const pngFileUri = "file:///path/to/test.png"
	const pngDataUrlPrefix = /^data:image\/png;base64,/

	// Makes the mocked fs.readFile resolve with a small fake image payload.
	const stubReadFileSuccess = () => vi.mocked(fs.readFile).mockResolvedValue(Buffer.from("test image data"))

	beforeEach(() => {
		// Clear recorded calls so call assertions only see the current test.
		vi.clearAllMocks()
	})

	it("should pass through data URLs unchanged", async () => {
		const onePixelPng =
			"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="

		const converted = await normalizeImageRefsToDataUrls([onePixelPng])

		expect(converted).toEqual([onePixelPng])
	})

	it("should convert webview URIs to data URLs", async () => {
		stubReadFileSuccess()

		const converted = await normalizeImageRefsToDataUrls([pngFileUri])

		expect(converted).toHaveLength(1)
		expect(converted[0]).toMatch(pngDataUrlPrefix)
		// The file:// prefix must be stripped before the file is read.
		expect(fs.readFile).toHaveBeenCalledWith("/path/to/test.png")
	})

	it("should handle mixed arrays of data URLs and webview URIs", async () => {
		const jpegDataUrl = "data:image/jpeg;base64,test123"
		stubReadFileSuccess()

		const converted = await normalizeImageRefsToDataUrls([jpegDataUrl, pngFileUri])

		expect(converted).toHaveLength(2)
		// The data URL is untouched; the URI is converted.
		expect(converted[0]).toBe(jpegDataUrl)
		expect(converted[1]).toMatch(pngDataUrlPrefix)
	})

	it("should handle errors gracefully by skipping problematic images", async () => {
		const goodDataUrl = "data:image/png;base64,valid"
		const missingFileUri = "file:///nonexistent/test.png"
		vi.mocked(fs.readFile).mockRejectedValue(new Error("File not found"))

		const converted = await normalizeImageRefsToDataUrls([goodDataUrl, missingFileUri])

		// The unreadable file is dropped; only the data URL survives.
		expect(converted).toEqual([goodDataUrl])
	})

	it("should handle empty arrays", async () => {
		const converted = await normalizeImageRefsToDataUrls([])

		expect(converted).toEqual([])
	})
})
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import * as fs from "fs/promises"
2+
import * as path from "path"
3+
4+
/**
 * Normalizes a list of image references into base64 data URLs suitable for API calls.
 *
 * References that already start with `data:image/` are kept verbatim. Every other
 * reference is resolved to a file path with `webviewUriToFilePath`, read from disk and
 * encoded as `data:<mime>;base64,<payload>`, with the MIME type taken from
 * `getMimeTypeFromPath`.
 *
 * Conversion is best-effort: a reference that cannot be resolved or read is logged via
 * `console.error` and left out, so the result may be shorter than the input. The
 * relative order of the remaining entries is preserved.
 *
 * @param imageRefs - Data URLs and/or webview/file URIs.
 * @returns Data URLs for every reference that could be kept or converted.
 */
export async function normalizeImageRefsToDataUrls(imageRefs: string[]): Promise<string[]> {
	const dataUrls: string[] = []

	// Files are read one after another, in input order.
	for (const ref of imageRefs) {
		if (ref.startsWith("data:image/")) {
			// Already in the target format.
			dataUrls.push(ref)
		} else {
			try {
				dataUrls.push(await readImageRefAsDataUrl(ref))
			} catch (error) {
				// Deliberately non-fatal: report the failure and drop just this image.
				console.error("Failed to convert webview URI to base64:", ref, error)
			}
		}
	}

	return dataUrls
}

/**
 * Reads the file behind a single webview/file URI and encodes it as a base64 data URL.
 * Rejects if the URI cannot be decoded or the file cannot be read.
 */
async function readImageRefAsDataUrl(imageRef: string): Promise<string> {
	const localPath = webviewUriToFilePath(imageRef)
	const contents = await fs.readFile(localPath)
	const payload = contents.toString("base64")
	const mime = getMimeTypeFromPath(localPath)
	return `data:${mime};base64,${payload}`
}
35+
36+
/**
 * Converts an image reference received from the webview into a local file system path.
 *
 * Recognised forms, checked in this order:
 *  1. `vscode-resource://<authority>/<path>`: everything after the authority is the path.
 *  2. `file://<path>`
 *  3. `https://<prefix>.vscode-cdn.net/<path>`: the URL path is the file path.
 *  4. Other strings containing `vscode-userdata` / `vscode-cdn.net`: a heuristic search
 *     for a `/Users…` or `C:…` image path inside the decoded string.
 *  5. Anything else is returned unchanged and treated as a plain file path.
 *
 * Percent-encoding is decoded. Where the path comes from a URI path component
 * (forms 1-3), a leading slash in front of a Windows drive letter (`/C:/…`) is removed,
 * because that slash is URI syntax rather than part of the path.
 *
 * @param webviewUri - URI or path string identifying an image file.
 * @returns The decoded file system path.
 * @throws URIError if the input contains malformed percent-encoding.
 */
function webviewUriToFilePath(webviewUri: string): string {
	// Form 1: legacy vscode-resource scheme.
	const legacyMatch = webviewUri.match(/vscode-resource:\/\/[^\/]+(.+)/)
	if (legacyMatch) {
		return stripSlashBeforeDriveLetter(decodeURIComponent(legacyMatch[1]))
	}

	// Form 2: file:// URIs. 7 is the length of "file://".
	if (webviewUri.startsWith("file://")) {
		return stripSlashBeforeDriveLetter(decodeURIComponent(webviewUri.substring(7)))
	}

	if (webviewUri.includes("vscode-userdata") || webviewUri.includes("vscode-cdn.net")) {
		// Form 3: take the URL path as-is (minus query/fragment), so any absolute path
		// works, not only ones containing "Users" or "C:" with a known image extension.
		const cdnMatch = webviewUri.match(/^https?:\/\/[^\/?#]*vscode-cdn\.net(\/[^?#]+)/i)
		if (cdnMatch) {
			return stripSlashBeforeDriveLetter(decodeURIComponent(cdnMatch[1]))
		}

		// Form 4: heuristic fallback. The optional leading slash keeps "/Users/…"
		// absolute; without it the result would be resolved relative to the cwd.
		const decoded = decodeURIComponent(webviewUri)
		const pathMatch = decoded.match(/(?:\/?Users|C:)[^?#]+\.(?:png|jpg|jpeg|gif|webp)/i)
		if (pathMatch) {
			return pathMatch[0]
		}
	}

	// Form 5: last resort, treat the input as a file path.
	return webviewUri
}

/**
 * Removes the leading slash from `/C:/…`-style paths that come from URI path components.
 * Paths that do not start with `/<letter>:` followed by a separator are returned unchanged.
 */
function stripSlashBeforeDriveLetter(uriPath: string): string {
	return /^\/[A-Za-z]:(?:[\\/]|$)/.test(uriPath) ? uriPath.slice(1) : uriPath
}
70+
71+
/**
72+
* Gets the MIME type from a file path
73+
*/
74+
function getMimeTypeFromPath(filePath: string): string {
75+
const ext = path.extname(filePath).toLowerCase()
76+
77+
switch (ext) {
78+
case ".png":
79+
return "image/png"
80+
case ".jpg":
81+
case ".jpeg":
82+
return "image/jpeg"
83+
case ".gif":
84+
return "image/gif"
85+
case ".webp":
86+
return "image/webp"
87+
case ".svg":
88+
return "image/svg+xml"
89+
case ".bmp":
90+
return "image/bmp"
91+
default:
92+
return "image/png"
93+
}
94+
}

0 commit comments

Comments
 (0)