Skip to content

Commit b22a618

Browse files
roomote[bot], roomote, and daniel-lxs
authored
feat: add optional input image parameter to image generation tool (#7525)
Co-authored-by: Roo Code <[email protected]>
Co-authored-by: Daniel Riccio <[email protected]>
1 parent 1d46bd1 commit b22a618

File tree

5 files changed

+425
-10
lines changed

5 files changed

+425
-10
lines changed

src/api/providers/openrouter.ts

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -275,9 +275,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
275275
* @param prompt The text prompt for image generation
276276
* @param model The model to use for generation
277277
* @param apiKey The OpenRouter API key (must be explicitly provided)
278+
* @param inputImage Optional base64 encoded input image data URL
278279
* @returns The generated image data and format, or an error
279280
*/
280-
async generateImage(prompt: string, model: string, apiKey: string): Promise<ImageGenerationResult> {
281+
async generateImage(
282+
prompt: string,
283+
model: string,
284+
apiKey: string,
285+
inputImage?: string,
286+
): Promise<ImageGenerationResult> {
281287
if (!apiKey) {
282288
return {
283289
success: false,
@@ -299,7 +305,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
299305
messages: [
300306
{
301307
role: "user",
302-
content: prompt,
308+
content: inputImage
309+
? [
310+
{
311+
type: "text",
312+
text: prompt,
313+
},
314+
{
315+
type: "image_url",
316+
image_url: {
317+
url: inputImage,
318+
},
319+
},
320+
]
321+
: prompt,
303322
},
304323
],
305324
modalities: ["image", "text"],

src/core/prompts/tools/generate-image.ts

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,35 @@ import { ToolArgs } from "./types"
22

33
export function getGenerateImageDescription(args: ToolArgs): string {
44
return `## generate_image
5-
Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path.
5+
Description: Request to generate or edit an image using AI models through OpenRouter API. This tool can create new images from text prompts or modify existing images based on your instructions. When an input image is provided, the AI will apply the requested edits, transformations, or enhancements to that image.
66
Parameters:
7-
- prompt: (required) The text prompt describing the image to generate
8-
- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
7+
- prompt: (required) The text prompt describing what to generate or how to edit the image
8+
- path: (required) The file path where the generated/edited image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
9+
- image: (optional) The file path to an input image to edit or transform (relative to the current workspace directory ${args.cwd}). Supported formats: PNG, JPG, JPEG, GIF, WEBP.
910
Usage:
1011
<generate_image>
1112
<prompt>Your image description here</prompt>
1213
<path>path/to/save/image.png</path>
14+
<image>path/to/input/image.jpg</image>
1315
</generate_image>
1416
1517
Example: Requesting to generate a sunset image
1618
<generate_image>
1719
<prompt>A beautiful sunset over mountains with vibrant orange and purple colors</prompt>
1820
<path>images/sunset.png</path>
21+
</generate_image>
22+
23+
Example: Editing an existing image
24+
<generate_image>
25+
<prompt>Transform this image into a watercolor painting style</prompt>
26+
<path>images/watercolor-output.png</path>
27+
<image>images/original-photo.jpg</image>
28+
</generate_image>
29+
30+
Example: Upscaling and enhancing an image
31+
<generate_image>
32+
<prompt>Upscale this image to higher resolution, enhance details, improve clarity and sharpness while maintaining the original content and composition</prompt>
33+
<path>images/enhanced-photo.png</path>
34+
<image>images/low-res-photo.jpg</image>
1935
</generate_image>`
2036
}
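src/core/tools/__tests__/generateImageTool.test.ts (file path missing from this extract; inferred from the relative imports below — confirm)

Lines changed: 313 additions & 0 deletions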
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,313 @@
1+
import { describe, it, expect, vi, beforeEach } from "vitest"
2+
import { generateImageTool } from "../generateImageTool"
3+
import { ToolUse } from "../../../shared/tools"
4+
import { Task } from "../../task/Task"
5+
import * as fs from "fs/promises"
6+
import * as pathUtils from "../../../utils/pathUtils"
7+
import * as fileUtils from "../../../utils/fs"
8+
import { formatResponse } from "../../prompts/responses"
9+
import { EXPERIMENT_IDS } from "../../../shared/experiments"
10+
import { OpenRouterHandler } from "../../../api/providers/openrouter"
11+
12+
// Mock dependencies
13+
vi.mock("fs/promises")
14+
vi.mock("../../../utils/pathUtils")
15+
vi.mock("../../../utils/fs")
16+
vi.mock("../../../utils/safeWriteJson")
17+
vi.mock("../../../api/providers/openrouter")
18+
19+
describe("generateImageTool", () => {
20+
let mockCline: any
21+
let mockAskApproval: any
22+
let mockHandleError: any
23+
let mockPushToolResult: any
24+
let mockRemoveClosingTag: any
25+
26+
beforeEach(() => {
27+
vi.clearAllMocks()
28+
29+
// Setup mock Cline instance
30+
mockCline = {
31+
cwd: "/test/workspace",
32+
consecutiveMistakeCount: 0,
33+
recordToolError: vi.fn(),
34+
recordToolUsage: vi.fn(),
35+
sayAndCreateMissingParamError: vi.fn().mockResolvedValue("Missing parameter error"),
36+
say: vi.fn(),
37+
rooIgnoreController: {
38+
validateAccess: vi.fn().mockReturnValue(true),
39+
},
40+
rooProtectedController: {
41+
isWriteProtected: vi.fn().mockReturnValue(false),
42+
},
43+
providerRef: {
44+
deref: vi.fn().mockReturnValue({
45+
getState: vi.fn().mockResolvedValue({
46+
experiments: {
47+
[EXPERIMENT_IDS.IMAGE_GENERATION]: true,
48+
},
49+
apiConfiguration: {
50+
openRouterImageGenerationSettings: {
51+
openRouterApiKey: "test-api-key",
52+
selectedModel: "google/gemini-2.5-flash-image-preview",
53+
},
54+
},
55+
}),
56+
}),
57+
},
58+
fileContextTracker: {
59+
trackFileContext: vi.fn(),
60+
},
61+
didEditFile: false,
62+
}
63+
64+
mockAskApproval = vi.fn().mockResolvedValue(true)
65+
mockHandleError = vi.fn()
66+
mockPushToolResult = vi.fn()
67+
mockRemoveClosingTag = vi.fn((tag, content) => content || "")
68+
69+
// Mock file system operations
70+
vi.mocked(fileUtils.fileExistsAtPath).mockResolvedValue(true)
71+
vi.mocked(fs.readFile).mockResolvedValue(Buffer.from("fake-image-data"))
72+
vi.mocked(fs.mkdir).mockResolvedValue(undefined)
73+
vi.mocked(fs.writeFile).mockResolvedValue(undefined)
74+
vi.mocked(pathUtils.isPathOutsideWorkspace).mockReturnValue(false)
75+
})
76+
77+
describe("partial block handling", () => {
78+
it("should return early when block is partial", async () => {
79+
const partialBlock: ToolUse = {
80+
type: "tool_use",
81+
name: "generate_image",
82+
params: {
83+
prompt: "Generate a test image",
84+
path: "test-image.png",
85+
},
86+
partial: true,
87+
}
88+
89+
await generateImageTool(
90+
mockCline as Task,
91+
partialBlock,
92+
mockAskApproval,
93+
mockHandleError,
94+
mockPushToolResult,
95+
mockRemoveClosingTag,
96+
)
97+
98+
// Should not process anything when partial
99+
expect(mockAskApproval).not.toHaveBeenCalled()
100+
expect(mockPushToolResult).not.toHaveBeenCalled()
101+
expect(mockCline.say).not.toHaveBeenCalled()
102+
})
103+
104+
it("should return early when block is partial even with image parameter", async () => {
105+
const partialBlock: ToolUse = {
106+
type: "tool_use",
107+
name: "generate_image",
108+
params: {
109+
prompt: "Upscale this image",
110+
path: "upscaled-image.png",
111+
image: "source-image.png",
112+
},
113+
partial: true,
114+
}
115+
116+
await generateImageTool(
117+
mockCline as Task,
118+
partialBlock,
119+
mockAskApproval,
120+
mockHandleError,
121+
mockPushToolResult,
122+
mockRemoveClosingTag,
123+
)
124+
125+
// Should not process anything when partial
126+
expect(mockAskApproval).not.toHaveBeenCalled()
127+
expect(mockPushToolResult).not.toHaveBeenCalled()
128+
expect(mockCline.say).not.toHaveBeenCalled()
129+
expect(fs.readFile).not.toHaveBeenCalled()
130+
})
131+
132+
it("should process when block is not partial", async () => {
133+
const completeBlock: ToolUse = {
134+
type: "tool_use",
135+
name: "generate_image",
136+
params: {
137+
prompt: "Generate a test image",
138+
path: "test-image.png",
139+
},
140+
partial: false,
141+
}
142+
143+
// Mock the OpenRouterHandler generateImage method
144+
const mockGenerateImage = vi.fn().mockResolvedValue({
145+
success: true,
146+
imageData: "",
147+
})
148+
149+
vi.mocked(OpenRouterHandler).mockImplementation(
150+
() =>
151+
({
152+
generateImage: mockGenerateImage,
153+
}) as any,
154+
)
155+
156+
await generateImageTool(
157+
mockCline as Task,
158+
completeBlock,
159+
mockAskApproval,
160+
mockHandleError,
161+
mockPushToolResult,
162+
mockRemoveClosingTag,
163+
)
164+
165+
// Should process the complete block
166+
expect(mockAskApproval).toHaveBeenCalled()
167+
expect(mockGenerateImage).toHaveBeenCalled()
168+
expect(mockPushToolResult).toHaveBeenCalled()
169+
})
170+
})
171+
172+
describe("missing parameters", () => {
173+
it("should handle missing prompt parameter", async () => {
174+
const block: ToolUse = {
175+
type: "tool_use",
176+
name: "generate_image",
177+
params: {
178+
path: "test-image.png",
179+
},
180+
partial: false,
181+
}
182+
183+
await generateImageTool(
184+
mockCline as Task,
185+
block,
186+
mockAskApproval,
187+
mockHandleError,
188+
mockPushToolResult,
189+
mockRemoveClosingTag,
190+
)
191+
192+
expect(mockCline.consecutiveMistakeCount).toBe(1)
193+
expect(mockCline.recordToolError).toHaveBeenCalledWith("generate_image")
194+
expect(mockCline.sayAndCreateMissingParamError).toHaveBeenCalledWith("generate_image", "prompt")
195+
expect(mockPushToolResult).toHaveBeenCalledWith("Missing parameter error")
196+
})
197+
198+
it("should handle missing path parameter", async () => {
199+
const block: ToolUse = {
200+
type: "tool_use",
201+
name: "generate_image",
202+
params: {
203+
prompt: "Generate a test image",
204+
},
205+
partial: false,
206+
}
207+
208+
await generateImageTool(
209+
mockCline as Task,
210+
block,
211+
mockAskApproval,
212+
mockHandleError,
213+
mockPushToolResult,
214+
mockRemoveClosingTag,
215+
)
216+
217+
expect(mockCline.consecutiveMistakeCount).toBe(1)
218+
expect(mockCline.recordToolError).toHaveBeenCalledWith("generate_image")
219+
expect(mockCline.sayAndCreateMissingParamError).toHaveBeenCalledWith("generate_image", "path")
220+
expect(mockPushToolResult).toHaveBeenCalledWith("Missing parameter error")
221+
})
222+
})
223+
224+
describe("experiment validation", () => {
225+
it("should error when image generation experiment is disabled", async () => {
226+
// Disable the experiment
227+
mockCline.providerRef.deref().getState.mockResolvedValue({
228+
experiments: {
229+
[EXPERIMENT_IDS.IMAGE_GENERATION]: false,
230+
},
231+
})
232+
233+
const block: ToolUse = {
234+
type: "tool_use",
235+
name: "generate_image",
236+
params: {
237+
prompt: "Generate a test image",
238+
path: "test-image.png",
239+
},
240+
partial: false,
241+
}
242+
243+
await generateImageTool(
244+
mockCline as Task,
245+
block,
246+
mockAskApproval,
247+
mockHandleError,
248+
mockPushToolResult,
249+
mockRemoveClosingTag,
250+
)
251+
252+
expect(mockPushToolResult).toHaveBeenCalledWith(
253+
formatResponse.toolError(
254+
"Image generation is an experimental feature that must be enabled in settings. Please enable 'Image Generation' in the Experimental Settings section.",
255+
),
256+
)
257+
})
258+
})
259+
260+
describe("input image validation", () => {
261+
it("should handle non-existent input image", async () => {
262+
vi.mocked(fileUtils.fileExistsAtPath).mockResolvedValue(false)
263+
264+
const block: ToolUse = {
265+
type: "tool_use",
266+
name: "generate_image",
267+
params: {
268+
prompt: "Upscale this image",
269+
path: "upscaled.png",
270+
image: "non-existent.png",
271+
},
272+
partial: false,
273+
}
274+
275+
await generateImageTool(
276+
mockCline as Task,
277+
block,
278+
mockAskApproval,
279+
mockHandleError,
280+
mockPushToolResult,
281+
mockRemoveClosingTag,
282+
)
283+
284+
expect(mockCline.say).toHaveBeenCalledWith("error", expect.stringContaining("Input image not found"))
285+
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Input image not found"))
286+
})
287+
288+
it("should handle unsupported image format", async () => {
289+
const block: ToolUse = {
290+
type: "tool_use",
291+
name: "generate_image",
292+
params: {
293+
prompt: "Upscale this image",
294+
path: "upscaled.png",
295+
image: "test.bmp", // Unsupported format
296+
},
297+
partial: false,
298+
}
299+
300+
await generateImageTool(
301+
mockCline as Task,
302+
block,
303+
mockAskApproval,
304+
mockHandleError,
305+
mockPushToolResult,
306+
mockRemoveClosingTag,
307+
)
308+
309+
expect(mockCline.say).toHaveBeenCalledWith("error", expect.stringContaining("Unsupported image format"))
310+
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Unsupported image format"))
311+
})
312+
})
313+
})

0 commit comments

Comments
 (0)