Skip to content

Commit 812f2f4

Browse files
committed
fix: improve generate_image tool partial block handling and clarify image editing capabilities
- Fix partial block detection to properly wait for complete tool calls before processing
- Update tool documentation to explicitly mention image editing/transformation capabilities
- Add comprehensive tests for generateImageTool including partial block handling
- Clarify that the tool can edit existing images, not just use them as starting points
1 parent ccb6721 commit 812f2f4

File tree

3 files changed

+327
-7
lines changed

3 files changed

+327
-7
lines changed

src/core/prompts/tools/generate-image.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@ import { ToolArgs } from "./types"
22

33
export function getGenerateImageDescription(args: ToolArgs): string {
44
return `## generate_image
5-
Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path. Optionally, you can provide an input image to use as a reference or starting point for the generation.
5+
Description: Request to generate or edit an image using AI models through OpenRouter API. This tool can create new images from text prompts or modify existing images based on your instructions. When an input image is provided, the AI will apply the requested edits, transformations, or enhancements to that image.
66
Parameters:
7-
- prompt: (required) The text prompt describing the image to generate
8-
- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
9-
- image: (optional) The file path to an input image to use as a reference or starting point (relative to the current workspace directory ${args.cwd}). Supported formats: PNG, JPG, JPEG, GIF, WEBP.
7+
- prompt: (required) The text prompt describing what to generate or how to edit the image
8+
- path: (required) The file path where the generated/edited image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
9+
- image: (optional) The file path to an input image to edit or transform (relative to the current workspace directory ${args.cwd}). Supported formats: PNG, JPG, JPEG, GIF, WEBP.
1010
Usage:
1111
<generate_image>
1212
<prompt>Your image description here</prompt>
@@ -20,10 +20,17 @@ Example: Requesting to generate a sunset image
2020
<path>images/sunset.png</path>
2121
</generate_image>
2222
23-
Example: Generating an image with an input reference
23+
Example: Editing an existing image
2424
<generate_image>
2525
<prompt>Transform this image into a watercolor painting style</prompt>
2626
<path>images/watercolor-output.png</path>
2727
<image>images/original-photo.jpg</image>
28+
</generate_image>
29+
30+
Example: Upscaling and enhancing an image
31+
<generate_image>
32+
<prompt>Upscale this image to higher resolution, enhance details, improve clarity and sharpness while maintaining the original content and composition</prompt>
33+
<path>images/enhanced-photo.png</path>
34+
<image>images/low-res-photo.jpg</image>
2835
</generate_image>`
2936
}
Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
import { describe, it, expect, vi, beforeEach } from "vitest"
2+
import { generateImageTool } from "../generateImageTool"
3+
import { ToolUse } from "../../../shared/tools"
4+
import { Task } from "../../task/Task"
5+
import * as fs from "fs/promises"
6+
import * as pathUtils from "../../../utils/pathUtils"
7+
import * as fileUtils from "../../../utils/fs"
8+
import { formatResponse } from "../../prompts/responses"
9+
import { EXPERIMENT_IDS } from "../../../shared/experiments"
10+
11+
// Mock dependencies
12+
vi.mock("fs/promises")
13+
vi.mock("../../../utils/pathUtils")
14+
vi.mock("../../../utils/fs")
15+
vi.mock("../../../utils/safeWriteJson")
16+
vi.mock("../../../api/providers/openrouter")
17+
18+
describe("generateImageTool", () => {
19+
let mockCline: any
20+
let mockAskApproval: any
21+
let mockHandleError: any
22+
let mockPushToolResult: any
23+
let mockRemoveClosingTag: any
24+
25+
beforeEach(() => {
26+
vi.clearAllMocks()
27+
28+
// Setup mock Cline instance
29+
mockCline = {
30+
cwd: "/test/workspace",
31+
consecutiveMistakeCount: 0,
32+
recordToolError: vi.fn(),
33+
recordToolUsage: vi.fn(),
34+
sayAndCreateMissingParamError: vi.fn().mockResolvedValue("Missing parameter error"),
35+
say: vi.fn(),
36+
rooIgnoreController: {
37+
validateAccess: vi.fn().mockReturnValue(true),
38+
},
39+
rooProtectedController: {
40+
isWriteProtected: vi.fn().mockReturnValue(false),
41+
},
42+
providerRef: {
43+
deref: vi.fn().mockReturnValue({
44+
getState: vi.fn().mockResolvedValue({
45+
experiments: {
46+
[EXPERIMENT_IDS.IMAGE_GENERATION]: true,
47+
},
48+
apiConfiguration: {
49+
openRouterImageGenerationSettings: {
50+
openRouterApiKey: "test-api-key",
51+
selectedModel: "google/gemini-2.5-flash-image-preview",
52+
},
53+
},
54+
}),
55+
}),
56+
},
57+
fileContextTracker: {
58+
trackFileContext: vi.fn(),
59+
},
60+
didEditFile: false,
61+
}
62+
63+
mockAskApproval = vi.fn().mockResolvedValue(true)
64+
mockHandleError = vi.fn()
65+
mockPushToolResult = vi.fn()
66+
mockRemoveClosingTag = vi.fn((tag, content) => content || "")
67+
68+
// Mock file system operations
69+
vi.mocked(fileUtils.fileExistsAtPath).mockResolvedValue(true)
70+
vi.mocked(fs.readFile).mockResolvedValue(Buffer.from("fake-image-data"))
71+
vi.mocked(fs.mkdir).mockResolvedValue(undefined)
72+
vi.mocked(fs.writeFile).mockResolvedValue(undefined)
73+
vi.mocked(pathUtils.isPathOutsideWorkspace).mockReturnValue(false)
74+
})
75+
76+
describe("partial block handling", () => {
77+
it("should return early when block is partial", async () => {
78+
const partialBlock: ToolUse = {
79+
type: "tool_use",
80+
name: "generate_image",
81+
params: {
82+
prompt: "Generate a test image",
83+
path: "test-image.png",
84+
},
85+
partial: true,
86+
}
87+
88+
await generateImageTool(
89+
mockCline as Task,
90+
partialBlock,
91+
mockAskApproval,
92+
mockHandleError,
93+
mockPushToolResult,
94+
mockRemoveClosingTag,
95+
)
96+
97+
// Should not process anything when partial
98+
expect(mockAskApproval).not.toHaveBeenCalled()
99+
expect(mockPushToolResult).not.toHaveBeenCalled()
100+
expect(mockCline.say).not.toHaveBeenCalled()
101+
})
102+
103+
it("should return early when block is partial even with image parameter", async () => {
104+
const partialBlock: ToolUse = {
105+
type: "tool_use",
106+
name: "generate_image",
107+
params: {
108+
prompt: "Upscale this image",
109+
path: "upscaled-image.png",
110+
image: "source-image.png",
111+
},
112+
partial: true,
113+
}
114+
115+
await generateImageTool(
116+
mockCline as Task,
117+
partialBlock,
118+
mockAskApproval,
119+
mockHandleError,
120+
mockPushToolResult,
121+
mockRemoveClosingTag,
122+
)
123+
124+
// Should not process anything when partial
125+
expect(mockAskApproval).not.toHaveBeenCalled()
126+
expect(mockPushToolResult).not.toHaveBeenCalled()
127+
expect(mockCline.say).not.toHaveBeenCalled()
128+
expect(fs.readFile).not.toHaveBeenCalled()
129+
})
130+
131+
it("should process when block is not partial", async () => {
132+
const completeBlock: ToolUse = {
133+
type: "tool_use",
134+
name: "generate_image",
135+
params: {
136+
prompt: "Generate a test image",
137+
path: "test-image.png",
138+
},
139+
partial: false,
140+
}
141+
142+
// Mock OpenRouter handler
143+
const mockOpenRouterHandler = {
144+
generateImage: vi.fn().mockResolvedValue({
145+
success: true,
146+
imageData: "",
147+
}),
148+
}
149+
150+
vi.doMock("../../../api/providers/openrouter", () => ({
151+
OpenRouterHandler: vi.fn().mockImplementation(() => mockOpenRouterHandler),
152+
}))
153+
154+
await generateImageTool(
155+
mockCline as Task,
156+
completeBlock,
157+
mockAskApproval,
158+
mockHandleError,
159+
mockPushToolResult,
160+
mockRemoveClosingTag,
161+
)
162+
163+
// Should process the complete block
164+
expect(mockAskApproval).toHaveBeenCalled()
165+
expect(mockPushToolResult).toHaveBeenCalled()
166+
})
167+
})
168+
169+
describe("missing parameters", () => {
170+
it("should handle missing prompt parameter", async () => {
171+
const block: ToolUse = {
172+
type: "tool_use",
173+
name: "generate_image",
174+
params: {
175+
path: "test-image.png",
176+
},
177+
partial: false,
178+
}
179+
180+
await generateImageTool(
181+
mockCline as Task,
182+
block,
183+
mockAskApproval,
184+
mockHandleError,
185+
mockPushToolResult,
186+
mockRemoveClosingTag,
187+
)
188+
189+
expect(mockCline.consecutiveMistakeCount).toBe(1)
190+
expect(mockCline.recordToolError).toHaveBeenCalledWith("generate_image")
191+
expect(mockCline.sayAndCreateMissingParamError).toHaveBeenCalledWith("generate_image", "prompt")
192+
expect(mockPushToolResult).toHaveBeenCalledWith("Missing parameter error")
193+
})
194+
195+
it("should handle missing path parameter", async () => {
196+
const block: ToolUse = {
197+
type: "tool_use",
198+
name: "generate_image",
199+
params: {
200+
prompt: "Generate a test image",
201+
},
202+
partial: false,
203+
}
204+
205+
await generateImageTool(
206+
mockCline as Task,
207+
block,
208+
mockAskApproval,
209+
mockHandleError,
210+
mockPushToolResult,
211+
mockRemoveClosingTag,
212+
)
213+
214+
expect(mockCline.consecutiveMistakeCount).toBe(1)
215+
expect(mockCline.recordToolError).toHaveBeenCalledWith("generate_image")
216+
expect(mockCline.sayAndCreateMissingParamError).toHaveBeenCalledWith("generate_image", "path")
217+
expect(mockPushToolResult).toHaveBeenCalledWith("Missing parameter error")
218+
})
219+
})
220+
221+
describe("experiment validation", () => {
222+
it("should error when image generation experiment is disabled", async () => {
223+
// Disable the experiment
224+
mockCline.providerRef.deref().getState.mockResolvedValue({
225+
experiments: {
226+
[EXPERIMENT_IDS.IMAGE_GENERATION]: false,
227+
},
228+
})
229+
230+
const block: ToolUse = {
231+
type: "tool_use",
232+
name: "generate_image",
233+
params: {
234+
prompt: "Generate a test image",
235+
path: "test-image.png",
236+
},
237+
partial: false,
238+
}
239+
240+
await generateImageTool(
241+
mockCline as Task,
242+
block,
243+
mockAskApproval,
244+
mockHandleError,
245+
mockPushToolResult,
246+
mockRemoveClosingTag,
247+
)
248+
249+
expect(mockPushToolResult).toHaveBeenCalledWith(
250+
formatResponse.toolError(
251+
"Image generation is an experimental feature that must be enabled in settings. Please enable 'Image Generation' in the Experimental Settings section.",
252+
),
253+
)
254+
})
255+
})
256+
257+
describe("input image validation", () => {
258+
it("should handle non-existent input image", async () => {
259+
vi.mocked(fileUtils.fileExistsAtPath).mockResolvedValue(false)
260+
261+
const block: ToolUse = {
262+
type: "tool_use",
263+
name: "generate_image",
264+
params: {
265+
prompt: "Upscale this image",
266+
path: "upscaled.png",
267+
image: "non-existent.png",
268+
},
269+
partial: false,
270+
}
271+
272+
await generateImageTool(
273+
mockCline as Task,
274+
block,
275+
mockAskApproval,
276+
mockHandleError,
277+
mockPushToolResult,
278+
mockRemoveClosingTag,
279+
)
280+
281+
expect(mockCline.say).toHaveBeenCalledWith("error", expect.stringContaining("Input image not found"))
282+
expect(mockPushToolResult).toHaveBeenCalledWith(
283+
formatResponse.toolError(expect.stringContaining("Input image not found")),
284+
)
285+
})
286+
287+
it("should handle unsupported image format", async () => {
288+
const block: ToolUse = {
289+
type: "tool_use",
290+
name: "generate_image",
291+
params: {
292+
prompt: "Upscale this image",
293+
path: "upscaled.png",
294+
image: "test.bmp", // Unsupported format
295+
},
296+
partial: false,
297+
}
298+
299+
await generateImageTool(
300+
mockCline as Task,
301+
block,
302+
mockAskApproval,
303+
mockHandleError,
304+
mockPushToolResult,
305+
mockRemoveClosingTag,
306+
)
307+
308+
expect(mockCline.say).toHaveBeenCalledWith("error", expect.stringContaining("Unsupported image format"))
309+
expect(mockPushToolResult).toHaveBeenCalledWith(
310+
formatResponse.toolError(expect.stringContaining("Unsupported image format")),
311+
)
312+
})
313+
})
314+
})

src/core/tools/generateImageTool.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ export async function generateImageTool(
4040
return
4141
}
4242

43-
if (block.partial && (!prompt || !relPath)) {
44-
// Wait for complete parameters
43+
if (block.partial) {
4544
return
4645
}
4746

0 commit comments

Comments
 (0)