Skip to content

Commit 155f9b6

Browse files
committed
feat: add optional input image parameter to image generation tool
- Added optional image parameter to generate_image tool
- Updated tool description to document the new parameter
- Modified generateImageTool to read and validate input images
- Updated OpenRouter API to include input image in requests
- Added comprehensive test coverage for new functionality
1 parent 1d46bd1 commit 155f9b6

File tree

5 files changed

+541
-6
lines changed

5 files changed

+541
-6
lines changed

src/api/providers/openrouter.ts

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
275275
* @param prompt The text prompt for image generation
276276
* @param model The model to use for generation
277277
* @param apiKey The OpenRouter API key (must be explicitly provided)
278+
* @param inputImage Optional input image, as a base64-encoded data URL
278279
* @returns The generated image data and format, or an error
279280
*/
280-
async generateImage(prompt: string, model: string, apiKey: string): Promise<ImageGenerationResult> {
281+
async generateImage(
282+
prompt: string,
283+
model: string,
284+
apiKey: string,
285+
inputImage?: string,
286+
): Promise<ImageGenerationResult> {
281287
if (!apiKey) {
282288
return {
283289
success: false,
@@ -299,7 +305,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
299305
messages: [
300306
{
301307
role: "user",
302-
content: prompt,
308+
content: inputImage
309+
? [
310+
{
311+
type: "text",
312+
text: prompt,
313+
},
314+
{
315+
type: "image_url",
316+
image_url: {
317+
url: inputImage,
318+
},
319+
},
320+
]
321+
: prompt,
303322
},
304323
],
305324
modalities: ["image", "text"],

src/core/prompts/tools/generate-image.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,28 @@ import { ToolArgs } from "./types"
22

33
export function getGenerateImageDescription(args: ToolArgs): string {
44
return `## generate_image
5-
Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path.
5+
Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path. Optionally, you can provide an input image to use as a reference or starting point for the generation.
66
Parameters:
77
- prompt: (required) The text prompt describing the image to generate
88
- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
9+
- image: (optional) The file path to an input image to use as a reference or starting point (relative to the current workspace directory ${args.cwd}). Supported formats: PNG, JPG, JPEG, GIF, WEBP.
910
Usage:
1011
<generate_image>
1112
<prompt>Your image description here</prompt>
1213
<path>path/to/save/image.png</path>
14+
<image>path/to/input/image.jpg</image>
1315
</generate_image>
1416
1517
Example: Requesting to generate a sunset image
1618
<generate_image>
1719
<prompt>A beautiful sunset over mountains with vibrant orange and purple colors</prompt>
1820
<path>images/sunset.png</path>
21+
</generate_image>
22+
23+
Example: Generating an image with an input reference
24+
<generate_image>
25+
<prompt>Transform this image into a watercolor painting style</prompt>
26+
<path>images/watercolor-output.png</path>
27+
<image>images/original-photo.jpg</image>
1928
</generate_image>`
2029
}

0 commit comments

Comments
 (0)