Skip to content

Commit 2092fb1

Browse files
daniel-lxsmrubenscte
authored
feat: add image generation tool with OpenRouter integration (#7474)
Co-authored-by: Matt Rubens <[email protected]> Co-authored-by: cte <[email protected]>
1 parent d1122ea commit 2092fb1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+1218
-29
lines changed

packages/types/npm/package.metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@roo-code/types",
3-
"version": "1.62.0",
3+
"version": "1.63.0",
44
"description": "TypeScript type definitions for Roo Code.",
55
"publishConfig": {
66
"access": "public",

packages/types/src/experiment.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js"
66
* ExperimentId
77
*/
88

9-
export const experimentIds = ["powerSteering", "multiFileApplyDiff", "preventFocusDisruption"] as const
9+
/**
 * Identifiers of every experimental feature flag.
 *
 * Declared `as const` so the literal names are preserved in the type and can
 * be fed to `z.enum`.
 */
export const experimentIds = [
	"powerSteering",
	"multiFileApplyDiff",
	"preventFocusDisruption",
	"imageGeneration",
] as const
1015

1116
export const experimentIdsSchema = z.enum(experimentIds)
1217

@@ -20,6 +25,7 @@ export const experimentsSchema = z.object({
2025
powerSteering: z.boolean().optional(),
2126
multiFileApplyDiff: z.boolean().optional(),
2227
preventFocusDisruption: z.boolean().optional(),
28+
imageGeneration: z.boolean().optional(),
2329
})
2430

2531
export type Experiments = z.infer<typeof experimentsSchema>

packages/types/src/provider-settings.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,13 @@ const openRouterSchema = baseProviderSettingsSchema.extend({
142142
openRouterBaseUrl: z.string().optional(),
143143
openRouterSpecificProvider: z.string().optional(),
144144
openRouterUseMiddleOutTransform: z.boolean().optional(),
145+
// Image generation settings (experimental)
146+
openRouterImageGenerationSettings: z
147+
.object({
148+
openRouterApiKey: z.string().optional(),
149+
selectedModel: z.string().optional(),
150+
})
151+
.optional(),
145152
})
146153

147154
const bedrockSchema = apiModelIdProviderModelSchema.extend({

packages/types/src/tool.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ export const toolNames = [
3434
"fetch_instructions",
3535
"codebase_search",
3636
"update_todo_list",
37+
"generate_image",
3738
] as const
3839

3940
export const toolNamesSchema = z.enum(toolNames)

pnpm-lock.yaml

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/api/providers/openrouter.ts

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,33 @@ import { DEFAULT_HEADERS } from "./constants"
2626
import { BaseProvider } from "./base-provider"
2727
import type { SingleCompletionHandler } from "../index"
2828

29+
// Image generation types

/**
 * Subset of the chat-completions response body that `generateImage` reads.
 * Every field is optional because the payload is parsed from untrusted JSON
 * and is only probed with optional chaining.
 */
interface ImageGenerationResponse {
	choices?: Array<{
		message?: {
			content?: string
			/** Generated images; each `image_url.url` is expected to be a data URL. */
			images?: Array<{
				type?: string
				image_url?: {
					url?: string
				}
			}>
		}
	}>
	/** API-level error envelope, which may be present even when the HTTP status is OK. */
	error?: {
		message?: string
		type?: string
		// OpenRouter documents a numeric error code; `string` is kept for backward compatibility.
		code?: number | string
	}
}

/**
 * Outcome of an image generation request.
 */
export interface ImageGenerationResult {
	/** `true` when an image was produced; `false` when `error` is populated instead. */
	success: boolean
	/** The complete `data:image/...;base64,...` URL as received from the API. */
	imageData?: string
	/** Image subtype taken from the data URL prefix (`png`, `jpeg` or `jpg`). */
	imageFormat?: string
	/** Human-readable failure description. */
	error?: string
}
}
55+
2956
// Add custom interface for OpenRouter params.
3057
type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
3158
transforms?: string[]
@@ -242,4 +269,105 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
242269
const completion = response as OpenAI.Chat.ChatCompletion
243270
return completion.choices[0]?.message?.content || ""
244271
}
272+
273+
/**
 * Generate an image through OpenRouter's chat-completions endpoint by
 * requesting both the `image` and `text` output modalities.
 *
 * This method never throws: a missing key, HTTP failures, API-level errors,
 * empty or malformed payloads and network exceptions are all reported as
 * `{ success: false, error }`.
 *
 * @param prompt The text prompt for image generation
 * @param model The model to use for generation
 * @param apiKey The OpenRouter API key (must be explicitly provided)
 * @returns On success, `imageData` holds the complete data URL exactly as
 * received and `imageFormat` the subtype matched from its prefix (`png`,
 * `jpeg` or `jpg`); otherwise `error` describes what went wrong.
 */
async generateImage(prompt: string, model: string, apiKey: string): Promise<ImageGenerationResult> {
	if (!apiKey) {
		return {
			success: false,
			error: "OpenRouter API key is required for image generation",
		}
	}

	try {
		// NOTE(review): the endpoint is hard-coded; confirm whether a custom
		// OpenRouter base URL from the provider settings should apply here too.
		const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
			method: "POST",
			headers: {
				Authorization: `Bearer ${apiKey}`,
				"Content-Type": "application/json",
				"HTTP-Referer": "https://github.com/RooVetGit/Roo-Code",
				"X-Title": "Roo Code",
			},
			body: JSON.stringify({
				model,
				messages: [
					{
						role: "user",
						content: prompt,
					},
				],
				modalities: ["image", "text"],
			}),
		})

		if (!response.ok) {
			const errorText = await response.text()
			let errorMessage = `Failed to generate image: ${response.status} ${response.statusText}`
			try {
				// Prefer the API's own message when the error body is JSON.
				const errorJson: ImageGenerationResponse = JSON.parse(errorText)
				if (errorJson.error?.message) {
					errorMessage = `Failed to generate image: ${errorJson.error.message}`
				}
			} catch {
				// Body was not usable JSON: keep the status-based message.
			}
			return {
				success: false,
				error: errorMessage,
			}
		}

		const result: ImageGenerationResponse = await response.json()

		// An error envelope can arrive with a successful HTTP status.
		if (result.error) {
			// Fall back to a fixed text so the message never ends in "undefined".
			const reason = result.error.message || "Unknown error"
			return {
				success: false,
				error: `Failed to generate image: ${reason}`,
			}
		}

		// Extract the generated image from the response
		const images = result.choices?.[0]?.message?.images
		if (!images || images.length === 0) {
			return {
				success: false,
				error: "No image was generated in the response",
			}
		}

		const imageData = images[0]?.image_url?.url
		if (!imageData) {
			return {
				success: false,
				error: "Invalid image data in response",
			}
		}

		// Validate the data URL and read the image subtype from its prefix.
		// The full data URL (not just the base64 payload) is what gets returned.
		const base64Match = imageData.match(/^data:image\/(png|jpeg|jpg);base64,(.+)$/)
		if (!base64Match) {
			return {
				success: false,
				error: "Invalid image format received",
			}
		}

		return {
			success: true,
			imageData: imageData,
			imageFormat: base64Match[1],
		}
	} catch (error) {
		// Network failures and JSON decoding errors end up here.
		return {
			success: false,
			error: error instanceof Error ? error.message : "Unknown error occurred",
		}
	}
}
245373
}

src/core/assistant-message/presentAssistantMessage.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import { attemptCompletionTool } from "../tools/attemptCompletionTool"
2828
import { newTaskTool } from "../tools/newTaskTool"
2929

3030
import { updateTodoListTool } from "../tools/updateTodoListTool"
31+
import { generateImageTool } from "../tools/generateImageTool"
3132

3233
import { formatResponse } from "../prompts/responses"
3334
import { validateToolUse } from "../tools/validateToolUse"
@@ -221,6 +222,8 @@ export async function presentAssistantMessage(cline: Task) {
221222
const modeName = getModeBySlug(mode, customModes)?.name ?? mode
222223
return `[${block.name} in ${modeName} mode: '${message}']`
223224
}
225+
case "generate_image":
226+
return `[${block.name} for '${block.params.path}']`
224227
}
225228
}
226229

@@ -546,6 +549,9 @@ export async function presentAssistantMessage(cline: Task) {
546549
askFinishSubTaskApproval,
547550
)
548551
break
552+
case "generate_image":
553+
await generateImageTool(cline, block, askApproval, handleError, pushToolResult, removeClosingTag)
554+
break
549555
}
550556

551557
break
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { ToolArgs } from "./types"
2+
3+
/**
 * Build the prompt section that documents the `generate_image` tool.
 *
 * @param args Tool arguments; only `cwd` is read, to tell the model which
 * directory the `path` parameter is relative to.
 * @returns The tool description text (heading, parameters, usage and example).
 */
export function getGenerateImageDescription(args: ToolArgs): string {
	return `## generate_image
Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path.
Parameters:
- prompt: (required) The text prompt describing the image to generate
- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
Usage:
<generate_image>
<prompt>Your image description here</prompt>
<path>path/to/save/image.png</path>
</generate_image>

Example: Requesting to generate a sunset image
<generate_image>
<prompt>A beautiful sunset over mountains with vibrant orange and purple colors</prompt>
<path>images/sunset.png</path>
</generate_image>`
}

src/core/prompts/tools/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import { getSwitchModeDescription } from "./switch-mode"
2525
import { getNewTaskDescription } from "./new-task"
2626
import { getCodebaseSearchDescription } from "./codebase-search"
2727
import { getUpdateTodoListDescription } from "./update-todo-list"
28+
import { getGenerateImageDescription } from "./generate-image"
2829
import { CodeIndexManager } from "../../../services/code-index/manager"
2930

3031
// Map of tool names to their description functions
@@ -56,6 +57,7 @@ const toolDescriptionMap: Record<string, (args: ToolArgs) => string | undefined>
5657
apply_diff: (args) =>
5758
args.diffStrategy ? args.diffStrategy.getToolDescription({ cwd: args.cwd, toolOptions: args.toolOptions }) : "",
5859
update_todo_list: (args) => getUpdateTodoListDescription(args),
60+
generate_image: (args) => getGenerateImageDescription(args),
5961
}
6062

6163
export function getToolDescriptionsForMode(
@@ -129,6 +131,11 @@ export function getToolDescriptionsForMode(
129131
tools.delete("update_todo_list")
130132
}
131133

134+
// Conditionally exclude generate_image if experiment is not enabled
135+
if (!experiments?.imageGeneration) {
136+
tools.delete("generate_image")
137+
}
138+
132139
// Map tool descriptions for allowed tools
133140
const descriptions = Array.from(tools).map((toolName) => {
134141
const descriptionFn = toolDescriptionMap[toolName]
@@ -164,4 +171,5 @@ export {
164171
getInsertContentDescription,
165172
getSearchAndReplaceDescription,
166173
getCodebaseSearchDescription,
174+
getGenerateImageDescription,
167175
}

0 commit comments

Comments
 (0)