RooCodeInc · daniel-lxs · Sep 12, 2025 · Sep 12, 2025 · Sep 12, 2025 · roomote
@@ -44,8 +44,11 @@ export const globalSettingsSchema = z.object({
 	dismissedUpsells: z.array(z.string()).optional(),
 
 	// Image generation settings (experimental) - flattened for simplicity
+	imageGenerationProvider: z.enum(["openrouter", "gemini"]).optional(),
 	openRouterImageApiKey: z.string().optional(),
 	openRouterImageGenerationSelectedModel: z.string().optional(),
+	geminiImageApiKey: z.string().optional(),
+	geminiImageGenerationSelectedModel: z.string().optional(),
 
 	condensingApiConfigId: z.string().optional(),
 	customCondensingPrompt: z.string().optional(),
@@ -210,7 +213,8 @@ export const SECRET_STATE_KEYS = [
 
 // Global secrets that are part of GlobalSettings (not ProviderSettings)
 export const GLOBAL_SECRET_KEYS = [
-	"openRouterImageApiKey", // For image generation
+	"openRouterImageApiKey", // For OpenRouter image generation
+	"geminiImageApiKey", // For Gemini image generation
 ] as const
 
 // Type for the actual secret storage keys

@@ -0,0 +1,75 @@
+import { z } from "zod"
+
+/**
+ * Image Generation Provider
+ *
+ * `imageGenerationProviders` is the single list of supported provider ids.
+ * Both the zod schema and the `ImageGenerationProvider` union type are derived
+ * from it, so adding an id to the tuple extends all three at once.
+ */
+export const imageGenerationProviders = ["openrouter", "gemini"] as const
+export const imageGenerationProviderSchema = z.enum(imageGenerationProviders)
+export type ImageGenerationProvider = z.infer<typeof imageGenerationProviderSchema>
+
+/**
+ * Image Generation Model Info
+ *
+ * Static description of one image generation model offered by a provider.
+ * Only `provider`, `modelId` and `label` are required; the remaining fields
+ * are optional capability hints.
+ */
+export interface ImageGenerationModelInfo {
+	provider: ImageGenerationProvider // Provider this entry belongs to
+	modelId: string // Model identifier handed to the provider when generating
+	label: string // Human-readable model name
+	supportsEditMode?: boolean // Whether the model supports image editing (text + image input)
+	maxInputSize?: number // Maximum input image size in MB
+	outputFormats?: string[] // Supported output formats
+}
+
+/**
+ * Image Generation Models by Provider
+ *
+ * Every provider id maps to the list of models it offers. Order matters:
+ * the image generation tool falls back to the first entry of a provider's
+ * list when no model has been selected in settings.
+ */
+export const IMAGE_GENERATION_MODELS: Record<ImageGenerationProvider, ImageGenerationModelInfo[]> = {
+	openrouter: [
+		{
+			provider: "openrouter",
+			modelId: "google/gemini-2.5-flash-image-preview",
+			label: "Gemini 2.5 Flash Image Preview",
+			supportsEditMode: true,
+			outputFormats: ["png", "jpeg"],
+		},
+		{
+			provider: "openrouter",
+			modelId: "google/gemini-2.5-flash-image-preview:free",
+			label: "Gemini 2.5 Flash Image Preview (Free)",
+			supportsEditMode: true,
+			outputFormats: ["png", "jpeg"],
+		},
+	],
+	gemini: [
+		{
+			provider: "gemini",
+			modelId: "gemini-2.5-flash-image-preview",
+			label: "Gemini 2.5 Flash Image Preview",
+			supportsEditMode: true,
+			outputFormats: ["png", "jpeg"],
+		},
+	],
+}
+
+/**
+ * Look up the image generation models registered for `provider`.
+ * Yields an empty list when the provider has no entry in the table.
+ */
+export function getImageGenerationModelsForProvider(provider: ImageGenerationProvider): ImageGenerationModelInfo[] {
+	const registered = IMAGE_GENERATION_MODELS[provider]
+	if (registered) {
+		return registered
+	}
+	return []
+}
+
+/**
+ * Collect the models of every provider into one flat list,
+ * provider by provider, in the order the table declares them.
+ */
+export function getAllImageGenerationModels(): ImageGenerationModelInfo[] {
+	const everyModel: ImageGenerationModelInfo[] = []
+	for (const providerModels of Object.values(IMAGE_GENERATION_MODELS)) {
+		everyModel.push(...providerModels)
+	}
+	return everyModel
+}
+
+/**
+ * Image Generation Result
+ *
+ * Outcome of one image generation request. A successful result carries
+ * `imageData` and `imageFormat`; a failed one carries `error` instead.
+ */
+export interface ImageGenerationResult {
+	success: boolean // true when an image was produced
+	imageData?: string // Base64 encoded image data URL
+	imageFormat?: string // Format of the generated image (png, jpeg, etc.)
+	error?: string // Human-readable failure reason, set when success is false
+}
@@ -7,6 +7,7 @@ export * from "./experiment.js"
 export * from "./followup.js"
 export * from "./global-settings.js"
 export * from "./history.js"
+export * from "./image-generation.js"
 export * from "./ipc.js"
 export * from "./marketplace.js"
 export * from "./mcp.js"

@@ -5,10 +5,17 @@ import {
 	type GenerateContentParameters,
 	type GenerateContentConfig,
 	type GroundingMetadata,
+	type Content,
 } from "@google/genai"
 import type { JWTInput } from "google-auth-library"
 
-import { type ModelInfo, type GeminiModelId, geminiDefaultModelId, geminiModels } from "@roo-code/types"
+import {
+	type ModelInfo,
+	type GeminiModelId,
+	geminiDefaultModelId,
+	geminiModels,
+	type ImageGenerationResult,
+} from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
 import { safeJsonParse } from "../../shared/safeJsonParse"
@@ -335,4 +342,139 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 
 		return totalCost
 	}
+
+	/**
+	 * Generate an image using Gemini's image generation API.
+	 *
+	 * The whole body runs inside a try/catch, so failures (invalid input image,
+	 * empty response, SDK or network error) are reported as
+	 * `{ success: false, error }` rather than thrown.
+	 *
+	 * @param prompt The text prompt for image generation
+	 * @param model The model to use for generation
+	 * @param apiKey The Gemini API key (if not using vertex). When given and no
+	 *   Vertex project is configured, a dedicated client is created with it;
+	 *   otherwise the handler's existing client is used.
+	 * @param inputImage Optional base64 encoded input image data URL
+	 *   (png, jpeg or jpg) for editing
+	 * @returns The generated image data URL and format, or an error
+	 */
+	async generateImage(
+		prompt: string,
+		model: string,
+		apiKey?: string,
+		inputImage?: string,
+	): Promise<ImageGenerationResult> {
+		try {
+			// Create a temporary client with the provided API key if needed
+			let client: GoogleGenAI
+			if (apiKey && !this.options.vertexProjectId) {
+				// Use provided API key for standard Gemini
+				client = new GoogleGenAI({ apiKey })
+			} else {
+				// Use existing client (either vertex or standard with already configured key)
+				client = this.client
+			}
+
+			// Prepare the content for generation
+			const contents: Content[] = []
+
+			if (inputImage) {
+				// For image editing mode, include both text and image
+				const base64Match = inputImage.match(/^data:image\/(png|jpeg|jpg);base64,(.+)$/)
+				if (!base64Match) {
+					return {
+						success: false,
+						error: "Invalid input image format. Expected base64 data URL.",
+					}
+				}
+
+				// "jpg" is not a registered MIME subtype; normalize it to image/jpeg
+				const mimeType = base64Match[1] === "jpg" ? "image/jpeg" : `image/${base64Match[1]}`
+				const base64Data = base64Match[2]
+
+				contents.push({
+					role: "user",
+					parts: [
+						{ text: prompt },
+						{
+							inlineData: {
+								mimeType,
+								data: base64Data,
+							},
+						},
+					],
+				})
+			} else {
+				// For text-to-image mode
+				contents.push({
+					role: "user",
+					parts: [{ text: prompt }],
+				})
+			}
+
+			const config: GenerateContentConfig = {
+				httpOptions: this.options.googleGeminiBaseUrl
+					? { baseUrl: this.options.googleGeminiBaseUrl }
+					: undefined,
+				temperature: 1.0, // Higher temperature for more creative image generation
+			}
+
+			const params: GenerateContentParameters = {
+				model,
+				contents,
+				config,
+			}
+
+			const result = await client.models.generateContent(params)
+
+			// Extract the generated image from the response
+			if (!result.candidates || result.candidates.length === 0) {
+				return {
+					success: false,
+					error: "No candidates returned in the response",
+				}
+			}
+
+			const candidate = result.candidates[0]
+			if (!candidate.content || !candidate.content.parts) {
+				return {
+					success: false,
+					error: "No content parts in the response",
+				}
+			}
+
+			// Find the first image part that actually carries a payload
+			let imageData: string | undefined
+			let imageFormat = "png" // Default format
+
+			for (const part of candidate.content.parts) {
+				const inline = part.inlineData
+
+				// Skip non-image parts and image parts without data. Interpolating a
+				// missing payload would produce a bogus "...;base64,undefined" data URL
+				// that is then reported as a successful generation.
+				if (!inline?.data || !inline.mimeType?.startsWith("image/")) {
+					continue
+				}
+
+				// Extract format from mime type
+				imageFormat = inline.mimeType.replace("image/", "").replace("jpeg", "jpg")
+
+				// Convert to data URL format
+				imageData = `data:${inline.mimeType};base64,${inline.data}`
+				break
+			}
+
+			if (!imageData) {
+				return {
+					success: false,
+					error: "No image data found in the response",
+				}
+			}
+
+			return {
+				success: true,
+				imageData,
+				imageFormat,
+			}
+		} catch (error) {
+			const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"
+			console.error("Gemini image generation error:", errorMessage)
+
+			return {
+				success: false,
+				error: `Failed to generate image: ${errorMessage}`,
+			}
+		}
+	}
 }
@@ -9,9 +9,8 @@ import { getReadablePath } from "../../utils/path"
 import { isPathOutsideWorkspace } from "../../utils/pathUtils"
 import { EXPERIMENT_IDS, experiments } from "../../shared/experiments"
 import { OpenRouterHandler } from "../../api/providers/openrouter"
-
-// Hardcoded list of image generation models for now
-const IMAGE_GENERATION_MODELS = ["google/gemini-2.5-flash-image", "openai/gpt-5-image", "openai/gpt-5-image-mini"]
+import { GeminiHandler } from "../../api/providers/gemini"
+import { ImageGenerationProvider, getImageGenerationModelsForProvider } from "@roo-code/types"
 
 export async function generateImageTool(
 	cline: Task,
@@ -128,25 +127,60 @@ export async function generateImageTool(
 	// Check if file is write-protected
 	const isWriteProtected = cline.rooProtectedController?.isWriteProtected(relPath) || false
 
-	// Get OpenRouter API key from global settings (experimental image generation)
-	const openRouterApiKey = state?.openRouterImageApiKey
+	// Get the selected provider from settings (default to openrouter)
+	const selectedProvider = (state?.imageGenerationProvider || "openrouter") as ImageGenerationProvider
 
-	if (!openRouterApiKey) {
-		await cline.say(
-			"error",
-			"OpenRouter API key is required for image generation. Please configure it in the Image Generation experimental settings.",
-		)
-		pushToolResult(
-			formatResponse.toolError(
+	// Get selected model from settings based on provider
+	let selectedModel: string
+	let apiKey: string | undefined
+
+	if (selectedProvider === "openrouter") {
+		apiKey = state?.openRouterImageApiKey
+		if (!apiKey) {
+			await cline.say(
+				"error",
 				"OpenRouter API key is required for image generation. Please configure it in the Image Generation experimental settings.",
-			),
-		)
+			)
+			pushToolResult(
+				formatResponse.toolError(
+					"OpenRouter API key is required for image generation. Please configure it in the Image Generation experimental settings.",
+				),
+			)
+			return
+		}
+		// Get selected model or use default for OpenRouter
+		const models = getImageGenerationModelsForProvider("openrouter")
+		selectedModel =
+			state?.openRouterImageGenerationSelectedModel ||
+			(models[0]?.modelId ?? "google/gemini-2.5-flash-image-preview")
+	} else if (selectedProvider === "gemini") {
+		// For Gemini, we can use the existing Gemini API key from the provider settings
+		// Check for a dedicated image generation API key first, then fall back to the provider's API key
+		apiKey =
+			state?.geminiImageApiKey ||
+			(state?.apiConfiguration?.apiProvider === "gemini" ? state?.apiConfiguration?.geminiApiKey : undefined)
+		if (!apiKey) {
+			await cline.say(
+				"error",
+				"Gemini API key is required for image generation. Please configure it in the Image Generation experimental settings or in the Gemini provider settings.",
+			)
+			pushToolResult(
+				formatResponse.toolError(
+					"Gemini API key is required for image generation. Please configure it in the Image Generation experimental settings or in the Gemini provider settings.",
+				),
+			)
+			return
+		}
+		// Get selected model or use default for Gemini
+		const models = getImageGenerationModelsForProvider("gemini")
+		selectedModel =
+			state?.geminiImageGenerationSelectedModel || (models[0]?.modelId ?? "gemini-2.5-flash-image-preview")
+	} else {
+		await cline.say("error", `Unsupported image generation provider: ${selectedProvider}`)
+		pushToolResult(formatResponse.toolError(`Unsupported image generation provider: ${selectedProvider}`))
 		return
 	}
 
-	// Get selected model from settings or use default
-	const selectedModel = state?.openRouterImageGenerationSelectedModel || IMAGE_GENERATION_MODELS[0]
-
 	// Determine if the path is outside the workspace
 	const fullPath = path.resolve(cline.cwd, removeClosingTag("path", relPath))
 	const isOutsideWorkspace = isPathOutsideWorkspace(fullPath)
@@ -176,16 +210,28 @@ export async function generateImageTool(
 				return
 			}
 
-			// Create a temporary OpenRouter handler with minimal options
-			const openRouterHandler = new OpenRouterHandler({} as any)
-
-			// Call the generateImage method with the explicit API key and optional input image
-			const result = await openRouterHandler.generateImage(
-				prompt,
-				selectedModel,
-				openRouterApiKey,
-				inputImageData,
-			)
+			// Generate image based on provider
+			let result
+
+			if (selectedProvider === "openrouter") {
+				// Create a temporary OpenRouter handler with minimal options
+				const openRouterHandler = new OpenRouterHandler({} as any)
+
+				// Call the generateImage method with the explicit API key and optional input image
+				result = await openRouterHandler.generateImage(prompt, selectedModel, apiKey!, inputImageData)
+			} else if (selectedProvider === "gemini") {
+				// Create a temporary Gemini handler with minimal options
+				const geminiHandler = new GeminiHandler({ geminiApiKey: apiKey } as any)
+
+				// Call the generateImage method with the optional input image
+				result = await geminiHandler.generateImage(prompt, selectedModel, apiKey, inputImageData)
+			} else {
+				// This should not happen due to earlier check, but for type safety
+				result = {
+					success: false,
+					error: `Unsupported provider: ${selectedProvider}`,
+				}
+			}
 
 			if (!result.success) {
 				await cline.say("error", result.error || "Failed to generate image")