Skip to content

Commit 0deef6e

Browse files
authored
feat: openai image generate (#287)
1 parent 1d1bd88 commit 0deef6e

File tree

7 files changed

+205
-23
lines changed

7 files changed

+205
-23
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
**Multi-AI Support** - Integrates all major LLMs: OpenAI, Anthropic, Google, xAI, Ollama, and more
1919
**Powerful Tools** - MCP protocol, web search, JS/Python code execution, data visualization
20+
**Image Generation** - Create and edit images with AI models (OpenAI, Google Gemini, xAI)
2021
**Automation** - Custom agents, visual workflows, artifact generation
2122
**Collaboration** - Share agents, workflows, and MCP configurations with your team
2223
**Voice Assistant** - Realtime voice chat with full MCP tool integration
@@ -210,13 +211,12 @@ Built-in web search powered by [Exa AI](https://exa.ai). Search the web with sem
210211

211212
#### 🎨 Image Generation
212213

213-
214214
<img width="1034" height="940" loading="lazy" alt="image-generation" src="https://github.com/user-attachments/assets/b081c837-8948-4f4d-a2f4-c8630cf0eaa2" />
215215

216216
Built-in image generation and editing capabilities powered by AI models. Create, edit, and modify images directly in your chats.
217217

218218
- **Supported Operations:** Image generation, editing, and composition
219-
- **Current Models:** Gemini Nano Banana, OpenAI (coming soon)
219+
- **Current Models:** Gemini Nano Banana, OpenAI
220220

221221
#### ⚡️ JS,PYTHON Executor
222222

src/app/api/chat/route.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ import {
4747
import { getSession } from "auth/server";
4848
import { colorize } from "consola/utils";
4949
import { generateUUID } from "lib/utils";
50-
import { nanoBananaTool } from "lib/ai/tools/image";
50+
import { nanoBananaTool, openaiImageTool } from "lib/ai/tools/image";
5151
import { ImageToolName } from "lib/ai/tools";
5252

5353
const logger = globalLogger.withDefaults({
@@ -209,7 +209,12 @@ export async function POST(request: Request) {
209209
);
210210

211211
const IMAGE_TOOL: Record<string, Tool> = useImageTool
212-
? { [ImageToolName]: nanoBananaTool }
212+
? {
213+
[ImageToolName]:
214+
imageTool?.model === "google"
215+
? nanoBananaTool
216+
: openaiImageTool,
217+
}
213218
: {};
214219
const vercelAITooles = safe({
215220
...MCP_TOOLS,

src/components/prompt-input.tsx

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ export default function PromptInput({
317317
);
318318

319319
const handleGenerateImage = useCallback(
320-
(provider?: "google") => {
320+
(provider?: "google" | "openai") => {
321321
if (!provider) {
322322
appStoreMutate({
323323
threadImageToolModel: {},
@@ -450,26 +450,24 @@ export default function PromptInput({
450450
// Handle ESC key to clear mentions
451451
useEffect(() => {
452452
const handleKeyDown = (e: KeyboardEvent) => {
453-
if (e.key === "Escape" && mentions.length > 0 && threadId) {
453+
if (
454+
e.key === "Escape" &&
455+
threadId &&
456+
(mentions.length > 0 || imageToolModel)
457+
) {
454458
e.preventDefault();
455459
e.stopPropagation();
456-
appStoreMutate((prev) => ({
457-
threadMentions: {
458-
...prev.threadMentions,
459-
[threadId]: [],
460-
},
460+
appStoreMutate(() => ({
461+
threadMentions: {},
461462
agentId: undefined,
463+
threadImageToolModel: {},
462464
}));
463465
editorRef.current?.commands.focus();
464466
}
465467
};
466468
window.addEventListener("keydown", handleKeyDown);
467469
return () => window.removeEventListener("keydown", handleKeyDown);
468-
}, [mentions.length, threadId, appStoreMutate]);
469-
470-
useEffect(() => {
471-
if (!editorRef.current) return;
472-
}, [editorRef.current]);
470+
}, [mentions.length, threadId, appStoreMutate, imageToolModel]);
473471

474472
return (
475473
<div className="max-w-3xl mx-auto fade-in animate-in">
@@ -598,6 +596,14 @@ export default function PromptInput({
598596
<GeminiIcon className="mr-2 size-4" />
599597
Gemini (Nano Banana)
600598
</DropdownMenuItem>
599+
<DropdownMenuItem
600+
disabled={modelInfo?.isToolCallUnsupported}
601+
onClick={() => handleGenerateImage("openai")}
602+
className="cursor-pointer"
603+
>
604+
<OpenAIIcon className="mr-2 size-4" />
605+
OpenAI
606+
</DropdownMenuItem>
601607
</DropdownMenuSubContent>
602608
</DropdownMenuPortal>
603609
</DropdownMenuSub>
@@ -625,6 +631,7 @@ export default function PromptInput({
625631
side="top"
626632
onSelectWorkflow={onSelectWorkflow}
627633
onSelectAgent={onSelectAgent}
634+
onGenerateImage={handleGenerateImage}
628635
mentions={mentions}
629636
/>
630637
</>

src/components/tool-invocation/image-generator.tsx

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,12 @@ function PureImageGeneratorToolInvocation({
7676
return (
7777
<div className="flex flex-col gap-4">
7878
<TextShimmer>{getModeText(mode)}</TextShimmer>
79-
<div className="w-full h-96 overflow-hidden relative">
79+
<div className="w-full h-96 overflow-hidden rounded-lg">
8080
<LetterGlitch />
81-
<div className="z-10 absolute inset-0 w-full h-1/12 bg-gradient-to-b to-90% from-background to-transparent pointer-events-none" />
82-
<div className="z-10 absolute inset-0 w-1/12 h-full bg-gradient-to-r from-background to-transparent pointer-events-none" />
83-
<div className="z-10 absolute left-0 bottom-0 w-full h-1/12 bg-gradient-to-t from-background to-transparent pointer-events-none" />
84-
<div className="z-10 absolute right-0 bottom-0 w-1/12 h-full bg-gradient-to-l from-background to-transparent pointer-events-none" />
8581
</div>
82+
<p className="text-xs text-muted-foreground text-center">
83+
Image generation may take up to 1 minute.
84+
</p>
8685
</div>
8786
);
8887
}

src/components/tool-select-dropdown.tsx

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import {
1111
CodeIcon,
1212
GlobeIcon,
1313
HardDriveUploadIcon,
14+
ImagesIcon,
1415
InfoIcon,
1516
Loader,
1617
MessageCircle,
@@ -77,6 +78,9 @@ import { mutate } from "swr";
7778
import { handleErrorWithToast } from "ui/shared-toast";
7879
import { useAgents } from "@/hooks/queries/use-agents";
7980
import { redriectMcpOauth } from "lib/ai/mcp/oauth-redirect";
81+
import { GeminiIcon } from "ui/gemini-icon";
82+
import { useChatModels } from "@/hooks/queries/use-chat-models";
83+
import { OpenAIIcon } from "ui/openai-icon";
8084

8185
interface ToolSelectDropdownProps {
8286
align?: "start" | "end" | "center";
@@ -85,6 +89,7 @@ interface ToolSelectDropdownProps {
8589
mentions?: ChatMention[];
8690
onSelectWorkflow?: (workflow: WorkflowSummary) => void;
8791
onSelectAgent?: (agent: AgentSummary) => void;
92+
onGenerateImage?: (provider?: "google" | "openai") => void;
8893
className?: string;
8994
}
9095

@@ -103,6 +108,7 @@ export function ToolSelectDropdown({
103108
side,
104109
onSelectWorkflow,
105110
onSelectAgent,
111+
onGenerateImage,
106112
mentions,
107113
className,
108114
}: ToolSelectDropdownProps) {
@@ -119,6 +125,18 @@ export function ToolSelectDropdown({
119125

120126
const t = useTranslations("Chat.Tool");
121127
const { isLoading } = useMcpList();
128+
const { data: providers } = useChatModels();
129+
const [globalModel] = appStore(useShallow((state) => [state.chatModel]));
130+
131+
const modelInfo = useMemo(() => {
132+
const provider = providers?.find(
133+
(provider) => provider.provider === globalModel?.provider,
134+
);
135+
const model = provider?.models.find(
136+
(model) => model.name === globalModel?.model,
137+
);
138+
return model;
139+
}, [providers, globalModel]);
122140

123141
useWorkflowToolList({
124142
refreshInterval: 1000 * 60 * 5,
@@ -236,6 +254,13 @@ export function ToolSelectDropdown({
236254
<div className="py-1">
237255
<DropdownMenuSeparator />
238256
</div>
257+
<ImageGeneratorSelector
258+
onGenerateImage={onGenerateImage}
259+
modelInfo={modelInfo}
260+
/>
261+
<div className="py-1">
262+
<DropdownMenuSeparator />
263+
</div>
239264
<div className="py-2">
240265
<ToolPresets />
241266
<div className="py-1">
@@ -1021,3 +1046,44 @@ function AgentSelector({
10211046
</DropdownMenuGroup>
10221047
);
10231048
}
1049+
1050+
function ImageGeneratorSelector({
1051+
onGenerateImage,
1052+
modelInfo,
1053+
}: {
1054+
onGenerateImage?: (provider?: "google" | "openai") => void;
1055+
modelInfo?: { isToolCallUnsupported?: boolean };
1056+
}) {
1057+
const t = useTranslations("Chat");
1058+
1059+
return (
1060+
<DropdownMenuGroup>
1061+
<DropdownMenuSub>
1062+
<DropdownMenuSubTrigger className="text-xs flex items-center gap-2 font-semibold cursor-pointer">
1063+
<ImagesIcon className="size-3.5" />
1064+
{t("generateImage")}
1065+
</DropdownMenuSubTrigger>
1066+
<DropdownMenuPortal>
1067+
<DropdownMenuSubContent>
1068+
<DropdownMenuItem
1069+
disabled={modelInfo?.isToolCallUnsupported}
1070+
onClick={() => onGenerateImage?.("google")}
1071+
className="cursor-pointer"
1072+
>
1073+
<GeminiIcon className="mr-2 size-4" />
1074+
Gemini (Nano Banana)
1075+
</DropdownMenuItem>
1076+
<DropdownMenuItem
1077+
disabled={modelInfo?.isToolCallUnsupported}
1078+
onClick={() => onGenerateImage?.("openai")}
1079+
className="cursor-pointer"
1080+
>
1081+
<OpenAIIcon className="mr-2 size-4" />
1082+
OpenAI
1083+
</DropdownMenuItem>
1084+
</DropdownMenuSubContent>
1085+
</DropdownMenuPortal>
1086+
</DropdownMenuSub>
1087+
</DropdownMenuGroup>
1088+
);
1089+
}

src/lib/ai/image/generate-image.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,15 @@ export async function generateImageWithXAI(
7474
export const generateImageWithNanoBanana = async (
7575
options: GenerateImageOptions,
7676
): Promise<GeneratedImageResult> => {
77+
const apiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY;
78+
if (!apiKey) {
79+
throw new Error("GOOGLE_GENERATIVE_AI_API_KEY is not set");
80+
}
81+
7782
const ai = new GoogleGenAI({
78-
apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY,
83+
apiKey: apiKey,
7984
});
85+
8086
const geminiMessages: GeminiMessage[] = await safe(options.messages || [])
8187
.map((messages) => Promise.all(messages.map(convertToGeminiMessage)))
8288
.watch(watchError(logger.error))

src/lib/ai/tools/image/index.ts

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1-
import { FilePart, ModelMessage, ToolResultPart, tool as createTool } from "ai";
1+
import {
2+
FilePart,
3+
ImagePart,
4+
ModelMessage,
5+
ToolResultPart,
6+
tool as createTool,
7+
generateText,
8+
} from "ai";
29
import { generateImageWithNanoBanana } from "lib/ai/image/generate-image";
310
import { serverFileStorage } from "lib/file-storage";
411
import { safe, watchError } from "ts-safe";
512
import z from "zod";
613
import { ImageToolName } from "..";
714
import logger from "logger";
15+
import { openai } from "@ai-sdk/openai";
816

917
export type ImageToolResult = {
1018
images: {
@@ -99,6 +107,97 @@ export const nanoBananaTool = createTool({
99107
},
100108
});
101109

110+
export const openaiImageTool = createTool({
111+
name: ImageToolName,
112+
description: `Generate, edit, or composite images based on the conversation context. This tool automatically analyzes recent messages to create images without requiring explicit input parameters. It includes all user-uploaded images from the recent conversation and only the most recent AI-generated image to avoid confusion. Use the 'mode' parameter to specify the operation type: 'create' for new images, 'edit' for modifying existing images, or 'composite' for combining multiple images. Use this when the user requests image creation, modification, or visual content generation.`,
113+
inputSchema: z.object({
114+
mode: z
115+
.enum(["create", "edit", "composite"])
116+
.optional()
117+
.default("create")
118+
.describe(
119+
"Image generation mode: 'create' for new images, 'edit' for modifying existing images, 'composite' for combining multiple images",
120+
),
121+
}),
122+
execute: async ({ mode }, { messages, abortSignal }) => {
123+
const apiKey = process.env.OPENAI_API_KEY;
124+
if (!apiKey) {
125+
throw new Error("OPENAI_API_KEY is not set");
126+
}
127+
128+
let hasFoundImage = false;
129+
const latestMessages = messages
130+
.slice(-6)
131+
.reverse()
132+
.flatMap((m) => {
133+
if (m.role != "tool") return m;
134+
if (hasFoundImage) return null; // Skip if we already found an image
135+
const fileParts = m.content.flatMap(convertToImageToolPartToImagePart);
136+
if (fileParts.length === 0) return null;
137+
hasFoundImage = true; // Mark that we found the most recent image
138+
return [
139+
{
140+
role: "user",
141+
content: fileParts,
142+
},
143+
m,
144+
] as ModelMessage[];
145+
})
146+
.filter((v) => Boolean(v?.content?.length))
147+
.reverse() as ModelMessage[];
148+
const result = await generateText({
149+
model: openai("gpt-4.1-mini"),
150+
abortSignal,
151+
messages: latestMessages,
152+
tools: {
153+
image_generation: openai.tools.imageGeneration({
154+
outputFormat: "webp",
155+
model: "gpt-image-1",
156+
}),
157+
},
158+
toolChoice: "required",
159+
});
160+
161+
for (const toolResult of result.staticToolResults) {
162+
if (toolResult.toolName === "image_generation") {
163+
const base64Image = toolResult.output.result;
164+
const uploadedImage = await serverFileStorage
165+
.upload(Buffer.from(base64Image, "base64"), {
166+
contentType: "image/webp",
167+
})
168+
.catch(() => {
169+
throw new Error(
170+
"Image generation was successful, but file upload failed. Please check your file upload configuration and try again.",
171+
);
172+
});
173+
return {
174+
images: [{ url: uploadedImage.sourceUrl, mimeType: "image/webp" }],
175+
mode,
176+
model: "gpt-4.1",
177+
guide:
178+
"The image has been successfully generated and is now displayed above. If you need any edits, modifications, or adjustments to the image, please let me know.",
179+
};
180+
}
181+
}
182+
return {
183+
images: [],
184+
mode,
185+
model: "gpt-4.1",
186+
guide: "",
187+
};
188+
},
189+
});
190+
191+
function convertToImageToolPartToImagePart(part: ToolResultPart): ImagePart[] {
192+
if (part.toolName !== ImageToolName) return [];
193+
const result = part.output.value as ImageToolResult;
194+
return result.images.map((image) => ({
195+
type: "image",
196+
image: image.url,
197+
mediaType: image.mimeType,
198+
}));
199+
}
200+
102201
function convertToImageToolPartToFilePart(part: ToolResultPart): FilePart[] {
103202
if (part.toolName !== ImageToolName) return [];
104203
const result = part.output.value as ImageToolResult;

Comments (0)