@@ -1,10 +1,18 @@
1 | | -import { FilePart, ModelMessage, ToolResultPart, tool as createTool } from "ai"; |
| 1 | +import { |
| 2 | + FilePart, |
| 3 | + ImagePart, |
| 4 | + ModelMessage, |
| 5 | + ToolResultPart, |
| 6 | + tool as createTool, |
| 7 | + generateText, |
| 8 | +} from "ai"; |
2 | 9 | import { generateImageWithNanoBanana } from "lib/ai/image/generate-image"; |
3 | 10 | import { serverFileStorage } from "lib/file-storage"; |
4 | 11 | import { safe, watchError } from "ts-safe"; |
5 | 12 | import z from "zod"; |
6 | 13 | import { ImageToolName } from ".."; |
7 | 14 | import logger from "logger"; |
| 15 | +import { openai } from "@ai-sdk/openai"; |
8 | 16 |
|
9 | 17 | export type ImageToolResult = { |
10 | 18 | images: { |
@@ -99,6 +107,97 @@ export const nanoBananaTool = createTool({ |
99 | 107 | }, |
100 | 108 | }); |
101 | 109 |
|
| 110 | +export const openaiImageTool = createTool({ |
| 111 | + name: ImageToolName, |
| 112 | + description: `Generate, edit, or composite images based on the conversation context. This tool automatically analyzes recent messages to create images without requiring explicit input parameters. It includes all user-uploaded images from the recent conversation and only the most recent AI-generated image to avoid confusion. Use the 'mode' parameter to specify the operation type: 'create' for new images, 'edit' for modifying existing images, or 'composite' for combining multiple images. Use this when the user requests image creation, modification, or visual content generation.`, |
| 113 | + inputSchema: z.object({ |
| 114 | + mode: z |
| 115 | + .enum(["create", "edit", "composite"]) |
| 116 | + .optional() |
| 117 | + .default("create") |
| 118 | + .describe( |
| 119 | + "Image generation mode: 'create' for new images, 'edit' for modifying existing images, 'composite' for combining multiple images", |
| 120 | + ), |
| 121 | + }), |
| 122 | + execute: async ({ mode }, { messages, abortSignal }) => { |
| 123 | + const apiKey = process.env.OPENAI_API_KEY; |
| 124 | + if (!apiKey) { |
| 125 | + throw new Error("OPENAI_API_KEY is not set"); |
| 126 | + } |
| 127 | + |
| 128 | + let hasFoundImage = false; |
| 129 | + const latestMessages = messages |
| 130 | + .slice(-6) |
| 131 | + .reverse() |
| 132 | + .flatMap((m) => { |
| 133 | +      if (m.role !== "tool") return m;
| 134 | + if (hasFoundImage) return null; // Skip if we already found an image |
| 135 | + const fileParts = m.content.flatMap(convertToImageToolPartToImagePart); |
| 136 | + if (fileParts.length === 0) return null; |
| 137 | + hasFoundImage = true; // Mark that we found the most recent image |
| 138 | + return [ |
| 139 | + { |
| 140 | + role: "user", |
| 141 | + content: fileParts, |
| 142 | + }, |
| 143 | + m, |
| 144 | + ] as ModelMessage[]; |
| 145 | + }) |
| 146 | + .filter((v) => Boolean(v?.content?.length)) |
| 147 | + .reverse() as ModelMessage[]; |
| 148 | + const result = await generateText({ |
| 149 | + model: openai("gpt-4.1-mini"), |
| 150 | + abortSignal, |
| 151 | + messages: latestMessages, |
| 152 | + tools: { |
| 153 | + image_generation: openai.tools.imageGeneration({ |
| 154 | + outputFormat: "webp", |
| 155 | + model: "gpt-image-1", |
| 156 | + }), |
| 157 | + }, |
| 158 | + toolChoice: "required", |
| 159 | + }); |
| 160 | + |
| 161 | + for (const toolResult of result.staticToolResults) { |
| 162 | + if (toolResult.toolName === "image_generation") { |
| 163 | + const base64Image = toolResult.output.result; |
| 164 | + const uploadedImage = await serverFileStorage |
| 165 | + .upload(Buffer.from(base64Image, "base64"), { |
| 166 | + contentType: "image/webp", |
| 167 | + }) |
| 168 | + .catch(() => { |
| 169 | + throw new Error( |
| 170 | + "Image generation was successful, but file upload failed. Please check your file upload configuration and try again.", |
| 171 | + ); |
| 172 | + }); |
| 173 | + return { |
| 174 | + images: [{ url: uploadedImage.sourceUrl, mimeType: "image/webp" }], |
| 175 | + mode, |
| 176 | +          model: "gpt-image-1",
| 177 | + guide: |
| 178 | + "The image has been successfully generated and is now displayed above. If you need any edits, modifications, or adjustments to the image, please let me know.", |
| 179 | + }; |
| 180 | + } |
| 181 | + } |
| 182 | + return { |
| 183 | + images: [], |
| 184 | + mode, |
| 185 | +      model: "gpt-image-1",
| 186 | +      guide: "No image was generated. Please try again or rephrase the request.",
| 187 | + }; |
| 188 | + }, |
| 189 | +}); |
| 190 | + |
| 191 | +function convertToImageToolPartToImagePart(part: ToolResultPart): ImagePart[] { |
| 192 | + if (part.toolName !== ImageToolName) return []; |
| 193 | + const result = part.output.value as ImageToolResult; |
| 194 | + return result.images.map((image) => ({ |
| 195 | + type: "image", |
| 196 | + image: image.url, |
| 197 | + mediaType: image.mimeType, |
| 198 | + })); |
| 199 | +} |
| 200 | + |
102 | 201 | function convertToImageToolPartToFilePart(part: ToolResultPart): FilePart[] { |
103 | 202 | if (part.toolName !== ImageToolName) return []; |
104 | 203 | const result = part.output.value as ImageToolResult; |
|
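For context, a minimal sketch of how the new `openaiImageTool` might be registered in a chat handler. The module paths, the `loadConversation` helper, and the handler itself are assumptions for illustration only and are not part of this diff:

```ts
import { streamText, type ModelMessage } from "ai";
import { openai } from "@ai-sdk/openai";
import { ImageToolName } from "lib/ai/tools/image"; // assumed barrel behind the ".." import above
import { openaiImageTool } from "lib/ai/tools/image/openai-image"; // hypothetical path for this file

// Placeholder for however the app loads a session's prior messages.
declare function loadConversation(chatId: string): Promise<ModelMessage[]>;

export async function handleChat(chatId: string) {
  const messages = await loadConversation(chatId);
  const result = streamText({
    model: openai("gpt-4.1-mini"),
    messages,
    tools: {
      // Register under ImageToolName so earlier tool results stay recognizable
      // to convertToImageToolPartToImagePart on later turns.
      [ImageToolName]: openaiImageTool,
    },
  });
  return result.toUIMessageStreamResponse();
}
```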