docs: add object mode to ai

twlite · twlite · commit c37cb151e03e · 2025-06-09T21:22:38.000+05:45
diff --git a/apps/test-bot/src/ai.ts b/apps/test-bot/src/ai.ts
@@ -9,7 +9,10 @@ const model = google.languageModel('gemini-2.0-flash');
 
 configureAI({
   selectAiModel: async () => {
-    return { model };
+    return {
+      model,
+      objectMode: true,
+    };
   },
   messageFilter: async (message) => {
     return (
diff --git a/apps/website/docs/guide/13-ai-powered-commands/01-introduction.mdx b/apps/website/docs/guide/13-ai-powered-commands/01-introduction.mdx
@@ -54,7 +54,14 @@ configureAI({
   // commandkit will call this function
   // to determine which AI model to use
   selectAiModel: async () => {
-    return { model };
+    return {
+      model,
+      // OPTIONAL: provider specific options
+      options,
+      // OPTIONAL: whether to use the object mode. Default is false.
+      // If set to true, the AI will be able to generate object responses, such as creating polls or sending embeds.
+      objectMode: false,
+    };
   },
   messageFilter: async (message) => {
     // only respond to messages in guilds that mention the bot
@@ -156,3 +163,17 @@ AI can also call multiple commands in a single message. Eg:
 ```
 
 The above prompt will call the built-in `getUserInfo` tool and the `balance` command.
+
+## Object Mode Example
+
+To enable object mode, return `objectMode: true` from the `selectAiModel` function passed to `configureAI`. This allows the AI to generate structured (object) responses, such as creating polls or sending embeds.
+
+```text
+@bot create a poll titled "What's your favorite game?" and answers should be
+- minecraft
+- fortnite
+- pubg
+- clash of clans
+```
+
+![object mode example](/img/ai-object-mode.png)
diff --git a/apps/website/static/img/ai-object-mode.png b/apps/website/static/img/ai-object-mode.png
diff --git a/packages/ai/src/plugin.ts b/packages/ai/src/plugin.ts
@@ -3,9 +3,10 @@ import { AiPluginOptions, MessageFilter, SelectAiModel } from './types';
 import { LoadedCommand, Logger } from 'commandkit';
 import { AiContext } from './context';
 import { Collection, Events, Message, TextChannel } from 'discord.js';
-import { tool, Tool, generateText } from 'ai';
+import { tool, Tool, generateText, Output } from 'ai';
 import { z } from 'zod';
 import { getAiWorkerContext, runInAiWorkerContext } from './ai-context-worker';
+import { AiResponseSchema } from './schema';
 
 type WithAI<T extends LoadedCommand> = T & {
   data: {
@@ -201,7 +202,10 @@ export class AiPlugin extends RuntimePlugin<AiPluginOptions> {
       Tools are basically like commands that you can execute to perform specific actions based on user input.
       Keep the response short and concise, and only use tools when necessary. Keep the response length under 2000 characters.
       Do not include your own text in the response unless necessary. For text formatting, you can use discord's markdown syntax.
-      ${message.inGuild() ? `\nYou are currently in a guild named ${message.guild.name} whose id is ${message.guildId}. While in guild, you can fetch member information if needed.` : '\nYou are currently in a direct message with the user.'}`;
+      ${message.inGuild() ? `\nYou are currently in a guild named ${message.guild.name} whose id is ${message.guildId}. While in guild, you can fetch member information if needed.` : '\nYou are currently in a direct message with the user.'}
+      If the user asks you to create a poll or embeds, create a text containing the poll or embed information. If structured response is possible, use the structured response format.
+      If the user asks you to perform a task that requires a tool, use the tool to perform the task and return the result.
+      `;
 
     const userInfo = `<user>
     <id>${message.author.id}</id>
@@ -215,22 +219,116 @@ export class AiPlugin extends RuntimePlugin<AiPluginOptions> {
       const stopTyping = await this.startTyping(channel);
 
       try {
-        const { model, options } = await aiModelSelector(message);
-        const result = await generateText({
-          abortSignal: AbortSignal.timeout(60_000),
+        const {
           model,
-          tools: { ...this.toolsRecord, ...this.defaultTools },
-          prompt: `${userInfo}\nUser: ${message.content}\nAI:`,
-          system: systemPrompt,
-          maxSteps: 5,
-          providerOptions: options,
-        });
+          options,
+          objectMode = false,
+        } = await aiModelSelector(message);
+
+        const originalPrompt = `${userInfo}\nUser: ${message.content}\nAI:`;
+
+        const call = ({ // one generateText request; the defaults describe the normal first attempt
+          prompt = originalPrompt,
+          includeTools = true, // false omits the `tools` option entirely
+          disableObjectMode = false, // true skips structured output even when objectMode is on
+        }) =>
+          generateText({
+            abortSignal: AbortSignal.timeout(60_000), // abort the request after 60 seconds
+            model,
+            ...(includeTools && {
+              tools: { ...this.toolsRecord, ...this.defaultTools }, // defaultTools win on key clashes (spread last)
+            }),
+            prompt,
+            system: systemPrompt,
+            maxSteps: 5,
+            providerOptions: options,
+            ...(objectMode && !disableObjectMode // structured output only when enabled and not overridden
+              ? {
+                  experimental_output: Output.object({
+                    schema: AiResponseSchema,
+                  }),
+                }
+              : {}),
+          });
+
+        let result: any;
+
+        try {
+          result = await call({});
+        } catch (error) {
+          // Only object mode has a fallback. Without it, rethrow so the real
+          // failure surfaces instead of a TypeError on an undefined `result`.
+          if (!objectMode) throw error;
+
+          // Structured generation failed: first get a plain-text answer
+          // (tools stay enabled by default, structured output is turned off).
+          const r1 = await call({ disableObjectMode: true });
+
+          if (!r1.text) throw new Error('No text response from AI');
+
+          // Then reshape that answer into the response schema, without tools.
+          // `originalPrompt` ends with `AI:`, so appending the text completes it.
+          result = await call({
+            includeTools: false,
+            prompt: `Original context: ${originalPrompt} ${r1.text}\n\nGenerate a structured response based on the previous response`,
+          });
+        }
 
         stopTyping();
 
-        if (!!result.text) {
+        let structuredResult: z.infer<typeof AiResponseSchema> | null = null;
+
+        try {
+          const val =
+            'experimental_output' in result && result.experimental_output;
+
+          if (val) {
+            structuredResult = val;
+          }
+        } catch {}
+
+        if (structuredResult) {
+          const { poll, content, embeds } = structuredResult;
+          // An empty `embeds` array is truthy, so test its length instead.
+          if (!poll && !content && !embeds?.length) {
+            Logger.warn(
+              'AI response did not include any content, embeds, or poll.',
+            );
+            return;
+          }
+
+          await message.reply({
+            content: content?.substring(0, 2000),
+            // Generated text must never ping anyone, same as the plain-text reply.
+            allowedMentions: { parse: [] },
+            embeds: embeds?.map((embed) => ({
+              title: embed.title,
+              description: embed.description,
+              url: embed.url,
+              color: embed.color,
+              image: embed.image?.url ? { url: embed.image.url } : undefined,
+              thumbnail: embed.thumbnail?.url
+                ? { url: embed.thumbnail.url }
+                : undefined,
+              fields: embed.fields?.map((field) => ({
+                name: field.name,
+                value: field.value,
+                inline: field.inline,
+              })),
+            })),
+            poll: poll && {
+              allowMultiselect: poll.allow_multiselect,
+              answers: poll.answers.map((answer) => ({
+                text: answer.text,
+                emoji: answer.emoji,
+              })),
+              duration: poll.duration,
+              question: { text: poll.question.text },
+            },
+          });
+        } else if (!!result.text) {
           await message.reply({
-            content: result.text,
+            content: result.text.substring(0, 2000),
             allowedMentions: { parse: [] },
           });
         }
diff --git a/packages/ai/src/schema.ts b/packages/ai/src/schema.ts
@@ -0,0 +1,136 @@
+import { z } from 'zod';
+
+const pollMediaObject = z
+  .object({
+    text: z.string().trim().describe('The text of the poll question or answer'),
+    emoji: z
+      .string()
+      .trim()
+      .optional()
+      .describe('An optional emoji for a poll answer; unused for the question. Eg: 👍'),
+  })
+  .describe(
+    'The media for a poll question or a poll answer, containing its text. Emoji can only be attached to answers, not to the question.',
+  );
+
+export const AiResponseSchema = z
+  .object({
+    content: z
+      .string()
+      .trim()
+      .optional()
+      .describe(
+        'The content of the message. This can be plain text or markdown. This is an optional field.',
+      ),
+    embeds: z
+      .array(
+        z.object({
+          title: z
+            .string()
+            .trim()
+            .optional()
+            .describe('The title of the embed. This is an optional field.'),
+          description: z
+            .string()
+            .trim()
+            .optional()
+            .describe(
+              'The description of the embed. This is an optional field.',
+            ),
+          url: z
+            .string()
+            .optional()
+            .describe(
+              'The URL of the embed. No need to specify this if it is not needed. It is not a required field.',
+            ),
+          color: z
+            .number()
+            .int()
+            .min(0)
+            .max(16777215)
+            .optional()
+            .describe(
+              'The color of the embed in RGB format. This is an optional field.',
+            ),
+          image: z
+            .object({
+              url: z
+                .string()
+                .optional()
+                .describe(
+                  'The URL of the image in the embed. This is an optional field.',
+                ),
+            })
+            .optional(),
+          thumbnail: z
+            .object({
+              url: z
+                .string()
+                .optional()
+                .describe(
+                  'The URL of the thumbnail in the embed. This is an optional field.',
+                ),
+            })
+            .optional(),
+          fields: z
+            .array(
+              z.object({
+                name: z
+                  .string()
+                  .trim()
+                  .describe(
+                    'The name of the field. This is a required field.',
+                  ),
+                value: z
+                  .string()
+                  .trim()
+                  .describe(
+                    'The value of the field. This is a required field.',
+                  ),
+                inline: z
+                  .boolean()
+                  .optional()
+                  .default(false)
+                  .describe(
+                    'Whether the field is inline. This is an optional field. It defaults to false.',
+                  ),
+              }),
+            )
+            .max(25)
+            .min(0)
+            .optional()
+            .describe('An array of fields in the embed'),
+        }),
+      )
+      .max(10)
+      .min(0)
+      .optional()
+      .describe('An array of embeds to include in the message'),
+    poll: z
+      .object({
+        question: pollMediaObject,
+        answers: z
+          .array(pollMediaObject)
+          .min(1)
+          .max(10)
+          .describe('An array of answers for the poll'),
+        allow_multiselect: z
+          .boolean()
+          .optional()
+          .default(false)
+          .describe('Whether the poll allows multiple selections'),
+        duration: z
+          .number()
+          .int()
+          .min(1)
+          .max(768)
+          .optional()
+          .default(24)
+          .describe('The duration of the poll in hours, from 1 to 768 (32 days). Defaults to 24.'),
+      })
+      .optional()
+      .describe('An object representing a poll to include in the message'),
+  })
+  .describe(
+    'The schema for an AI response message, including content, embeds, and an optional poll. At least one of content, embeds, or poll must be present.',
+  );
diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts
@@ -9,6 +9,7 @@ export type MessageFilter = (message: Message) => Promise<boolean>;
 export type SelectAiModel = (message: Message) => Promise<{
   model: LanguageModelV1;
   options?: ProviderMetadata;
+  objectMode?: boolean; // opt-in structured replies (polls, embeds); the plugin treats it as false when omitted
 }>;
 
 export interface AiPluginOptions {}