[ACTION] Extend Vertex AI MCP server with Veo3 support

jcortes · jcortes · commit 5f354dd85833 · 2025-07-23T20:16:33.000-05:00
diff --git a/components/google_vertex_ai/actions/analyze-image-video/analyze-image-video.mjs b/components/google_vertex_ai/actions/analyze-image-video/analyze-image-video.mjs
@@ -4,7 +4,7 @@ export default {
   key: "google_vertex_ai-analyze-image-video",
   name: "Analyze Image/Video",
   description: "Examines an image or video following given instructions. Results will contain the analysis findings. [See the documentation](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.publishers.models/generateContent)",
-  version: "0.0.1",
+  version: "0.0.2",
   type: "action",
   props: {
     vertexAi,
diff --git a/components/google_vertex_ai/actions/analyze-text-sentiment/analyze-text-sentiment.mjs b/components/google_vertex_ai/actions/analyze-text-sentiment/analyze-text-sentiment.mjs
@@ -4,7 +4,7 @@ export default {
   key: "google_vertex_ai-analyze-text-sentiment",
   name: "Analyze Text Sentiment",
   description: "Analyzes a specified text for its underlying sentiment. [See the documentation](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.publishers.models/generateContent)",
-  version: "0.0.1",
+  version: "0.0.2",
   type: "action",
   props: {
     vertexAi,
diff --git a/components/google_vertex_ai/actions/classify-text/classify-text.mjs b/components/google_vertex_ai/actions/classify-text/classify-text.mjs
@@ -4,7 +4,7 @@ export default {
   key: "google_vertex_ai-classify-text",
   name: "Classify Text",
   description: "Groups a provided text into predefined categories. [See the documentation](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.publishers.models/generateContent)",
-  version: "0.0.1",
+  version: "0.0.2",
   type: "action",
   props: {
     vertexAi,
diff --git a/components/google_vertex_ai/actions/common/generate-video.mjs b/components/google_vertex_ai/actions/common/generate-video.mjs
@@ -0,0 +1,190 @@
+import app from "../../google_vertex_ai.app.mjs";
+
+/**
+ * Shared props and helper methods for the Veo video generation actions.
+ * Spread into an action definition, which then adds its own key, name and `run`.
+ */
+export default {
+  props: {
+    app,
+    projectId: {
+      propDefinition: [
+        app,
+        "projectId",
+      ],
+    },
+    model: {
+      propDefinition: [
+        app,
+        "model",
+      ],
+    },
+    prompt: {
+      type: "string",
+      label: "Prompt",
+      description: "The text prompt to guide video generation. For Veo 3, you can include audio cues like dialogue in quotes, sound effects, and ambient noise descriptions.",
+    },
+    aspectRatio: {
+      type: "string",
+      label: "Aspect Ratio",
+      description: "The aspect ratio of the generated video",
+      options: [
+        {
+          label: "16:9 (Landscape)",
+          value: "16:9",
+        },
+        {
+          label: "9:16 (Portrait) - Veo 2 only",
+          value: "9:16",
+        },
+      ],
+      default: "16:9",
+    },
+    durationSeconds: {
+      type: "integer",
+      label: "Duration (seconds)",
+      description: "The length of the video in seconds. Veo 2: 5-8 seconds, Veo 3: 8 seconds",
+      default: 8,
+      min: 5,
+      max: 8,
+    },
+    enhancePrompt: {
+      type: "boolean",
+      label: "Enhance Prompt",
+      description: "Use Gemini to enhance your prompts",
+      default: true,
+    },
+    generateAudio: {
+      type: "boolean",
+      label: "Generate Audio",
+      description: "Generate audio for the video (Veo 3 only)",
+      default: true,
+    },
+    negativePrompt: {
+      type: "string",
+      label: "Negative Prompt",
+      description: "A text string that describes anything you want to discourage the model from generating",
+      optional: true,
+    },
+    personGeneration: {
+      type: "string",
+      label: "Person Generation",
+      description: "The safety setting that controls whether people or face generation is allowed",
+      options: [
+        {
+          label: "Allow Adult",
+          value: "allow_adult",
+        },
+        {
+          label: "Don't Allow",
+          value: "dont_allow",
+        },
+      ],
+      default: "allow_adult",
+      optional: true,
+    },
+    resolution: {
+      type: "string",
+      label: "Resolution",
+      description: "The resolution of the generated video (Veo 3 models only)",
+      options: [
+        {
+          label: "720p",
+          value: "720p",
+        },
+        {
+          label: "1080p",
+          value: "1080p",
+        },
+      ],
+      default: "720p",
+      optional: true,
+    },
+    sampleCount: {
+      type: "integer",
+      label: "Sample Count",
+      description: "The number of output videos requested",
+      default: 1,
+      min: 1,
+      max: 4,
+    },
+    storageUri: {
+      type: "string",
+      label: "Storage URI",
+      description: "A Cloud Storage bucket URI to store the output video, in the format `gs://BUCKET_NAME/SUBDIRECTORY`. If a Cloud Storage bucket isn't provided, base64-encoded video bytes are returned in the response.",
+      optional: true,
+    },
+  },
+  methods: {
+    /**
+     * Polls the long-running operation until it reports `done`.
+     *
+     * @param {object} opts
+     * @param {object} opts.$ - Passed through to `app.fetchOperation`
+     * @param {*} opts.projectId - Passed through to `app.fetchOperation`
+     * @param {*} opts.model - Passed through to `app.fetchOperation`
+     * @param {string} opts.operationName - Sent to `app.fetchOperation` as `data.operationName`
+     * @param {number} [opts.pollInterval=45000] - Milliseconds to wait between checks
+     * @param {number} [opts.maxAttempts=6] - Maximum number of checks before giving up
+     * @returns {Promise<object>} The first operation response whose `done` flag is truthy
+     * @throws {Error} If a check fails (the original error is attached as `cause`) or the
+     * operation is still not done after the last check
+     */
+    async pollOperation({
+      $, projectId, model, operationName, pollInterval = 45000, maxAttempts = 6,
+    }) {
+      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+        try {
+          const response = await this.app.fetchOperation({
+            $,
+            projectId,
+            model,
+            data: {
+              operationName,
+            },
+          });
+
+          if (response.done) {
+            return response;
+          }
+
+          console.log(`Video generation in progress... (attempt ${attempt}/${maxAttempts})`);
+
+          // Nothing left to wait for after the final check.
+          if (attempt < maxAttempts) {
+            await new Promise((resolve) => setTimeout(resolve, pollInterval));
+          }
+        } catch (error) {
+          // Keep the original error (and whatever details it carries) reachable via `cause`.
+          throw new Error(`Error polling operation: ${error.message}`, {
+            cause: error,
+          });
+        }
+      }
+
+      throw new Error(`Video generation not completed after ${maxAttempts} polling attempts`);
+    },
+    /**
+     * Collects every chunk of a stream and returns the bytes as one base64 string.
+     *
+     * @param {object} stream - Emits `data`, `end` and `error` events (e.g. a Node.js Readable)
+     * @returns {Promise<string>} Base64 of the concatenated chunks; rejects on a stream `error`
+     */
+    streamToBase64(stream) {
+      return new Promise((resolve, reject) => {
+        const chunks = [];
+        stream.on("data", (chunk) => {
+          // `Buffer.concat` throws on string chunks (emitted when the stream has an encoding
+          // set), and a throw inside the `end` handler would never settle this promise.
+          chunks.push(Buffer.isBuffer(chunk)
+            ? chunk
+            : Buffer.from(chunk));
+        });
+        stream.on("end", () => {
+          resolve(Buffer.concat(chunks).toString("base64"));
+        });
+        stream.on("error", reject);
+      });
+    },
+  },
+};
diff --git a/components/google_vertex_ai/actions/generate-video-from-image/generate-video-from-image.mjs b/components/google_vertex_ai/actions/generate-video-from-image/generate-video-from-image.mjs
@@ -0,0 +1,108 @@
+import { getFileStreamAndMetadata } from "@pipedream/platform";
+import common from "../common/generate-video.mjs";
+
+export default {
+  ...common,
+  key: "google_vertex_ai-generate-video-from-image",
+  name: "Generate Video from Image",
+  description: "Generate a video from an image with optional text prompt using Google Vertex AI Veo models. [See the documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    ...common.props,
+    // The action description promises an optional prompt, so relax the shared (required)
+    // definition here; overriding the existing key keeps its position in the prop order.
+    prompt: {
+      ...common.props.prompt,
+      optional: true,
+    },
+    image: {
+      type: "string",
+      label: "Image Path Or URL",
+      description: "Provide either a file URL or a path to a file in the `/tmp` directory (for example, `/tmp/image.jpg`). Supported formats: JPEG, PNG. For best quality, use 720p or higher (1280 x 720 pixels) with 16:9 or 9:16 aspect ratio.",
+    },
+  },
+  /**
+   * Reads the image, starts a Veo generation operation, and waits for it to finish.
+   *
+   * @returns {Promise<object>} The completed operation as returned by `pollOperation`
+   * @throws {Error} If no operation name comes back, the finished operation carries an
+   * `error`, or it has no `response`
+   */
+  async run({ $ }) {
+    const {
+      projectId,
+      model,
+      image,
+      prompt,
+      aspectRatio,
+      durationSeconds,
+      enhancePrompt,
+      generateAudio,
+      negativePrompt,
+      personGeneration,
+      resolution,
+      sampleCount,
+      storageUri,
+    } = this;
+
+    const {
+      stream, metadata,
+    } = await getFileStreamAndMetadata(image);
+    const imageBase64 = await this.streamToBase64(stream);
+
+    const operationResponse = await this.app.generateVideosLongRunning({
+      $,
+      projectId,
+      model,
+      data: {
+        instances: [
+          {
+            prompt,
+            image: {
+              bytesBase64Encoded: imageBase64,
+              mimeType: metadata.contentType,
+            },
+          },
+        ],
+        parameters: {
+          aspectRatio,
+          durationSeconds,
+          enhancePrompt,
+          sampleCount,
+          negativePrompt,
+          personGeneration,
+          generateAudio,
+          resolution,
+          storageUri,
+        },
+      },
+    });
+
+    if (!operationResponse.name) {
+      throw new Error("Failed to start video generation operation");
+    }
+
+    // Poll the operation until completion
+    const completedOperation = await this.pollOperation({
+      $,
+      projectId,
+      model,
+      operationName: operationResponse.name,
+    });
+
+    if (completedOperation.error) {
+      throw new Error(`Video generation failed: ${JSON.stringify(completedOperation.error)}`);
+    }
+
+    if (!completedOperation.response) {
+      throw new Error("No response received from completed operation");
+    }
+
+    const videoCount = completedOperation.response.videos?.length ?? 0;
+
+    $.export("$summary", `Successfully generated ${videoCount} video(s) from image`);
+
+    return completedOperation;
+  },
+};
diff --git a/components/google_vertex_ai/actions/generate-video-from-text/generate-video-from-text.mjs b/components/google_vertex_ai/actions/generate-video-from-text/generate-video-from-text.mjs
@@ -0,0 +1,81 @@
+import common from "../common/generate-video.mjs";
+
+export default {
+  ...common,
+  key: "google_vertex_ai-generate-video-from-text",
+  name: "Generate Video from Text",
+  description: "Generate a video from a text prompt using Google Vertex AI Veo models. [See the documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation)",
+  version: "0.0.1",
+  type: "action",
+  /**
+   * Starts a Veo generation operation for the prompt and waits for it to finish.
+   *
+   * @returns {Promise<object>} The completed operation as returned by `pollOperation`
+   * @throws {Error} If no operation name comes back, the finished operation carries an
+   * `error`, or it has no `response`
+   */
+  async run({ $ }) {
+    const {
+      projectId,
+      model,
+    } = this;
+
+    // Generation settings, taken one-to-one from the props of the same name.
+    const parameters = {
+      aspectRatio: this.aspectRatio,
+      durationSeconds: this.durationSeconds,
+      enhancePrompt: this.enhancePrompt,
+      sampleCount: this.sampleCount,
+      negativePrompt: this.negativePrompt,
+      personGeneration: this.personGeneration,
+      generateAudio: this.generateAudio,
+      resolution: this.resolution,
+      storageUri: this.storageUri,
+    };
+
+    const started = await this.app.generateVideosLongRunning({
+      $,
+      projectId,
+      model,
+      data: {
+        instances: [
+          {
+            prompt: this.prompt,
+          },
+        ],
+        parameters,
+      },
+    });
+
+    const operationName = started.name;
+    if (!operationName) {
+      throw new Error("Failed to start video generation operation");
+    }
+
+    // Block until the operation reports completion (or polling gives up).
+    const finished = await this.pollOperation({
+      $,
+      projectId,
+      model,
+      operationName,
+    });
+
+    const {
+      error: failure, response: result,
+    } = finished;
+
+    if (failure) {
+      throw new Error(`Video generation failed: ${JSON.stringify(failure)}`);
+    }
+
+    if (!result) {
+      throw new Error("No response received from completed operation");
+    }
+
+    const videoCount = result.videos?.length || 0;
+
+    $.export("$summary", `Successfully generated ${videoCount} video(s)`);
+
+    return finished;
+  },
+};
diff --git a/components/google_vertex_ai/google_vertex_ai.app.mjs b/components/google_vertex_ai/google_vertex_ai.app.mjs
diff --git a/components/google_vertex_ai/package.json b/components/google_vertex_ai/package.json
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml