refactor: update genai samples (#3588)

pattishin · web-flow · commit e6153a970da2 · 2023-12-13T00:54:32.000-08:00
* refactor: filling out multimodal with image sample and clean up

* refactor: filling out multimodal video sample

* refactor: adding tests for multimodal samples

* fix: resolving linting errors
diff --git a/generative-ai/snippets/countTokens.js b/generative-ai/snippets/countTokens.js
@@ -26,7 +26,7 @@ async function countTokens(
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
-  // const model = 'gemini-pro';
+  // const model = 'chosen-genai-model';
 
   // Initialize Vertex with your Cloud project and location
   const vertex_ai = new VertexAI({project: projectId, location: location});
diff --git a/generative-ai/snippets/nonStreamingChat.js b/generative-ai/snippets/nonStreamingChat.js
@@ -14,26 +14,18 @@
 
 const {VertexAI} = require('@google-cloud/vertexai');
 
-function wait(time) {
-  return new Promise(resolve => {
-    setTimeout(resolve, time);
-  });
-}
-
 async function createNonStreamingChat(
   projectId = 'PROJECT_ID',
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
-  // TODO: Find better method. Setting delay to give api time to respond, otherwise it will 404
-  // await wait(10);
-
-  // [START aiplatform_gemini_multiturn_chat]
+  // [START aiplatform_gemini_multiturn_chat_nonstreaming]
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
 
   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -64,7 +56,7 @@ async function createNonStreamingChat(
   const response3 = result3.response.candidates[0].content.parts[0].text;
   console.log('Chat bot: ', response3);
 
-  // [END aiplatform_gemini_multiturn_chat]
+  // [END aiplatform_gemini_multiturn_chat_nonstreaming]
 }
 
 createNonStreamingChat(...process.argv.slice(2)).catch(err => {
diff --git a/generative-ai/snippets/nonStreamingContent.js b/generative-ai/snippets/nonStreamingContent.js
@@ -19,13 +19,14 @@ async function createNonStreamingContent(
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
-  // [START aiplatform_gemini_function_calling]
+  // [START aiplatform_gemini_content_nonstreaming]
 
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
 
   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -55,7 +56,7 @@ async function createNonStreamingContent(
 
   console.log(fullTextResponse);
 
-  // [END aiplatform_gemini_function_calling]
+  // [END aiplatform_gemini_content_nonstreaming]
 }
 
 createNonStreamingContent(...process.argv.slice(2)).catch(err => {
diff --git a/generative-ai/snippets/nonStreamingMultipartContent.js b/generative-ai/snippets/nonStreamingMultipartContent.js
@@ -41,9 +41,9 @@ async function createNonStreamingMultipartContent(
 
   // For images, the SDK supports both Google Cloud Storage URI and base64 strings
   const filePart = {
-    file_data: {
-      file_uri: image,
-      mime_type: mimeType,
+    fileData: {
+      fileUri: image,
+      mimeType: mimeType,
     },
   };
 
@@ -57,8 +57,8 @@ async function createNonStreamingMultipartContent(
 
   console.log('Prompt Text:');
   console.log(request.contents[0].parts[0].text);
-  console.log('Non-Streaming Response Text:');
 
+  console.log('Non-Streaming Response Text:');
   // Create the response stream
   const responseStream =
     await generativeVisionModel.generateContentStream(request);
diff --git a/generative-ai/snippets/package.json b/generative-ai/snippets/package.json
@@ -15,6 +15,7 @@
   "dependencies": {
     "@google-cloud/aiplatform": "^3.0.0",
     "@google-cloud/vertexai": "github:googleapis/nodejs-vertexai",
+    "axios": "^1.6.2",
     "supertest": "^6.3.3"
   },
   "devDependencies": {
diff --git a/generative-ai/snippets/safetySettings.js b/generative-ai/snippets/safetySettings.js
@@ -18,14 +18,18 @@ const {
   HarmBlockThreshold,
 } = require('@google-cloud/vertexai');
 
-async function createStreamContent() {
+async function setSafetySettings(
+  projectId = 'PROJECT_ID',
+  location = 'LOCATION_ID',
+  model = 'MODEL'
+) {
   // [START aiplatform_gemini_safety_settings]
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
-  const projectId = 'cloud-llm-preview1';
-  const location = 'us-central1';
-  const model = 'gemini-pro';
+  // const projectId = 'your-project-id';
+  // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
 
   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -66,7 +70,7 @@ async function createStreamContent() {
   // [END aiplatform_gemini_safety_settings]
 }
 
-createStreamContent(...process.argv.slice(3)).catch(err => {
+setSafetySettings(...process.argv.slice(2)).catch(err => {
   console.error(err.message);
   process.exitCode = 1;
 });
diff --git a/generative-ai/snippets/sendMultiModalPromptWithImage.js b/generative-ai/snippets/sendMultiModalPromptWithImage.js
@@ -13,13 +13,90 @@
 // limitations under the License.
 
 const {VertexAI} = require('@google-cloud/vertexai');
+const axios = require('axios');
+
+async function getBase64(url) {
+  const {data: bytes} = await axios.get(url, {responseType: 'arraybuffer'});
+  return Buffer.from(bytes).toString('base64');
+}
 
 async function sendMultiModalPromptWithImage(
   projectId = 'PROJECT_ID',
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
   // [START aiplatform_gemini_single_turn_multi_image]
+  /**
+   * TODO(developer): Uncomment these variables before running the sample.
+   */
+  // const projectId = 'your-project-id';
+  // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
+
+  // Fetch each landmark image as a base64 string (sent below as inlineData)
+  const landmarkImage1 = await getBase64(
+    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png'
+  );
+  const landmarkImage2 = await getBase64(
+    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark2.png'
+  );
+  const landmarkImage3 = await getBase64(
+    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark3.png'
+  );
+
+  // Initialize Vertex with your Cloud project and location
+  const vertexAI = new VertexAI({project: projectId, location: location});
+
+  const generativeVisionModel = vertexAI.preview.getGenerativeModel({
+    model: model,
+  });
+
+  // Few-shot prompt: two captioned landmark images, then an uncaptioned third
+  const request = {
+    contents: [
+      {
+        role: 'user',
+        parts: [
+          {
+            inlineData: {
+              data: landmarkImage1,
+              mimeType: 'image/png',
+            },
+          },
+          {
+            text: 'city: Rome, Landmark: the Colosseum',
+          },
+
+          {
+            inlineData: {
+              data: landmarkImage2,
+              mimeType: 'image/png',
+            },
+          },
+          {
+            text: 'city: Beijing, Landmark: Forbidden City',
+          },
+          {
+            inlineData: {
+              data: landmarkImage3,
+              mimeType: 'image/png',
+            },
+          },
+        ],
+      },
+    ],
+  };
+
+  // Create the response
+  const response = await generativeVisionModel.generateContent(request);
+  // Wait for the response to complete
+  const aggregatedResponse = await response.response;
+  // Select the text from the response
+  const fullTextResponse =
+    aggregatedResponse.candidates[0].content.parts[0].text;
+
+  console.log(fullTextResponse);
+
   // [END aiplatform_gemini_single_turn_multi_image]
 }
 
diff --git a/generative-ai/snippets/sendMultiModalPromptWithVideo.js b/generative-ai/snippets/sendMultiModalPromptWithVideo.js
@@ -14,16 +14,60 @@
 
 const {VertexAI} = require('@google-cloud/vertexai');
 
-async function sendMultiModalPromptWithImage(
+async function sendMultiModalPromptWithVideo(
   projectId = 'PROJECT_ID',
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
   // [START aiplatform_gemini_single_turn_video]
+  /**
+   * TODO(developer): Uncomment these variables before running the sample.
+   */
+  // const projectId = 'your-project-id';
+  // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
+
+  // Initialize Vertex with your Cloud project and location
+  const vertexAI = new VertexAI({project: projectId, location: location});
+
+  const generativeVisionModel = vertexAI.preview.getGenerativeModel({
+    model: model,
+  });
+
+  // Multimodal prompt: a text question plus a video given by Cloud Storage URI
+  const request = {
+    contents: [
+      {
+        role: 'user',
+        parts: [
+          {
+            text: 'What is in the video?',
+          },
+          {
+            fileData: {
+              fileUri: 'gs://cloud-samples-data/video/animals.mp4',
+              mimeType: 'video/mp4',
+            },
+          },
+        ],
+      },
+    ],
+  };
+
+  // Send the prompt to the model
+  const response = await generativeVisionModel.generateContent(request);
+  // Wait for the response to complete
+  const aggregatedResponse = await response.response;
+  // Select the text of the first part of the first candidate
+  const fullTextResponse =
+    aggregatedResponse.candidates[0].content.parts[0].text;
+
+  console.log(fullTextResponse);
+
   // [END aiplatform_gemini_single_turn_video]
 }
 
-sendMultiModalPromptWithImage(...process.argv.slice(2)).catch(err => {
+sendMultiModalPromptWithVideo(...process.argv.slice(2)).catch(err => {
   console.error(err.message);
   process.exitCode = 1;
 });
diff --git a/generative-ai/snippets/streamChat.js b/generative-ai/snippets/streamChat.js
@@ -20,12 +20,12 @@ async function createStreamChat(
   model = 'MODEL'
 ) {
   // [START aiplatform_gemini_multiturn_chat]
-
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
 
   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -36,9 +36,10 @@ async function createStreamChat(
   });
 
   const chat = generativeModel.startChat({});
-
   const chatInput1 = 'How can I learn more about that?';
+
   console.log(`User: ${chatInput1}`);
+
   const result1 = await chat.sendMessageStream(chatInput1);
   for await (const item of result1.stream) {
     console.log(item.candidates[0].content.parts[0].text);
diff --git a/generative-ai/snippets/streamContent.js b/generative-ai/snippets/streamContent.js
@@ -19,13 +19,14 @@ async function createStreamContent(
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
-  // [START aiplatform_gemini_function_calling]
+  // [START aiplatform_gemini_content]
 
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
 
   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -51,7 +52,7 @@ async function createStreamContent(
     process.stdout.write(item.candidates[0].content.parts[0].text);
   }
 
-  // [END aiplatform_gemini_function_calling]
+  // [END aiplatform_gemini_content]
 }
 
 createStreamContent(...process.argv.slice(2)).catch(err => {
diff --git a/generative-ai/snippets/streamMultipartContent.js b/generative-ai/snippets/streamMultipartContent.js
@@ -28,6 +28,7 @@ async function createStreamMultipartContent(
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
   // const image = 'gs://generativeai-downloads/images/scones.jpg'; // Google Cloud Storage image
   // const mimeType = 'image/jpeg';
 
@@ -41,9 +42,9 @@ async function createStreamMultipartContent(
 
   // For images, the SDK supports both Google Cloud Storage URI and base64 strings
   const filePart = {
-    file_data: {
-      file_uri: image,
-      mime_type: mimeType,
+    fileData: {
+      fileUri: image,
+      mimeType: mimeType,
     },
   };
 
diff --git a/generative-ai/snippets/test/sendMultiModalPromptWithImage.test.js b/generative-ai/snippets/test/sendMultiModalPromptWithImage.test.js
@@ -0,0 +1,35 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict';
+
+const {assert} = require('chai');
+const {describe, it} = require('mocha');
+const cp = require('child_process');
+
+const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'});
+
+describe('Generative AI MultiModal with Image', () => {
+  const project = 'cloud-llm-preview1';
+  const location = 'us-central1';
+  const model = 'gemini-pro-vision';
+
+  it('should create nonstreaming multimodal content', () => {
+    const output = execSync(
+      `node ./sendMultiModalPromptWithImage.js ${project} ${location} ${model}`
+    );
+    // NOTE(review): confirm 'Paris' is the landmark in the last prompt image
+    assert(output.match(/Paris/));
+  });
+});
diff --git a/generative-ai/snippets/test/sendMultiModalPromptWithVideo.test.js b/generative-ai/snippets/test/sendMultiModalPromptWithVideo.test.js