wip: prototype GenerativeModel.startLiveSession and LiveSession over WebSocket

dlarocque · dlarocque · commit 7da51a5e62cc · 2025-02-10T13:14:35.000-05:00
diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md
@@ -337,6 +337,11 @@ export class GenerativeModel {
     // (undocumented)
     safetySettings: SafetySetting[];
     startChat(startChatParams?: StartChatParams): ChatSession;
+    // Warning: (ae-forgotten-export) The symbol "LiveGenerationConfig" needs to be exported by the entry point index.d.ts
+    // Warning: (ae-forgotten-export) The symbol "LiveSession" needs to be exported by the entry point index.d.ts
+    //
+    // (undocumented)
+    startLiveSession(config?: LiveGenerationConfig): Promise<LiveSession>;
     // (undocumented)
     systemInstruction?: Content;
     // (undocumented)
diff --git a/packages/vertexai/src/models/generative-model.ts b/packages/vertexai/src/models/generative-model.ts
@@ -47,6 +47,7 @@ import { VertexAI } from '../public-types';
 import { ApiSettings } from '../types/internal';
 import { VertexAIService } from '../service';
 import { _isFirebaseServerApp } from '@firebase/app';
+import { LiveClientContent, LiveClientSetup, LiveGenerationConfig, LiveServerContent } from '../types/live';
 
 /**
  * Class for generative model APIs.
@@ -190,6 +191,55 @@ export class GenerativeModel {
     );
   }
 
+  /**
+   * Opens a WebSocket to the bidirectional (live) endpoint, sends the setup
+   * message, and resolves once the server's first message acknowledges it.
+   * @param config - Optional generation config forwarded in the setup message.
+   * @returns A LiveSession wrapping the open socket.
+   * @throws Error if the socket fails, or the first message is not a setup ack.
+   */
+  async startLiveSession(config?: LiveGenerationConfig): Promise<LiveSession> {
+    // WIP: backend choice, model name and the (empty) Google AI key are hard-coded.
+    const bidiGoogleAI = true;
+    const model = 'gemini-2.0-flash-exp';
+    const gaiApiKey = '';
+    const url = bidiGoogleAI
+      ? `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key=${gaiApiKey}`
+      : `wss://daily-firebaseml.sandbox.googleapis.com/ws/google.firebase.machinelearning.v2beta.LlmBidiService/BidiGenerateContent?key=${this._apiSettings.apiKey}`;
+    const modelString = bidiGoogleAI
+      ? `models/${model}`
+      : `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/publishers/google/models/${model}`;
+
+    const socket = new WebSocket(url);
+    socket.onopen = () => {
+      const setup: LiveClientSetup = { setup: { model: modelString, generation_config: config } };
+      socket.send(JSON.stringify(setup));
+    };
+    try {
+      await new Promise<void>((resolve, reject) => {
+        // Without these a failed or dropped connection would never settle.
+        socket.onerror = () => reject(new Error('websocket error during setup'));
+        socket.onclose = () => reject(new Error('websocket closed during setup'));
+        socket.onmessage = async event => {
+          try {
+            // A frame may be delivered as text or as a Blob.
+            const raw = typeof event.data === 'string' ? event.data : await (event.data as Blob).text();
+            if (!JSON.parse(raw).setupComplete) {
+              throw new Error('first message did not contain `setup_complete`');
+            }
+            resolve();
+          } catch (e) {
+            reject(e);
+          }
+        };
+      });
+    } catch (e) {
+      socket.close(); // do not leak the connection when setup fails
+      throw e;
+    }
+    return new LiveSession(socket);
+  }
+
   /**
    * Counts the tokens in the provided request.
    */
@@ -200,3 +250,44 @@ export class GenerativeModel {
     return countTokens(this._apiSettings, this.model, formattedParams);
   }
 }
+
+/** Wraps a live WebSocket connection: sends user text turns and delivers server messages. */
+export class LiveSession {
+  constructor(private socket: WebSocket) {
+    this.socket.onclose = event => console.log('websocket closed', event);
+    this.socket.onerror = event => console.log('websocket error:', event);
+  }
+
+  /**
+   * Sends `data` as a single user text turn.
+   *
+   * @param data - Text of the user turn.
+   * @param turnComplete - Whether this message ends the user's turn.
+   * @throws Error if the underlying socket is not currently open.
+   */
+  send(data: string, turnComplete: boolean): void {
+    // `socket.OPEN` is the constant 1 (always truthy); `readyState` holds the
+    // actual connection state.
+    if (this.socket.readyState !== WebSocket.OPEN) {
+      throw new Error('Cannot send message. Live connection was closed.');
+    }
+    const msg: LiveClientContent = {
+      client_content: {
+        turns: [{ role: 'user', parts: [{ text: data }] }],
+        turn_complete: turnComplete
+      }
+    };
+    this.socket.send(JSON.stringify(msg));
+  }
+
+  /**
+   * Registers `callback` for every subsequent server message, replacing any
+   * previous handler. Assumes the setup acknowledgement was already received.
+   */
+  onMessage(callback: (content: LiveServerContent) => void): void {
+    this.socket.onmessage = async event => {
+      const raw = typeof event.data === 'string' ? event.data : await (event.data as Blob).text();
+      callback(JSON.parse(raw) as LiveServerContent);
+    };
+  }
+}
diff --git a/packages/vertexai/src/types/live.ts b/packages/vertexai/src/types/live.ts
@@ -0,0 +1,55 @@
+/** First message the client sends once the WebSocket connection is open. */
+export interface LiveClientSetup {
+  setup: {
+    model: string; // model resource name the session should use
+    generation_config?: LiveGenerationConfig // optional generation settings
+  }
+}
+
+export interface LiveGenerationConfig { // generation settings carried in LiveClientSetup.setup.generation_config
+  response_modalities: string[]; // presumably the output kinds requested (e.g. text/audio) — TODO confirm accepted values
+  speech_config: {
+    voice_config: {
+      prebuilt_voice_config: {
+        voice_name: string; // name of a prebuilt voice; valid names are not defined in this file
+      }
+    }
+  }
+}
+
+/** Message received from the server after setup; LiveSession.onMessage parses incoming JSON into this shape. */
+export interface LiveServerContent {
+  serverContent: {
+    // Present while the model's turn is still in progress.
+    modelTurn?: {
+      parts: {
+        inlineData: {
+          mimeType: string, // media type of `data`
+          data: string // payload; presumably base64-encoded — TODO confirm
+        }
+      }[]
+    };
+    // Present once the model's turn has finished.
+    turnComplete?: boolean
+  };
+}
+
+/** User input sent in real time. NOTE(review): not imported with the other live types in generative-model.ts — appears unused so far. */
+export interface LiveClientRealtimeInput {
+  mediaChunks: {
+    mime_type: string; // media type of `data`
+    data: Uint8Array, // NOTE(review): JSON.stringify emits a Uint8Array as an index-keyed object, not base64 — confirm intended encoding
+  }
+}
+
+export interface LiveClientContent { // payload that LiveSession.send builds and sends as JSON
+  client_content: {
+    turns: {
+      role: string; // LiveSession.send always sets 'user'
+      parts: {
+        text: string
+      }[];
+    }[];
+    turn_complete: boolean; // forwarded from the `turnComplete` argument of LiveSession.send
+  }
+}