feat(genai): Add Live API samples v2 #13523

Draft · wants to merge 8 commits into base: main

Changes from 5 commits
80 changes: 80 additions & 0 deletions genai/live/live_conversation_audio_with_audio.py
@@ -0,0 +1,80 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import asyncio

import pyaudio
from google import genai
from google.genai.types import (
    AudioTranscriptionConfig,
    Blob,
    LiveConnectConfig,
    Modality,
)

# Audio configuration: the Live API consumes 16 kHz, 16-bit mono PCM input
# and returns audio at 24 kHz.
CHUNK = 4200
FORMAT = pyaudio.paInt16
CHANNELS = 1
MODEL = "gemini-2.0-flash-live-preview-04-09"
INPUT_RATE = 16000
OUTPUT_RATE = 24000

client = genai.Client()

# Request audio responses plus text transcripts of both the input audio
# and the model's audio output.
config = LiveConnectConfig(
    response_modalities=[Modality.AUDIO],
    input_audio_transcription=AudioTranscriptionConfig(),
    output_audio_transcription=AudioTranscriptionConfig(),
)

async def main():
    print(MODEL)
    p = pyaudio.PyAudio()
    async with client.aio.live.connect(model=MODEL, config=config) as session:

        async def send():
            # Capture microphone audio and stream it to the session as raw PCM.
            stream = p.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=INPUT_RATE,
                input=True,
                frames_per_buffer=CHUNK,
            )
            while True:
                frame = stream.read(CHUNK)
                await session.send_realtime_input(media=Blob(data=frame, mime_type="audio/pcm"))
                # Yield to the event loop so the receive task can run.
                await asyncio.sleep(0)

        async def receive():
            # Play the model's audio replies and print any transcriptions.
            output_stream = p.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=OUTPUT_RATE,
                output=True,
                frames_per_buffer=CHUNK,
            )
            async for message in session.receive():
                if message.server_content is None:
                    continue
                if message.server_content.input_transcription:
                    print(message.server_content.model_dump(mode="json", exclude_none=True))
                if message.server_content.output_transcription:
                    print(message.server_content.model_dump(mode="json", exclude_none=True))
                if message.server_content.model_turn:
                    for part in message.server_content.model_turn.parts:
                        if part.inline_data and part.inline_data.data:
                            output_stream.write(part.inline_data.data)
                await asyncio.sleep(0)

        send_task = asyncio.create_task(send())
        receive_task = asyncio.create_task(receive())
        await asyncio.gather(send_task, receive_task)


if __name__ == "__main__":
    # Run from a terminal with microphone and speaker access.
    asyncio.run(main())
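
Note (not part of the diff): genai.Client() reads its Vertex AI settings from the environment. A minimal sketch, assuming Application Default Credentials are already configured; the project and location values below are placeholders, not values from this PR:

import os

# Assumed environment for the google-genai client when targeting Vertex AI.
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"
os.environ["GOOGLE_CLOUD_PROJECT"] = "your-project-id"
os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"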
129 changes: 129 additions & 0 deletions genai/live/live_conversation_websocket_audio_with_audio.py
@@ -0,0 +1,129 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import base64
import json

import numpy as np
from scipy.io import wavfile
from websockets.asyncio.client import connect


def get_bearer_token() -> str:
    import google.auth
    from google.auth.transport.requests import Request

    creds, _ = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    auth_req = Request()
    creds.refresh(auth_req)
    bearer_token = creds.token
    return bearer_token


bearer_token = get_bearer_token()

# Websocket endpoint (same Live API service URL as the other websocket
# samples in this PR).
WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com"
SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"

# Set model generation_config
CONFIG = {"response_modalities": ["AUDIO"]}

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {bearer_token}",
}


async def main() -> None:
    # Connect to the server
    async with connect(SERVICE_URL, additional_headers=headers) as ws:

        # Setup the session
        async def setup() -> None:
            await ws.send(
                json.dumps(
                    {
                        "setup": {
                            "model": "gemini-live-2.5-flash",
                            "generation_config": CONFIG,
                        }
                    }
                )
            )

            # Receive setup response
            raw_response = await ws.recv(decode=False)
            setup_response = json.loads(raw_response.decode("ascii"))
            print(f"Connected: {setup_response}")
            return

        # Send text message
        async def send() -> bool:
            text_input = input("Input > ")
            if text_input.lower() in ("q", "quit", "exit"):
                return False

            msg = {
                "client_content": {
                    "turns": [{"role": "user", "parts": [{"text": text_input}]}],
                    "turn_complete": True,
                }
            }

            await ws.send(json.dumps(msg))
            return True

        # Receive server response
        async def receive() -> None:
            responses = []

            # Receive chunks of the server response
            async for raw_response in ws:
                response = json.loads(raw_response.decode())
                server_content = response.pop("serverContent", None)
                if server_content is None:
                    break

                model_turn = server_content.pop("modelTurn", None)
                if model_turn is not None:
                    parts = model_turn.pop("parts", None)
                    if parts is not None:
                        for part in parts:
                            pcm_data = base64.b64decode(part["inlineData"]["data"])
                            responses.append(np.frombuffer(pcm_data, dtype=np.int16))

                # End of turn
                turn_complete = server_content.pop("turnComplete", None)
                if turn_complete:
                    break

            # Save the returned audio message (24 kHz, 16-bit PCM) to a file.
            if responses:
                wavfile.write("output.wav", 24000, np.concatenate(responses))
            return

        await setup()

        while True:
            if not await send():
                break
            await receive()


if __name__ == "__main__":
    asyncio.run(main())
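
Note (not part of the diff): a quick way to sanity-check the saved audio, reusing the sample's own scipy/numpy imports; assumes output.wav was produced by a run of the script above:

from scipy.io import wavfile

# Inspect the saved 24 kHz, 16-bit PCM audio.
rate, data = wavfile.read("output.wav")
print(f"{rate} Hz, {data.dtype}, {len(data) / rate:.2f} s of audio")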
73 changes: 73 additions & 0 deletions genai/live/live_ground_ragengine_with_txt.py
@@ -0,0 +1,73 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio

_memory_corpus = "projects/cloud-ai-devrel-softserve/locations/us-central1/ragCorpora/2305843009213693952"
Comment on lines +14 to +16 (Contributor review comment, severity: medium)

For better reusability and to avoid exposing project-specific details, it's recommended to construct the _memory_corpus string from environment variables for the project ID and a placeholder for the user-specific RAG corpus ID. This requires importing the os module and aligns with the practices in other samples.

import asyncio
import os

# TODO(developer): Set this to your RAG Corpus ID.
RAG_CORPUS_ID = "your-rag-corpus-id"
_memory_corpus = f"projects/{os.getenv('GOOGLE_CLOUD_PROJECT')}/locations/us-central1/ragCorpora/{RAG_CORPUS_ID}"


async def generate_content(memory_corpus: str) -> list[str]:
    # [START googlegenaisdk_live_ground_ragengine_with_txt]
    from google import genai
    from google.genai.types import (
        Content,
        LiveConnectConfig,
        Modality,
        Part,
        Tool,
        Retrieval,
        VertexRagStore,
        VertexRagStoreRagResource,
    )

    client = genai.Client()
    model_id = "gemini-2.0-flash-live-preview-04-09"
    rag_store = VertexRagStore(
        rag_resources=[
            VertexRagStoreRagResource(
                rag_corpus=memory_corpus  # Use a memory corpus if you want to store context.
            )
        ],
        # Set `store_context` to True to allow the Live API to sink context
        # into your memory corpus.
        store_context=True,
    )
    config = LiveConnectConfig(
        response_modalities=[Modality.TEXT],
        tools=[Tool(retrieval=Retrieval(vertex_rag_store=rag_store))],
    )

    async with client.aio.live.connect(model=model_id, config=config) as session:
        text_input = "What year did Mariusz Pudzianowski win World's Strongest Man?"
        print("> ", text_input, "\n")

        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        response = []

        async for message in session.receive():
            if message.text:
                response.append(message.text)

        print("".join(response))
        # Example output:
        # > What year did Mariusz Pudzianowski win World's Strongest Man?
        # Mariusz Pudzianowski won World's Strongest Man in 2002, 2003, 2005, 2007, and 2008.
        # [END googlegenaisdk_live_ground_ragengine_with_txt]
        return response


if __name__ == "__main__":
    asyncio.run(generate_content(_memory_corpus))
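
Note (not part of the diff): if the review suggestion above is adopted, pointing the sample at your own corpus would look roughly like this; every identifier here is a placeholder, not a value from this PR:

import asyncio
import os

os.environ.setdefault("GOOGLE_CLOUD_PROJECT", "your-project-id")
RAG_CORPUS_ID = "your-rag-corpus-id"  # placeholder
corpus = (
    f"projects/{os.environ['GOOGLE_CLOUD_PROJECT']}"
    f"/locations/us-central1/ragCorpora/{RAG_CORPUS_ID}"
)
asyncio.run(generate_content(corpus))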
28 changes: 17 additions & 11 deletions genai/live/live_websocket_audiogen_with_txt.py
@@ -20,7 +20,9 @@ def get_bearer_token() -> str:
     import google.auth
     from google.auth.transport.requests import Request

-    creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+    creds, _ = google.auth.default(
+        scopes=["https://www.googleapis.com/auth/cloud-platform"]
+    )
     auth_req = Request()
     creds.refresh(auth_req)
     bearer_token = creds.token
@@ -55,9 +57,7 @@ async def generate_content() -> str:

     # Websocket Configuration
     WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com"
-    WEBSOCKET_SERVICE_URL = (
-        f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
-    )
+    WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"

     # Websocket Authentication
     headers = {
@@ -66,9 +66,7 @@ async def generate_content() -> str:
     }

     # Model Configuration
-    model_path = (
-        f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
-    )
+    model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
     model_generation_config = {
         "response_modalities": ["AUDIO"],
         "speech_config": {
@@ -77,7 +75,9 @@ async def generate_content() -> str:
         },
     }

-    async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session:
+    async with connect(
+        WEBSOCKET_SERVICE_URL, additional_headers=headers
+    ) as websocket_session:
         # 1. Send setup configuration
         websocket_config = {
             "setup": {
@@ -120,7 +120,9 @@ async def generate_content() -> str:
             server_content = response_chunk.get("serverContent")
             if not server_content:
                 # This might indicate an error or an unexpected message format
-                print(f"Received non-serverContent message or empty content: {response_chunk}")
+                print(
+                    f"Received non-serverContent message or empty content: {response_chunk}"
+                )
                 break

             # Collect audio chunks
@@ -129,15 +131,19 @@ async def generate_content() -> str:
             for part in model_turn["parts"]:
                 if part["inlineData"]["mimeType"] == "audio/pcm":
                     audio_chunk = base64.b64decode(part["inlineData"]["data"])
-                    aggregated_response_parts.append(np.frombuffer(audio_chunk, dtype=np.int16))
+                    aggregated_response_parts.append(
+                        np.frombuffer(audio_chunk, dtype=np.int16)
+                    )

         # End of response
         if server_content.get("turnComplete"):
             break

     # Save audio to a file
     if aggregated_response_parts:
-        wavfile.write("output.wav", 24000, np.concatenate(aggregated_response_parts))
+        wavfile.write(
+            "output.wav", 24000, np.concatenate(aggregated_response_parts)
+        )
     # Example response:
     # Setup Response: {'setupComplete': {}}
     # Input: Hello? Gemini are you there?