diff --git a/genai/live/live_conversation_audio_with_audio.py b/genai/live/live_conversation_audio_with_audio.py
new file mode 100644
index 0000000000..0adf0817a5
--- /dev/null
+++ b/genai/live/live_conversation_audio_with_audio.py
@@ -0,0 +1,80 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+
+import pyaudio
+
+from google import genai
+from google.genai.types import LiveConnectConfig, Modality, AudioTranscriptionConfig, Blob
+
+CHUNK = 4200
+FORMAT = pyaudio.paInt16
+CHANNELS = 1
+MODEL = "gemini-2.0-flash-live-preview-04-09"
+INPUT_RATE = 16000
+OUTPUT_RATE = 24000
+
+client = genai.Client()
+
+config = LiveConnectConfig(
+    response_modalities=[Modality.AUDIO],
+    input_audio_transcription=AudioTranscriptionConfig(),
+    output_audio_transcription=AudioTranscriptionConfig(),
+)
+
+
+async def main():
+    p = pyaudio.PyAudio()
+    async with client.aio.live.connect(model=MODEL, config=config) as session:
+
+        async def send():
+            # Stream raw 16 kHz PCM audio from the microphone to the Live API.
+            stream = p.open(
+                format=FORMAT, channels=CHANNELS, rate=INPUT_RATE, input=True, frames_per_buffer=CHUNK
+            )
+            while True:
+                frame = stream.read(CHUNK)
+                await session.send_realtime_input(media=Blob(data=frame, mime_type="audio/pcm"))
+                # Yield control so the receive task can run.
+                await asyncio.sleep(0)
+
+        async def receive():
+            # Play the model's 24 kHz PCM audio and print transcriptions as they arrive.
+            output_stream = p.open(
+                format=FORMAT, channels=CHANNELS, rate=OUTPUT_RATE, output=True, frames_per_buffer=CHUNK
+            )
+            async for message in session.receive():
+                if message.server_content is None:
+                    continue
+                if message.server_content.input_transcription:
+                    print(message.server_content.model_dump(mode="json", exclude_none=True))
+                if message.server_content.output_transcription:
+                    print(message.server_content.model_dump(mode="json", exclude_none=True))
+                if message.server_content.model_turn:
+                    for part in message.server_content.model_turn.parts:
+                        if part.inline_data and part.inline_data.data:
+                            output_stream.write(part.inline_data.data)
+                await asyncio.sleep(0)
+
+        send_task = asyncio.create_task(send())
+        receive_task = asyncio.create_task(receive())
+        await asyncio.gather(send_task, receive_task)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/genai/live/live_conversation_websocket_audio_with_audio.py b/genai/live/live_conversation_websocket_audio_with_audio.py
new file mode 100644
index 0000000000..32653ae8fc
--- /dev/null
+++ b/genai/live/live_conversation_websocket_audio_with_audio.py
@@ -0,0 +1,127 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import base64
+import json
+
+import numpy as np
+from scipy.io import wavfile
+from websockets.asyncio.client import connect
+
+
+def get_bearer_token() -> str:
+    import google.auth
+    from google.auth.transport.requests import Request
+
+    creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+    auth_req = Request()
+    creds.refresh(auth_req)
+    return creds.token
+
+
+# Get a bearer token for the Vertex AI Live API WebSocket endpoint.
+bearer_token = get_bearer_token()
+
+# WebSocket configuration (same LlmBidiService endpoint used by the other live_websocket_* samples).
+WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com"
+SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
+
+# Set model generation_config
+CONFIG = {"response_modalities": ["AUDIO"]}
+
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": f"Bearer {bearer_token}",
+}
+
+
+async def main() -> None:
+    # Connect to the server
+    async with connect(SERVICE_URL, additional_headers=headers) as ws:
+
+        # Setup the session
+        async def setup() -> None:
+            await ws.send(
+                json.dumps(
+                    {
+                        "setup": {
+                            "model": "gemini-live-2.5-flash",
+                            "generation_config": CONFIG,
+                        }
+                    }
+                )
+            )
+
+            # Receive setup response
+            raw_response = await ws.recv(decode=False)
+            setup_response = json.loads(raw_response.decode("ascii"))
+            print(f"Connected: {setup_response}")
+            return
+
+        # Send text message
+        async def send() -> bool:
+            text_input = input("Input > ")
+            if text_input.lower() in ("q", "quit", "exit"):
+                return False
+
+            msg = {
+                "client_content": {
+                    "turns": [{"role": "user", "parts": [{"text": text_input}]}],
+                    "turn_complete": True,
+                }
+            }
+
+            await ws.send(json.dumps(msg))
+            return True
+
+        # Receive server response
+        async def receive() -> None:
+            responses = []
+
+            # Receive chunks of server response
+            async for raw_response in ws:
+                response = json.loads(raw_response.decode())
+                server_content = response.pop("serverContent", None)
+                if server_content is None:
+                    break
+
+                model_turn = server_content.pop("modelTurn", None)
+                if model_turn is not None:
+                    parts = model_turn.pop("parts", None)
+                    if parts is not None:
+                        for part in parts:
+                            pcm_data = base64.b64decode(part["inlineData"]["data"])
+                            responses.append(np.frombuffer(pcm_data, dtype=np.int16))
+
+                # End of turn
+                turn_complete = server_content.pop("turnComplete", None)
+                if turn_complete:
+                    break
+
+            # Save the returned 24 kHz PCM audio to a WAV file, as in the other websocket samples.
+            if responses:
+                wavfile.write("output.wav", 24000, np.concatenate(responses))
+                print("Response > audio written to output.wav")
+            return
+
+        await setup()
+
+        while True:
+            if not await send():
+                break
+            await receive()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/genai/live/live_ground_ragengine_with_txt.py b/genai/live/live_ground_ragengine_with_txt.py
new file mode 100644
index 0000000000..2452a27071
--- /dev/null
+++ b/genai/live/live_ground_ragengine_with_txt.py
@@ -0,0 +1,73 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+
+_memory_corpus = "projects/cloud-ai-devrel-softserve/locations/us-central1/ragCorpora/2305843009213693952"
+
+
+async def generate_content(memory_corpus: str) -> list[str]:
+    # [START googlegenaisdk_live_ground_ragengine_with_txt]
+    from google import genai
+    from google.genai.types import (
+        Content,
+        LiveConnectConfig,
+        Modality,
+        Part,
+        Tool,
+        Retrieval,
+        VertexRagStore,
+        VertexRagStoreRagResource,
+    )
+
+    client = genai.Client()
+    model_id = "gemini-2.0-flash-live-preview-04-09"
+    rag_store = VertexRagStore(
+        rag_resources=[
+            VertexRagStoreRagResource(
+                rag_corpus=memory_corpus  # Use a memory corpus if you want to store context.
+            )
+        ],
+        # Set `store_context` to True to allow the Live API to sink context into your memory corpus.
+        store_context=True,
+    )
+    config = LiveConnectConfig(
+        response_modalities=[Modality.TEXT],
+        tools=[Tool(retrieval=Retrieval(vertex_rag_store=rag_store))],
+    )
+
+    async with client.aio.live.connect(model=model_id, config=config) as session:
+        text_input = "What year did Mariusz Pudzianowski win World's Strongest Man?"
+        print("> ", text_input, "\n")
+
+        await session.send_client_content(
+            turns=Content(role="user", parts=[Part(text=text_input)])
+        )
+
+        response = []
+
+        async for message in session.receive():
+            if message.text:
+                response.append(message.text)
+                continue
+
+        print("".join(response))
+        # Example output:
+        # > What year did Mariusz Pudzianowski win World's Strongest Man?
+        # Mariusz Pudzianowski won World's Strongest Man in 2002, 2003, 2005, 2007, and 2008.
+        # [END googlegenaisdk_live_ground_ragengine_with_txt]
+        return response
+
+
+if __name__ == "__main__":
+    asyncio.run(generate_content(_memory_corpus))
diff --git a/genai/live/live_websocket_audiogen_with_txt.py b/genai/live/live_websocket_audiogen_with_txt.py
index f7b6f07e5f..7caae3a90a 100644
--- a/genai/live/live_websocket_audiogen_with_txt.py
+++ b/genai/live/live_websocket_audiogen_with_txt.py
@@ -55,9 +55,7 @@ async def generate_content() -> str:
 
     # Websocket Configuration
     WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com"
-    WEBSOCKET_SERVICE_URL = (
-        f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
-    )
+    WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
 
     # Websocket Authentication
     headers = {
@@ -66,9 +64,7 @@ async def generate_content() -> str:
     }
 
     # Model Configuration
-    model_path = (
-        f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
-    )
+    model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
     model_generation_config = {
         "response_modalities": ["AUDIO"],
         "speech_config": {
@@ -129,7 +125,9 @@ async def generate_content() -> str:
                 for part in model_turn["parts"]:
                     if part["inlineData"]["mimeType"] == "audio/pcm":
                         audio_chunk = base64.b64decode(part["inlineData"]["data"])
-                        aggregated_response_parts.append(np.frombuffer(audio_chunk, dtype=np.int16))
+                        aggregated_response_parts.append(
+                            np.frombuffer(audio_chunk, dtype=np.int16)
+                        )
 
             # End of response
             if server_content.get("turnComplete"):
@@ -137,7 +135,9 @@ async def generate_content() -> str:
 
     # Save audio to a file
     if aggregated_response_parts:
-        wavfile.write("output.wav", 24000, np.concatenate(aggregated_response_parts))
+        wavfile.write(
+            "output.wav", 24000, np.concatenate(aggregated_response_parts)
+        )
     # Example response:
     # Setup Response: {'setupComplete': {}}
     # Input: Hello? Gemini are you there?
diff --git a/genai/live/live_websocket_audiotranscript_with_txt.py b/genai/live/live_websocket_audiotranscript_with_txt.py
index 5192b81ef1..065568b369 100644
--- a/genai/live/live_websocket_audiotranscript_with_txt.py
+++ b/genai/live/live_websocket_audiotranscript_with_txt.py
@@ -55,9 +55,7 @@ async def generate_content() -> str:
 
     # Websocket Configuration
     WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com"
-    WEBSOCKET_SERVICE_URL = (
-        f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
-    )
+    WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
 
     # Websocket Authentication
     headers = {
@@ -66,9 +64,7 @@ async def generate_content() -> str:
     }
 
     # Model Configuration
-    model_path = (
-        f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
-    )
+    model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
     model_generation_config = {
         "response_modalities": ["AUDIO"],
         "speech_config": {
@@ -142,7 +138,9 @@ async def generate_content() -> str:
                 for part in model_turn["parts"]:
                     if part["inlineData"]["mimeType"] == "audio/pcm":
                         audio_chunk = base64.b64decode(part["inlineData"]["data"])
-                        aggregated_response_parts.append(np.frombuffer(audio_chunk, dtype=np.int16))
+                        aggregated_response_parts.append(
+                            np.frombuffer(audio_chunk, dtype=np.int16)
+                        )
 
             # End of response
             if server_content.get("turnComplete"):
diff --git a/genai/live/live_websocket_textgen_with_audio.py b/genai/live/live_websocket_textgen_with_audio.py
index de6fd9d55c..9b3c0cbfd5 100644
--- a/genai/live/live_websocket_textgen_with_audio.py
+++ b/genai/live/live_websocket_textgen_with_audio.py
@@ -65,9 +65,7 @@ def read_wavefile(filepath: str) -> tuple[str, str]:
 
     # Websocket Configuration
    WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com"
-    WEBSOCKET_SERVICE_URL = (
-        f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
-    )
+    WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
 
     # Websocket Authentication
     headers = {
@@ -76,9 +74,7 @@ def read_wavefile(filepath: str) -> tuple[str, str]:
     }
 
     # Model Configuration
-    model_path = (
-        f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
-    )
+    model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
     model_generation_config = {"response_modalities": ["TEXT"]}
 
     async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session:
@@ -105,7 +101,9 @@ def read_wavefile(filepath: str) -> tuple[str, str]:
             return "Error: WebSocket setup failed."
 
         # 3. Send audio message
-        encoded_audio_message, mime_type = read_wavefile("hello_gemini_are_you_there.wav")
+        encoded_audio_message, mime_type = read_wavefile(
+            "hello_gemini_are_you_there.wav"
+        )
         # Example audio message: "Hello? Gemini are you there?"
         user_message = {
diff --git a/genai/live/live_websocket_textgen_with_txt.py b/genai/live/live_websocket_textgen_with_txt.py
index b36487cc9a..ab4062e7db 100644
--- a/genai/live/live_websocket_textgen_with_txt.py
+++ b/genai/live/live_websocket_textgen_with_txt.py
@@ -51,9 +51,7 @@ async def generate_content() -> str:
 
     # Websocket Configuration
     WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com"
-    WEBSOCKET_SERVICE_URL = (
-        f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
-    )
+    WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent"
 
     # Websocket Authentication
     headers = {
@@ -62,9 +60,7 @@ async def generate_content() -> str:
     }
 
     # Model Configuration
-    model_path = (
-        f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
-    )
+    model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}"
     model_generation_config = {"response_modalities": ["TEXT"]}
 
     async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session:
diff --git a/genai/live/live_with_txt.py b/genai/live/live_with_txt.py
index a3c7518843..fd412af774 100644
--- a/genai/live/live_with_txt.py
+++ b/genai/live/live_with_txt.py
@@ -35,7 +35,9 @@ async def generate_content() -> list[str]:
     ) as session:
         text_input = "Hello? Gemini, are you there?"
         print("> ", text_input, "\n")
-        await session.send_client_content(turns=Content(role="user", parts=[Part(text=text_input)]))
+        await session.send_client_content(
+            turns=Content(role="user", parts=[Part(text=text_input)])
+        )
 
         response = []
 
diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt
index be9472583c..38102ddd02 100644
--- a/genai/live/requirements.txt
+++ b/genai/live/requirements.txt
@@ -1,3 +1,4 @@
 google-genai==1.27.0
 scipy==1.15.3
-websockets==15.0.1
\ No newline at end of file
+websockets==15.0.1
+pyaudio==0.2.14
\ No newline at end of file
diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py
index ce38253986..6b4cda4ade 100644
--- a/genai/live/test_live_examples.py
+++ b/genai/live/test_live_examples.py
@@ -25,6 +25,7 @@
 import live_websocket_textgen_with_audio
 import live_websocket_textgen_with_txt
 import live_with_txt
+import live_ground_ragengine_with_txt
 
 os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"
 os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"
@@ -32,6 +33,29 @@
 # os.environ['GOOGLE_CLOUD_PROJECT'] = "add-your-project-name"
 
 
+@pytest.fixture()
+def mock_rag_components(mocker):
+    mock_client_cls = mocker.patch("google.genai.Client")
+
+    class AsyncIterator:
+        def __aiter__(self):
+            return self
+
+        async def __anext__(self):
+            if not hasattr(self, "used"):
+                self.used = True
+                return mocker.MagicMock(
+                    text="Mariusz Pudzianowski won in 2002, 2003, 2005, 2007, and 2008."
+                )
+            raise StopAsyncIteration
+
+    mock_session = mocker.AsyncMock()
+    mock_session.__aenter__.return_value = mock_session
+    mock_session.receive = lambda: AsyncIterator()
+
+    mock_client_cls.return_value.aio.live.connect.return_value = mock_session
+
+
 @pytest.mark.asyncio
 async def test_live_with_text() -> None:
     assert await live_with_txt.generate_content()
@@ -55,3 +79,8 @@ async def test_live_websocket_audiogen_with_txt() -> None:
 @pytest.mark.asyncio
 async def test_live_websocket_audiotranscript_with_txt() -> None:
     assert await live_websocket_audiotranscript_with_txt.generate_content()
+
+
+@pytest.mark.asyncio
+async def test_live_ground_ragengine_with_txt(mock_rag_components) -> None:
+    assert await live_ground_ragengine_with_txt.generate_content("test")
diff --git a/genai/text_generation/test_text_generation_examples.py b/genai/text_generation/test_text_generation_examples.py
index 3381ae7ec8..a29764ec61 100644
--- a/genai/text_generation/test_text_generation_examples.py
+++ b/genai/text_generation/test_text_generation_examples.py
@@ -37,6 +37,8 @@
 import textgen_with_video
 import textgen_with_youtube_video
 import thinking_textgen_with_txt
+import textgen_code_with_pdf
+
 
 os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"
 os.environ["GOOGLE_CLOUD_LOCATION"] = "global"  # "us-central1"
@@ -137,9 +139,21 @@ def test_textgen_with_youtube_video() -> None:
     assert response
 
 
+def test_model_optimizer_textgen_with_txt() -> None:
+    os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"
+    response = model_optimizer_textgen_with_txt.generate_content()
+    os.environ["GOOGLE_CLOUD_LOCATION"] = "global"  # "us-central1"
+    assert response
+
+
+def test_textgen_code_with_pdf() -> None:
+    response = textgen_code_with_pdf.generate_content()
+    assert response
+
+
 # Migrated to Model Optimser Folder
 # def test_model_optimizer_textgen_with_txt() -> None:
 #     os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"
 #     response = model_optimizer_textgen_with_txt.generate_content()
 #     os.environ["GOOGLE_CLOUD_LOCATION"] = "global"  # "us-central1"
-#     assert response
+#     assert response
\ No newline at end of file
diff --git a/genai/text_generation/textgen_code_with_pdf.py b/genai/text_generation/textgen_code_with_pdf.py
new file mode 100644
index 0000000000..da4ca76b73
--- /dev/null
+++ b/genai/text_generation/textgen_code_with_pdf.py
@@ -0,0 +1,55 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# !This sample works with Google Cloud Vertex AI API only.
+
+
+def generate_content() -> str:
+    # [START googlegenaisdk_textgen_code_with_pdf]
+    from google import genai
+    from google.genai.types import HttpOptions, Part
+
+    client = genai.Client(http_options=HttpOptions(api_version="v1beta1"))
+    model_id = "gemini-2.5-flash"
+    prompt = "Convert this python code to use Google Python Style Guide."
+    print("> ", prompt, "\n")
+    pdf_uri = "https://storage.googleapis.com/cloud-samples-data/generative-ai/text/inefficient_fibonacci_series_python_code.pdf"
+
+    pdf_file = Part.from_uri(
+        file_uri=pdf_uri,
+        mime_type="application/pdf",
+    )
+
+    response = client.models.generate_content(
+        model=model_id,
+        contents=[pdf_file, prompt],
+    )
+
+    print(response.text)
+    # Example response:
+    # > Convert this python code to use Google Python Style Guide.
+    #
+    # def generate_fibonacci_sequence(num_terms: int) -> list[int]:
+    #     """Generates the Fibonacci sequence up to a specified number of terms.
+    #
+    #     This function calculates the Fibonacci sequence starting with 0 and 1.
+    #     It handles base cases for 0, 1, and 2 terms efficiently.
+    #
+    # # ...
+    # [END googlegenaisdk_textgen_code_with_pdf]
+    return response.text
+
+
+if __name__ == "__main__":
+    generate_content()
diff --git a/genai/tools/tools_vais_with_txt.py b/genai/tools/tools_vais_with_txt.py
index fa4109d597..dbe327787d 100644
--- a/genai/tools/tools_vais_with_txt.py
+++ b/genai/tools/tools_vais_with_txt.py
@@ -29,6 +29,8 @@ def generate_content(datastore: str) -> str:
 
     # Load Data Store ID from Vertex AI Search
     # datastore = "projects/111111111111/locations/global/collections/default_collection/dataStores/data-store-id"
+
+
     response = client.models.generate_content(
         model="gemini-2.5-flash",
         contents="How do I make an appointment to renew my driver's license?",