Skip to content

Commit ef29ddd

Browse files
GuinersGuinersmsampathkumar
authored
feat(genai): Add Live API samples (#13521)
* adding working samples with tests * adding working samples with tests * adding working samples with tests * adding working samples with tests * adding working samples with tests * Update genai/live/live_txt_with_audio.py * Update live_audio_with_txt.py * Update genai/live/live_transcribe_with_audio.py * Update genai/live/live_func_call_with_txt.py * adding working samples with tests * adding working samples with tests * fixed websocket * fixed websocket * fixed websocket * fix(live_structured_ouput_with_txt): update requirements.txt * fix(live_transcribe_with_audio): fix type error. using `if` conditions, removing `None` objects in `response` list object. * chore(genai-live): update requirements-test.txt * codereview fix --------- Co-authored-by: Guiners <[email protected]> Co-authored-by: Sampath Kumar <[email protected]>
1 parent 2647211 commit ef29ddd

15 files changed

+583
-19
lines changed

genai/live/live_audiogen_with_txt.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav
17+
# Install helpers for saving the audio response: pip install numpy scipy
18+
19+
import asyncio
20+
21+
22+
async def generate_content() -> None:
    """Send one text prompt over the Live API and save the spoken reply.

    Opens a Live session configured for audio output with a prebuilt voice,
    sends a single user text turn, gathers every inline audio payload from the
    streamed server messages and, if any were received, writes them to
    ``gemini_response.wav`` in the current working directory.

    Returns:
        None. The observable results are the printed progress lines and the
        WAV file.
    """
    # [START googlegenaisdk_live_audiogen_with_txt]
    import numpy as np
    import scipy.io.wavfile as wavfile
    from google import genai
    from google.genai.types import (Content, LiveConnectConfig, Modality, Part,
                                    PrebuiltVoiceConfig, SpeechConfig,
                                    VoiceConfig)

    client = genai.Client()
    model = "gemini-2.0-flash-live-preview-04-09"
    # For more Voice options, check https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash#live-api-native-audio
    voice_name = "Aoede"

    config = LiveConnectConfig(
        response_modalities=[Modality.AUDIO],
        speech_config=SpeechConfig(
            voice_config=VoiceConfig(
                prebuilt_voice_config=PrebuiltVoiceConfig(
                    voice_name=voice_name,
                )
            ),
        ),
    )

    async with client.aio.live.connect(
        model=model,
        config=config,
    ) as session:
        text_input = "Hello? Gemini are you there?"
        print("> ", text_input, "\n")

        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        audio_data_chunks = []
        async for message in session.receive():
            # Skip server messages that carry no model content instead of
            # failing on a missing `server_content`.
            server_content = message.server_content
            if not server_content or not server_content.model_turn:
                continue

            for part in server_content.model_turn.parts or []:
                # Only parts with an actual inline payload contribute audio.
                if part.inline_data and part.inline_data.data:
                    # The payload is interpreted as 16-bit integer samples.
                    audio_data_chunks.append(
                        np.frombuffer(part.inline_data.data, dtype=np.int16)
                    )

        if audio_data_chunks:
            print("Received audio answer. Saving to local file...")
            full_audio_array = np.concatenate(audio_data_chunks)

            output_filename = "gemini_response.wav"
            # Sample rate the WAV file is written with.
            sample_rate = 24000

            wavfile.write(output_filename, sample_rate, full_audio_array)
            print(f"Audio saved to {output_filename}")

    # Example output:
    # >  Hello? Gemini are you there?
    # Received audio answer. Saving to local file...
    # Audio saved to gemini_response.wav
    # [END googlegenaisdk_live_audiogen_with_txt]
    return None


if __name__ == "__main__":
    asyncio.run(generate_content())

genai/live/live_code_exec_with_txt.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import asyncio
16+
17+
18+
async def generate_content() -> list[str]:
    """Ask the model a question it can solve with the code-execution tool.

    Opens a Live session with text output and the code-execution tool
    enabled, sends one user text turn, prints any generated code and its
    execution output as they stream in, and finally prints the joined text
    answer.

    Returns:
        The text fragments received from the server, in arrival order.
    """
    # [START googlegenaisdk_live_code_exec_with_txt]
    from google import genai
    from google.genai.types import (Content, LiveConnectConfig, Modality, Part,
                                    Tool, ToolCodeExecution)

    client = genai.Client()
    model_id = "gemini-2.0-flash-live-preview-04-09"
    config = LiveConnectConfig(
        response_modalities=[Modality.TEXT],
        tools=[Tool(code_execution=ToolCodeExecution())],
    )
    async with client.aio.live.connect(model=model_id, config=config) as session:
        text_input = "Compute the largest prime palindrome under 10"
        print("> ", text_input, "\n")
        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        response = []

        async for chunk in session.receive():
            if not chunk.server_content:
                continue

            if chunk.text is not None:
                response.append(chunk.text)

            model_turn = chunk.server_content.model_turn
            if not model_turn:
                continue

            # `parts` may be unset on a model turn; treat that as "no parts"
            # rather than failing the whole stream.
            for part in model_turn.parts or []:
                if part.executable_code is not None:
                    print(part.executable_code.code)

                if part.code_execution_result is not None:
                    print(part.code_execution_result.output)

        print("".join(response))
    # Example output:
    # >  Compute the largest prime palindrome under 10
    # Final Answer: The final answer is $\boxed{7}$
    # [END googlegenaisdk_live_code_exec_with_txt]
    return response


if __name__ == "__main__":
    asyncio.run(generate_content())

genai/live/live_func_call_with_txt.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import asyncio
16+
17+
from google.genai.types import FunctionResponse
18+
19+
20+
async def generate_content() -> list[FunctionResponse]:
    """Demonstrate function calling over the Live API with a text prompt.

    Declares two parameterless functions, opens a Live session with text
    output, sends one user text turn, and answers every function call the
    model issues with a hard-coded ``{"result": "ok"}`` response.

    Returns:
        Every function response sent back to the model, in the order sent.
    """
    # [START googlegenaisdk_live_func_call_with_txt]
    from google import genai
    from google.genai.types import (Content, FunctionDeclaration,
                                    FunctionResponse, LiveConnectConfig,
                                    Modality, Part, Tool)

    client = genai.Client()
    model_id = "gemini-2.0-flash-live-preview-04-09"

    # Simple function definitions
    turn_on_the_lights = FunctionDeclaration(name="turn_on_the_lights")
    turn_off_the_lights = FunctionDeclaration(name="turn_off_the_lights")

    config = LiveConnectConfig(
        response_modalities=[Modality.TEXT],
        tools=[Tool(function_declarations=[turn_on_the_lights, turn_off_the_lights])],
    )
    async with client.aio.live.connect(model=model_id, config=config) as session:
        text_input = "Turn on the lights please"
        print("> ", text_input, "\n")
        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        # All responses sent during the session; this is the return value.
        function_responses = []

        async for chunk in session.receive():
            if chunk.server_content:
                if chunk.text is not None:
                    print(chunk.text)

            elif chunk.tool_call:
                # Build responses for THIS tool call only. Sending the
                # cumulative list would re-answer earlier calls whenever the
                # model issues more than one tool call in a session.
                current_responses = []
                for fc in chunk.tool_call.function_calls or []:
                    function_response = FunctionResponse(
                        name=fc.name,
                        response={
                            "result": "ok"
                        },  # simple, hard-coded function response
                    )
                    current_responses.append(function_response)
                    print(function_response.response["result"])

                if current_responses:
                    function_responses.extend(current_responses)
                    await session.send_tool_response(
                        function_responses=current_responses
                    )

    # Example output:
    # >  Turn on the lights please
    # ok
    # [END googlegenaisdk_live_func_call_with_txt]
    return function_responses


if __name__ == "__main__":
    asyncio.run(generate_content())
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import asyncio
17+
18+
19+
async def generate_content() -> list[str]:
    """Ask a question over the Live API with Google Search grounding enabled.

    Opens a Live session with text output and the Google Search tool, sends
    one user text turn, prints any code the model generates or executes while
    answering, and finally prints the joined text answer.

    Returns:
        The text fragments received from the server, in arrival order.
    """
    # [START googlegenaisdk_live_ground_googsearch_with_txt]
    from google import genai
    from google.genai.types import (Content, GoogleSearch, LiveConnectConfig,
                                    Modality, Part, Tool)

    client = genai.Client()
    model_id = "gemini-2.0-flash-live-preview-04-09"
    config = LiveConnectConfig(
        response_modalities=[Modality.TEXT],
        tools=[Tool(google_search=GoogleSearch())],
    )
    async with client.aio.live.connect(model=model_id, config=config) as session:
        text_input = "When did the last Brazil vs. Argentina soccer match happen?"
        # Echo the prompt so the console output matches the documented
        # example below and the other Live samples.
        print("> ", text_input, "\n")
        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        response = []

        async for chunk in session.receive():
            if not chunk.server_content:
                continue

            if chunk.text is not None:
                response.append(chunk.text)

            # The model might generate and execute Python code to use Search
            model_turn = chunk.server_content.model_turn
            if not model_turn:
                continue

            # `parts` may be unset on a model turn; treat that as "no parts"
            # rather than failing the whole stream.
            for part in model_turn.parts or []:
                if part.executable_code is not None:
                    print(part.executable_code.code)

                if part.code_execution_result is not None:
                    print(part.code_execution_result.output)

        print("".join(response))
    # Example output:
    # >  When did the last Brazil vs. Argentina soccer match happen?
    # The last Brazil vs. Argentina soccer match was on March 25, 2025, a 2026 World Cup qualifier, where Argentina defeated Brazil 4-1.
    # [END googlegenaisdk_live_ground_googsearch_with_txt]
    return response


if __name__ == "__main__":
    asyncio.run(generate_content())
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav
15+
# Install dependencies for this sample: pip install openai google-auth requests pydantic
16+
17+
from pydantic import BaseModel
18+
19+
20+
class CalendarEvent(BaseModel):
    # Structured-output target: passed as `response_format` so the parsed
    # reply is returned as an instance of this model.
    # NOTE: documented with comments on purpose; a class docstring would be
    # emitted as the schema description and change the request payload.

    # Short name of the event.
    name: str
    # When the event takes place, kept as free-form text.
    date: str
    # People taking part in the event.
    participants: list[str]
24+
25+
26+
def generate_content() -> CalendarEvent:
    """Extract a calendar event from a sentence as structured output.

    Obtains an access token from the application default credentials, points
    an OpenAI client at the Vertex AI endpoint for the project named by the
    ``GOOGLE_CLOUD_PROJECT`` environment variable, and asks the model to parse
    a short sentence into a ``CalendarEvent``. The parsed result is printed
    and returned.

    Returns:
        The ``parsed`` payload of the first completion choice.

    Raises:
        KeyError: if ``GOOGLE_CLOUD_PROJECT`` is not set.
    """
    # [START googlegenaisdk_live_structured_ouput_with_txt]
    import os

    import google.auth
    import openai
    from google.auth.transport.requests import Request
    from openai.types.chat import (ChatCompletionSystemMessageParam,
                                   ChatCompletionUserMessageParam)

    project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
    location = "us-central1"

    # Fetch an access token programmatically.
    creds, _ = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    creds.refresh(Request())
    # Note: by default the token is valid for 1 hour
    # (https://cloud.google.com/docs/authentication/token-types#at-lifetime);
    # once it expires it has to be refreshed again.

    ##############################
    # Pick one of the two options:
    ##############################

    # Calling a Gemini model: keep ENDPOINT_ID set to "openapi".
    ENDPOINT_ID = "openapi"

    # Calling a self-deployed Model Garden model: set ENDPOINT_ID to your own
    # endpoint so the client's base URL targets it.
    # ENDPOINT_ID = "YOUR_ENDPOINT_ID"

    base_url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/{ENDPOINT_ID}"

    # OpenAI client, authenticated with the access token.
    client = openai.OpenAI(base_url=base_url, api_key=creds.token)

    system_message = ChatCompletionSystemMessageParam(
        role="system", content="Extract the event information."
    )
    user_message = ChatCompletionUserMessageParam(
        role="user",
        content="Alice and Bob are going to a science fair on Friday.",
    )

    completion = client.beta.chat.completions.parse(
        model="google/gemini-2.0-flash-001",
        messages=[system_message, user_message],
        response_format=CalendarEvent,
    )

    first_choice = completion.choices[0]
    response = first_choice.message.parsed
    print(response)

    # System message: Extract the event information.
    # User message: Alice and Bob are going to a science fair on Friday.
    # Output message: name='science fair' date='Friday' participants=['Alice', 'Bob']
    # [END googlegenaisdk_live_structured_ouput_with_txt]
    return response


if __name__ == "__main__":
    generate_content()

0 commit comments

Comments
 (0)