|
14 | 14 | # limitations under the License. |
15 | 15 |
|
16 | 16 | """ |
| 17 | +This script lets you talk to a Gemini native audio model using the Live API. |
| 18 | +
|
| 19 | +Important: **Use headphones**. This script uses the system default audio |
| 20 | +input and output, which often won't include echo cancellation. To prevent |
| 21 | +the model from interrupting itself, it is important that you use headphones. |
| 22 | +
|
17 | 23 | ## Setup |
18 | 24 |
|
19 | 25 | To install the dependencies for this script, run: |
|
23 | 29 | pip install -U google-genai pyaudio |
24 | 30 | ``` |
25 | 31 |
|
| 32 | +If you are running Python < 3.11, also run `pip install taskgroup`. |
| 33 | +
|
26 | 34 | ## API key |
27 | 35 |
|
28 | | -Ensure the `GOOGLE_API_KEY` environment variable is set to the api-key |
| 36 | +Ensure the `GEMINI_API_KEY` environment variable is set to the API key |
29 | 37 | you obtained from Google AI Studio. |
30 | 38 |
|
31 | 39 | ## Run |
|
53 | 61 | asyncio.TaskGroup = taskgroup.TaskGroup |
54 | 62 | asyncio.ExceptionGroup = exceptiongroup.ExceptionGroup |
55 | 63 |
|
| 64 | + |
56 | 65 | FORMAT = pyaudio.paInt16 |
57 | 66 | CHANNELS = 1 |
58 | 67 | SEND_SAMPLE_RATE = 16000 |
|
62 | 71 | pya = pyaudio.PyAudio() |
63 | 72 |
|
64 | 73 |
|
65 | | -client = genai.Client() # GOOGLE_API_KEY must be set as env variable |
| 74 | +client = genai.Client(http_options={"api_version": "v1alpha"}) # GEMINI_API_KEY must be set as env variable |
66 | 75 |
|
67 | | -MODEL = "gemini-2.5-flash-preview-native-audio-dialog" |
68 | | -CONFIG = {"response_modalities": ["AUDIO"]} |
| 76 | +system_instruction = """ |
| 77 | +You are a helpful and friendly AI assistant. |
| 78 | +Your default tone is helpful, engaging, and clear, with a touch of optimistic wit. |
| 79 | +Anticipate user needs by clarifying ambiguous questions and always conclude your responses |
| 80 | +with an engaging follow-up question to keep the conversation flowing. |
| 81 | +""" |
| 82 | + |
| 83 | +MODEL = "gemini-2.5-flash-native-audio-preview-09-2025" |
| 84 | +CONFIG = { |
| 85 | + "system_instruction": system_instruction, |
| 86 | + "response_modalities": ["AUDIO"], |
| 87 | + "proactivity": {'proactive_audio': True} |
| 88 | +} |
69 | 89 |
|
70 | 90 |
|
71 | 91 | class AudioLoop: |
72 | 92 | def __init__(self): |
73 | 93 | self.audio_in_queue = None |
74 | 94 | self.out_queue = None |
75 | | - |
76 | 95 | self.session = None |
77 | | - |
78 | 96 | self.audio_stream = None |
79 | | - |
80 | 97 | self.receive_audio_task = None |
81 | 98 | self.play_audio_task = None |
82 | 99 |
|
@@ -152,10 +169,10 @@ async def run(self): |
152 | 169 | tg.create_task(self.play_audio()) |
153 | 170 | except asyncio.CancelledError: |
154 | 171 | pass |
155 | | - except ExceptionGroup as EG: |
| 172 | + except asyncio.ExceptionGroup as eg: |
156 | 173 | if self.audio_stream: |
157 | 174 | self.audio_stream.close() |
158 | | - traceback.print_exception(EG) |
| 175 | + traceback.print_exception(eg) |
159 | 176 |
|
160 | 177 |
|
161 | 178 | if __name__ == "__main__": |
|
0 commit comments