Skip to content

Commit 4fc8756

Browse files
authored
Merge pull request #562 from deepgram/fix/agent-tags
fix: moves agent tags to settings
2 parents eca802a + 598fd9d commit 4fc8756

File tree

10 files changed

+358
-231
lines changed

10 files changed

+358
-231
lines changed

deepgram/clients/agent/v1/websocket/options.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,9 +271,7 @@ class Agent(BaseResponse):
271271
greeting: Optional[str] = field(
272272
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
273273
)
274-
tags: Optional[List[str]] = field(
275-
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
276-
)
274+
277275

278276
def __post_init__(self):
279277
"""Handle conversion of dict/list data to proper Speak objects"""
@@ -350,6 +348,9 @@ class SettingsOptions(BaseResponse):
350348

351349
experimental: Optional[bool] = field(default=False)
352350
type: str = str(AgentWebSocketEvents.Settings)
351+
tags: Optional[List[str]] = field(
352+
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
353+
)
353354
audio: Audio = field(default_factory=Audio)
354355
agent: Agent = field(default_factory=Agent)
355356
mip_opt_out: Optional[bool] = field(

examples/agent/tags/main.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
2+
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
3+
# SPDX-License-Identifier: MIT
4+
from signal import SIGINT, SIGTERM
5+
import asyncio
6+
import time
7+
from deepgram.utils import verboselogs
8+
from deepgram import (
9+
DeepgramClient,
10+
DeepgramClientOptions,
11+
AgentWebSocketEvents,
12+
SettingsOptions,
13+
)
14+
# Sample greeting text for the example agent.
TTS_TEXT = "Hello, this is a text to speech example using Deepgram."

# One-shot flag: the binary-data handler prints its usage hint only while this
# is True and then flips it to False. (A `global` statement is only meaningful
# inside a function, so none is needed at module scope.)
warning_notice = True
17+
async def main():
    """Run the agent websocket example with tags attached to the settings.

    Builds a Deepgram client (the API key is expected in the DEEPGRAM_API_KEY
    environment variable), opens an async agent websocket, registers a
    printing handler for every agent event, sends ``SettingsOptions`` carrying
    top-level ``tags``, and then idles until a SIGINT/SIGTERM handler
    schedules ``shutdown``.
    """
    try:
        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
        config: DeepgramClientOptions = DeepgramClientOptions(
            options={
                "keepalive": "true",
                "microphone_record": "true",
                "speaker_playback": "true",
            },
            # verbose=verboselogs.DEBUG,
        )

        # Initialize Deepgram client - API key should be set in DEEPGRAM_API_KEY environment variable
        # For production testing, make sure your API key has proper permissions
        deepgram: DeepgramClient = DeepgramClient("", config)
        print("Initialized Deepgram client for production API testing")

        # Create a websocket connection to Deepgram
        dg_connection = deepgram.agent.asyncwebsocket.v("1")

        # Register the signal handlers only once the connection exists, so a
        # handler can never run before `dg_connection` is bound.
        loop = asyncio.get_running_loop()
        shutdown_tasks = []

        def request_shutdown(sig):
            # Keep a reference so the shutdown task cannot be garbage-collected
            # while it is still running.
            shutdown_tasks.append(
                asyncio.create_task(shutdown(sig, loop, dg_connection))
            )

        for sig in (SIGTERM, SIGINT):
            # Pass the signal as an argument instead of closing over the loop
            # variable; a closure would see only the last value of the loop.
            loop.add_signal_handler(sig, request_shutdown, sig)

        async def on_open(self, open, **kwargs):
            print(f"\n\n{open}\n\n")

        async def on_binary_data(self, data, **kwargs):
            global warning_notice
            # Print the usage hint only for the first audio chunk.
            if warning_notice:
                print("Received binary data")
                print("You can do something with the binary data here")
                print("OR")
                print(
                    "If you want to simply play the audio, set speaker_playback to true in the options for DeepgramClientOptions"
                )
                warning_notice = False

        async def on_welcome(self, welcome, **kwargs):
            print(f"\n\n{welcome}\n\n")

        async def on_settings_applied(self, settings_applied, **kwargs):
            print(f"\n\n{settings_applied}\n\n")

        async def on_conversation_text(self, conversation_text, **kwargs):
            print(f"\n\n{conversation_text}\n\n")

        async def on_user_started_speaking(self, user_started_speaking, **kwargs):
            print(f"\n\n{user_started_speaking}\n\n")

        async def on_agent_thinking(self, agent_thinking, **kwargs):
            print(f"\n\n{agent_thinking}\n\n")

        async def on_agent_started_speaking(self, agent_started_speaking, **kwargs):
            print(f"\n\n{agent_started_speaking}\n\n")

        async def on_agent_audio_done(self, agent_audio_done, **kwargs):
            print(f"\n\n{agent_audio_done}\n\n")

        async def on_close(self, close, **kwargs):
            print(f"\n\n{close}\n\n")

        async def on_error(self, error, **kwargs):
            print(f"\n\n{error}\n\n")

        async def on_unhandled(self, unhandled, **kwargs):
            print(f"\n\n{unhandled}\n\n")

        # Event -> handler table, registered in the same order as before.
        handlers = (
            (AgentWebSocketEvents.Open, on_open),
            (AgentWebSocketEvents.AudioData, on_binary_data),
            (AgentWebSocketEvents.Welcome, on_welcome),
            (AgentWebSocketEvents.SettingsApplied, on_settings_applied),
            (AgentWebSocketEvents.ConversationText, on_conversation_text),
            (AgentWebSocketEvents.UserStartedSpeaking, on_user_started_speaking),
            (AgentWebSocketEvents.AgentThinking, on_agent_thinking),
            (AgentWebSocketEvents.AgentStartedSpeaking, on_agent_started_speaking),
            (AgentWebSocketEvents.AgentAudioDone, on_agent_audio_done),
            (AgentWebSocketEvents.Close, on_close),
            (AgentWebSocketEvents.Error, on_error),
            (AgentWebSocketEvents.Unhandled, on_unhandled),
        )
        for event, handler in handlers:
            dg_connection.on(event, handler)

        # connect to websocket
        options = SettingsOptions()
        options.agent.think.provider.type = "open_ai"
        options.agent.think.provider.model = "gpt-4o-mini"
        options.agent.think.prompt = "You are a helpful AI assistant."
        # `greeting` is declared on the Agent settings object, so it has to be
        # set on `options.agent`; assigning it on `options` itself would only
        # create a stray attribute.
        options.agent.greeting = "Hello, this is a text to speech example using Deepgram."
        options.agent.listen.provider.keyterms = ["hello", "goodbye"]
        options.agent.listen.provider.model = "nova-3"
        options.agent.listen.provider.type = "deepgram"
        options.agent.speak.provider.type = "deepgram"
        options.agent.speak.provider.model = "aura-2-thalia-en"
        options.agent.language = "en"

        # Add tags for production testing (tags live on the settings, not the agent)
        options.tags = ["production-test", "sdk-example", "agent-websocket", "tags-validation"]
        print(f"Using tags: {options.tags}")

        # Print the full options being sent
        print("Options being sent to API:")
        print(options.to_json())

        if await dg_connection.start(options) is False:
            print("Failed to start connection")
            return

        # Report success only after start() has actually succeeded.
        print("\n\n✅ Connection established with tags!")
        print(f"✅ Tags being used: {options.tags}")
        print("\n🎤 You can now speak into your microphone...")
        print("The agent will respond using the production API with tags.")
        print("Press Ctrl+C to stop.\n\n")

        # wait until cancelled
        try:
            while True:
                await asyncio.sleep(1)
        except asyncio.CancelledError:
            # This block will be executed when the shutdown coroutine cancels all tasks
            pass
        finally:
            await dg_connection.finish()

        print("Finished")

    except ValueError as e:
        print(f"Invalid value encountered: {e}")
    except Exception as e:
        # Top-level boundary of the example: report and exit instead of crashing.
        print(f"An unexpected error occurred: {e}")
128+
async def shutdown(signal, loop, dg_connection):
    """Close the agent connection, cancel every other task, and stop the loop.

    Args:
        signal: The received signal; only its ``name`` is used, for logging.
        loop: Event loop to stop once the outstanding tasks have settled.
        dg_connection: Connection whose ``finish()`` coroutine is awaited first.
    """
    print(f"Received exit signal {signal.name}...")
    await dg_connection.finish()

    # Everything except the task running this coroutine gets cancelled.
    current = asyncio.current_task()
    tasks = [t for t in asyncio.all_tasks() if t is not current]
    for task in tasks:
        task.cancel()
    print(f"Cancelling {len(tasks)} outstanding tasks")

    # return_exceptions=True collects each task's CancelledError instead of
    # re-raising it here.
    await asyncio.gather(*tasks, return_exceptions=True)
    loop.stop()
    print("Shutdown complete.")
137+
# Only start the example when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())

tests/daily_test/test_daily_agent_websocket.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -433,10 +433,10 @@ def on_unhandled(self, unhandled, **kwargs):
433433

434434
# Copy the agent config; for the special agent tags test case the tags are set on the settings object below, not in the agent config
435435
agent_config = test_case["agent_config"].copy()
436-
if test_case.get("test_agent_tags", False):
437-
agent_config["tags"] = ["test", "daily"]
438-
439436
settings.agent = agent_config
437+
438+
if test_case.get("test_agent_tags", False):
439+
settings.tags = ["test", "daily"]
440440
settings.experimental = True # Enable experimental features
441441

442442
print(f"🔧 Starting connection with settings: {settings.to_dict()}")
@@ -568,7 +568,7 @@ def on_unhandled(self, unhandled, **kwargs):
568568
expected_tags = ["test", "daily"]
569569
# Verify settings contain the expected tags
570570
settings_dict = settings.to_dict()
571-
agent_tags = settings_dict.get("agent", {}).get("tags", [])
571+
agent_tags = settings_dict.get("tags", [])
572572
assert agent_tags == expected_tags, f"Test ID: {unique} - Agent tags should match expected tags"
573573
print(f"✓ Agent tags validated: {agent_tags}")
574574

tests/response_data/agent/websocket/agent_tags-e55ef69c-events.json

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
11
[
22
{
33
"type": "Welcome",
4-
"timestamp": 1753228536.7372491,
4+
"timestamp": 1754089254.059805,
55
"data": {
66
"type": "Welcome",
7-
"request_id": "f86f006a-1dc6-484e-b040-3825bedf93ba"
7+
"request_id": "60cc0bbe-be55-4c34-b0c6-e9c138885967"
88
}
99
},
1010
{
1111
"type": "Open",
12-
"timestamp": 1753228536.737364,
12+
"timestamp": 1754089254.060123,
1313
"data": {
1414
"type": "Open"
1515
}
1616
},
1717
{
1818
"type": "SettingsApplied",
19-
"timestamp": 1753228536.7819788,
19+
"timestamp": 1754089254.1029801,
2020
"data": {
2121
"type": "SettingsApplied"
2222
}
2323
},
2424
{
2525
"type": "ConversationText",
26-
"timestamp": 1753228537.787007,
26+
"timestamp": 1754089255.110622,
2727
"data": {
2828
"type": "ConversationText",
2929
"role": "user",
@@ -32,23 +32,23 @@
3232
},
3333
{
3434
"type": "Unhandled",
35-
"timestamp": 1753228537.787831,
35+
"timestamp": 1754089255.1114728,
3636
"data": {
3737
"type": "Unhandled",
3838
"raw": "{\"type\":\"History\",\"role\":\"user\",\"content\":\"Hello, this is a test of agent tags functionality.\"}"
3939
}
4040
},
4141
{
4242
"type": "Unhandled",
43-
"timestamp": 1753228537.7884219,
43+
"timestamp": 1754089255.111763,
4444
"data": {
4545
"type": "Unhandled",
4646
"raw": "{\"type\":\"EndOfThought\"}"
4747
}
4848
},
4949
{
5050
"type": "ConversationText",
51-
"timestamp": 1753228538.68838,
51+
"timestamp": 1754089256.122815,
5252
"data": {
5353
"type": "ConversationText",
5454
"role": "assistant",
@@ -57,24 +57,24 @@
5757
},
5858
{
5959
"type": "Unhandled",
60-
"timestamp": 1753228538.689159,
60+
"timestamp": 1754089256.12335,
6161
"data": {
6262
"type": "Unhandled",
6363
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"Hello!\"}"
6464
}
6565
},
6666
{
6767
"type": "AgentStartedSpeaking",
68-
"timestamp": 1753228538.7265012,
68+
"timestamp": 1754089256.12362,
6969
"data": {
70-
"total_latency": 0.903870874,
71-
"tts_latency": 0.314808536,
72-
"ttt_latency": 0.589062181
70+
"total_latency": 0.962977896,
71+
"tts_latency": 0.368340208,
72+
"ttt_latency": 0.594637578
7373
}
7474
},
7575
{
7676
"type": "ConversationText",
77-
"timestamp": 1753228539.291852,
77+
"timestamp": 1754089256.6148539,
7878
"data": {
7979
"type": "ConversationText",
8080
"role": "user",
@@ -83,73 +83,73 @@
8383
},
8484
{
8585
"type": "Unhandled",
86-
"timestamp": 1753228539.292917,
86+
"timestamp": 1754089256.615833,
8787
"data": {
8888
"type": "Unhandled",
8989
"raw": "{\"type\":\"History\",\"role\":\"user\",\"content\":\"Can you confirm you are working with tags enabled?\"}"
9090
}
9191
},
9292
{
9393
"type": "Unhandled",
94-
"timestamp": 1753228539.2931762,
94+
"timestamp": 1754089256.616431,
9595
"data": {
9696
"type": "Unhandled",
9797
"raw": "{\"type\":\"EndOfThought\"}"
9898
}
9999
},
100100
{
101101
"type": "AgentAudioDone",
102-
"timestamp": 1753228539.2934241,
102+
"timestamp": 1754089256.616906,
103103
"data": {
104104
"type": "AgentAudioDone"
105105
}
106106
},
107107
{
108108
"type": "ConversationText",
109-
"timestamp": 1753228540.502542,
109+
"timestamp": 1754089257.768304,
110110
"data": {
111111
"type": "ConversationText",
112112
"role": "assistant",
113-
"content": "Yes, I can confirm that tag functionality is enabled."
113+
"content": "Yes, I can confirm that I am able to work with tags."
114114
}
115115
},
116116
{
117117
"type": "Unhandled",
118-
"timestamp": 1753228540.5037608,
118+
"timestamp": 1754089257.768838,
119119
"data": {
120120
"type": "Unhandled",
121-
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"Yes, I can confirm that tag functionality is enabled.\"}"
121+
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"Yes, I can confirm that I am able to work with tags.\"}"
122122
}
123123
},
124124
{
125125
"type": "AgentStartedSpeaking",
126-
"timestamp": 1753228540.5045602,
126+
"timestamp": 1754089257.7692642,
127127
"data": {
128-
"total_latency": 1.146776066,
129-
"tts_latency": 0.378390996,
130-
"ttt_latency": 0.76838492
128+
"total_latency": 1.157360975,
129+
"tts_latency": 0.385327765,
130+
"ttt_latency": 0.7720331
131131
}
132132
},
133133
{
134134
"type": "ConversationText",
135-
"timestamp": 1753228543.8797429,
135+
"timestamp": 1754089261.3335302,
136136
"data": {
137137
"type": "ConversationText",
138138
"role": "assistant",
139-
"content": "How can I assist you with it?"
139+
"content": "How can I assist you with them?"
140140
}
141141
},
142142
{
143143
"type": "Unhandled",
144-
"timestamp": 1753228543.881195,
144+
"timestamp": 1754089261.334396,
145145
"data": {
146146
"type": "Unhandled",
147-
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"How can I assist you with it?\"}"
147+
"raw": "{\"type\":\"History\",\"role\":\"assistant\",\"content\":\"How can I assist you with them?\"}"
148148
}
149149
},
150150
{
151151
"type": "AgentAudioDone",
152-
"timestamp": 1753228543.9538682,
152+
"timestamp": 1754089261.371368,
153153
"data": {
154154
"type": "AgentAudioDone"
155155
}

0 commit comments

Comments
 (0)