Skip to content

Commit bb834ca

Browse files
committed
more cleanup for stt
1 parent 7a3f2d2 commit bb834ca

File tree

4 files changed

+25
-11
lines changed

4 files changed

+25
-11
lines changed

agents-core/vision_agents/core/agents/agents.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ async def simple_response(
177177
"""
178178
Overwrite simple_response if you want to change how the Agent class calls the LLM
179179
"""
180+
logger.info("asking LLM to reply to %s", text)
180181
with self.tracer.start_as_current_span("simple_response") as span:
181182
response = await self.llm.simple_response(
182183
text=text, processors=self.processors, participant=participant
@@ -308,6 +309,7 @@ async def on_realtime_agent_speech_transcription(
308309

309310
@self.events.subscribe
310311
async def on_stt_transcript_event_create_response(event: STTTranscriptEvent):
312+
import pdb; pdb.set_trace()
311313
if self.realtime_mode or not self.llm:
312314
# when running in realtime mode, there is no need to send the response to the LLM
313315
return

docs/ai/instructions/ai-stt.md

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,28 @@ class MySTT(stt.STT):
88
def __init__(
99
self,
1010
api_key: Optional[str] = None,
11-
sample_rate: int = 48000,
12-
client: Optional[AsyncDeepgramClient] = None,
11+
client: Optional[MyClient] = None,
1312
):
14-
super().__init__(sample_rate=sample_rate)
13+
super().__init__(provider_name="my_stt")
14+
# always accept an optional, pre-built client object so callers can pass in their own
15+
# if client is not passed, create one
16+
# expose the most common client settings (such as the API key) as __init__ parameters
1517

1618

17-
async def _process_audio_impl(
18-
self, pcm_data: PcmData, user_metadata: Optional[Union[Dict[str, Any], Participant]] = None
19-
) -> Optional[List[Tuple[bool, str, Dict[str, Any]]]]:
20-
pass
19+
async def process_audio(
20+
self,
21+
pcm_data: PcmData,
22+
participant: Optional[Participant] = None,
23+
):
24+
parts = self.client.stt(pcm_data, stream=True)
25+
full_text = ""
26+
for part in parts:
27+
# emit each partial (not yet final) piece of the transcript as it arrives
28+
self._emit_partial_transcript_event(part, participant, metadata)
29+
full_text += part
30+
31+
# emit the final transcript assembled from all of the parts
32+
self._emit_transcript_event(full_text, participant, metadata)
2133

2234
```
2335

examples/01_simple_agent_example/simple_agent_example.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ async def start_agent() -> None:
3333
# Create a call
3434
call = agent.edge.client.video.call("default", str(uuid4()))
3535

36-
# Open the demo UI
37-
await agent.edge.open_demo(call)
36+
3837

3938
# Have the agent join the call/room
4039
with await agent.join(call):
@@ -54,6 +53,8 @@ async def start_agent() -> None:
5453
# run till the call ends
5554
# await agent.say("Hello, how are you?")
5655
# await asyncio.sleep(5)
56+
# Open the demo UI
57+
await agent.edge.open_demo(call)
5758

5859
await agent.simple_response("tell me something interesting in a short sentence")
5960
await agent.finish()

plugins/fish/vision_agents/plugins/fish/stt.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,7 @@ async def process_audio(
152152
},
153153
)
154154

155-
# Return as final result (Fish Audio doesn't support streaming/partial results)
156-
return [(True, transcript_text, metadata)]
155+
self._emit_transcript_event(transcript_text, participant, metadata)
157156

158157
except Exception as e:
159158
logger.error(

0 commit comments

Comments
 (0)