Skip to content

Commit 6a725b0

Browse files
Merge pull request #122 from GetStream/cleanup_stt
Cleanup STT
2 parents 5f001e0 + 5088709 commit 6a725b0

File tree

20 files changed

+358
-1846
lines changed

20 files changed

+358
-1846
lines changed

agents-core/vision_agents/core/agents/agents.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,7 @@ async def simple_response(
177177
"""
178178
Overwrite simple_response if you want to change how the Agent class calls the LLM
179179
"""
180+
logger.info("asking LLM to reply to %s", text)
180181
with self.tracer.start_as_current_span("simple_response") as span:
181182
response = await self.llm.simple_response(
182183
text=text, processors=self.processors, participant=participant
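agents-core/vision_agents/core/stt/__init__.py (path inferred from the relative imports below; the file header is missing from this capture)

Lines changed: 2 additions & 1 deletion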
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
from .stt import STT
2+
from .events import TranscriptResponse
23

3-
__all__ = ["STT"]
4+
__all__ = ["STT", "TranscriptResponse"]

agents-core/vision_agents/core/stt/events.py

Lines changed: 53 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,22 +4,47 @@
44

55

66
@dataclass
7-
class STTTranscriptEvent(PluginBaseEvent):
8-
"""Event emitted when a complete transcript is available."""
9-
10-
type: str = field(default='plugin.stt_transcript', init=False)
11-
text: str = ""
7+
class TranscriptResponse:
128
confidence: Optional[float] = None
139
language: Optional[str] = None
1410
processing_time_ms: Optional[float] = None
1511
audio_duration_ms: Optional[float] = None
1612
model_name: Optional[str] = None
17-
words: Optional[list[dict[str, Any]]] = None
13+
other: Optional[dict] = None
14+
15+
@dataclass
16+
class STTTranscriptEvent(PluginBaseEvent):
17+
"""Event emitted when a complete transcript is available."""
18+
19+
type: str = field(default='plugin.stt_transcript', init=False)
20+
text: str = ""
21+
response: TranscriptResponse = field(default_factory=TranscriptResponse)
1822
is_final: bool = True
1923

2024
def __post_init__(self):
2125
if not self.text:
2226
raise ValueError("Transcript text cannot be empty")
27+
28+
# Convenience properties for backward compatibility
29+
@property
30+
def confidence(self) -> Optional[float]:
31+
return self.response.confidence
32+
33+
@property
34+
def language(self) -> Optional[str]:
35+
return self.response.language
36+
37+
@property
38+
def processing_time_ms(self) -> Optional[float]:
39+
return self.response.processing_time_ms
40+
41+
@property
42+
def audio_duration_ms(self) -> Optional[float]:
43+
return self.response.audio_duration_ms
44+
45+
@property
46+
def model_name(self) -> Optional[str]:
47+
return self.response.model_name
2348

2449

2550
@dataclass
@@ -28,13 +53,29 @@ class STTPartialTranscriptEvent(PluginBaseEvent):
2853

2954
type: str = field(default='plugin.stt_partial_transcript', init=False)
3055
text: str = ""
31-
confidence: Optional[float] = None
32-
language: Optional[str] = None
33-
processing_time_ms: Optional[float] = None
34-
audio_duration_ms: Optional[float] = None
35-
model_name: Optional[str] = None
36-
words: Optional[list[dict[str, Any]]] = None
56+
response: TranscriptResponse = field(default_factory=TranscriptResponse)
3757
is_final: bool = False
58+
59+
# Convenience properties for backward compatibility
60+
@property
61+
def confidence(self) -> Optional[float]:
62+
return self.response.confidence
63+
64+
@property
65+
def language(self) -> Optional[str]:
66+
return self.response.language
67+
68+
@property
69+
def processing_time_ms(self) -> Optional[float]:
70+
return self.response.processing_time_ms
71+
72+
@property
73+
def audio_duration_ms(self) -> Optional[float]:
74+
return self.response.audio_duration_ms
75+
76+
@property
77+
def model_name(self) -> Optional[str]:
78+
return self.response.model_name
3879

3980

4081
@dataclass

0 commit comments

Comments
 (0)