Skip to content

Commit 412ab81

Browse files
[voice agent] Fixing the missing arguments calling in NemoSTTService (#15233)
* Fixing the missing arguments calling in `NemoSTTService`

  Signed-off-by: SangwonSUH <[email protected]>

* Remove redundant attribute assignments in `NemoSTTService` and use `self._params` directly instead.

  Signed-off-by: SangwonSUH <[email protected]>

* Add docstrings to `NemoSTTService` and related classes

  Signed-off-by: SangwonSUH <[email protected]>

* Fix incorrect model type check in `process_frame` method

  Signed-off-by: He Huang (Steve) <[email protected]>

---------

Signed-off-by: SangwonSUH <[email protected]>
Signed-off-by: He Huang (Steve) <[email protected]>
Co-authored-by: He Huang (Steve) <[email protected]>
1 parent 1a3c291 commit 412ab81

File tree

1 file changed

+11
-3
lines changed
  • nemo/agents/voice_agent/pipecat/services/nemo

1 file changed

+11
-3
lines changed

nemo/agents/voice_agent/pipecat/services/nemo/stt.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
from pydantic import BaseModel
3535

3636
from nemo.agents.voice_agent.pipecat.services.nemo.streaming_asr import NemoStreamingASRService
37-
from nemo.agents.voice_agent.pipecat.services.nemo.streaming_diar import NeMoStreamingDiarService
3837

3938
try:
4039
# disable nemo logging
@@ -51,6 +50,8 @@
5150

5251

5352
class NeMoSTTInputParams(BaseModel):
53+
"""Input parameters for NeMo STT service."""
54+
5455
language: Optional[Language] = Language.EN_US
5556
att_context_size: Optional[List] = [70, 1]
5657
frame_len_in_secs: Optional[float] = 0.08 # 80ms for FastConformer model
@@ -60,6 +61,8 @@ class NeMoSTTInputParams(BaseModel):
6061

6162

6263
class NemoSTTService(STTService):
64+
"""NeMo Speech-to-Text service for Pipecat integration."""
65+
6366
def __init__(
6467
self,
6568
*,
@@ -94,7 +97,11 @@ def __init__(
9497
def _load_model(self):
9598
if self._backend == "legacy":
9699
self._model = NemoStreamingASRService(
97-
self._model_name, device=self._device, decoder_type=self._decoder_type
100+
self._model_name,
101+
self._params.att_context_size,
102+
device=self._device,
103+
decoder_type=self._decoder_type,
104+
frame_len_in_secs=self._params.frame_len_in_secs,
98105
)
99106
else:
100107
raise ValueError(f"Invalid ASR backend: {self._backend}")
@@ -258,7 +265,8 @@ async def set_model(self, model: str):
258265
self._load_model()
259266

260267
async def process_frame(self, frame: Frame, direction: FrameDirection):
261-
if isinstance(frame, VADUserStoppedSpeakingFrame) and isinstance(self._model, NeMoStreamingDiarService):
268+
"""Process incoming frames and handle VAD events."""
269+
if isinstance(frame, VADUserStoppedSpeakingFrame) and isinstance(self._model, NemoStreamingASRService):
262270
# manually reset the state of the model when end of utterance is detected by VAD
263271
logger.debug("Resetting state of the model due to VADUserStoppedSpeakingFrame")
264272
self._model.reset_state()

0 commit comments

Comments
 (0)