Skip to content

Commit af36ef0

Browse files
authored
Noise cancellation plugin support in VoicePipelineAgent (VPA) & MultimodalAgent (MMA) (#1551)
1 parent b7562a6 commit af36ef0

File tree

4 files changed

+27
-2
lines changed

4 files changed

+27
-2
lines changed

.changeset/mighty-dryers-mate.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"livekit-agents": patch
3+
---
4+
5+
Add support for the LiveKit noise cancellation plugin in VoicePipelineAgent and MultimodalAgent.

livekit-agents/livekit/agents/multimodal/multimodal_agent.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ def __init__(
165165
transcription: AgentTranscriptionOptions = AgentTranscriptionOptions(),
166166
max_text_response_retries: int = 5,
167167
loop: asyncio.AbstractEventLoop | None = None,
168+
noise_cancellation: rtc.NoiseCancellationOptions | None = None,
168169
):
169170
"""Create a new MultimodalAgent.
170171
@@ -211,6 +212,8 @@ def __init__(
211212
self._text_response_retries = 0
212213
self._max_text_response_retries = max_text_response_retries
213214

215+
self._noise_cancellation = noise_cancellation
216+
214217
@property
215218
def vad(self) -> vad.VAD | None:
216219
return self._vad
@@ -505,7 +508,12 @@ async def _micro_task(self, track: rtc.LocalAudioTrack) -> None:
505508
if sample_rate is None:
506509
sample_rate = 24000
507510

508-
input_stream = rtc.AudioStream(track, sample_rate=sample_rate, num_channels=1)
511+
input_stream = rtc.AudioStream(
512+
track,
513+
sample_rate=sample_rate,
514+
num_channels=1,
515+
noise_cancellation=self._noise_cancellation,
516+
)
509517
async for ev in input_stream:
510518
self._input_audio_ch.send_nowait(ev.frame)
511519

livekit-agents/livekit/agents/pipeline/human_input.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def __init__(
2828
stt: speech_to_text.STT,
2929
participant: rtc.RemoteParticipant,
3030
transcription: bool,
31+
noise_cancellation: rtc.NoiseCancellationOptions | None = None,
3132
) -> None:
3233
super().__init__()
3334
self._room, self._vad, self._stt, self._participant, self._transcription = (
@@ -37,6 +38,7 @@ def __init__(
3738
participant,
3839
transcription,
3940
)
41+
self._noise_cancellation = noise_cancellation
4042
self._subscribed_track: rtc.RemoteAudioTrack | None = None
4143
self._recognize_atask: asyncio.Task[None] | None = None
4244

@@ -87,7 +89,13 @@ def _subscribe_to_microphone(self, *args, **kwargs) -> None:
8789
self._recognize_atask.cancel()
8890

8991
self._recognize_atask = asyncio.create_task(
90-
self._recognize_task(rtc.AudioStream(track, sample_rate=16000))
92+
self._recognize_task(
93+
rtc.AudioStream(
94+
track,
95+
sample_rate=16000,
96+
noise_cancellation=self._noise_cancellation,
97+
)
98+
)
9199
)
92100
break
93101

livekit-agents/livekit/agents/pipeline/pipeline_agent.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ def __init__(
184184
stt: stt.STT,
185185
llm: LLM,
186186
tts: tts.TTS,
187+
noise_cancellation: rtc.NoiseCancellationOptions | None = None,
187188
turn_detector: _TurnDetector | None = None,
188189
chat_ctx: ChatContext | None = None,
189190
fnc_ctx: FunctionContext | None = None,
@@ -310,6 +311,8 @@ def __init__(
310311
self._last_final_transcript_time: float | None = None
311312
self._last_speech_time: float | None = None
312313

314+
self._noise_cancellation = noise_cancellation
315+
313316
@property
314317
def fnc_ctx(self) -> FunctionContext | None:
315318
return self._fnc_ctx
@@ -559,6 +562,7 @@ def _link_participant(self, identity: str) -> None:
559562
stt=self._stt,
560563
participant=participant,
561564
transcription=self._opts.transcription.user_transcription,
565+
noise_cancellation=self._noise_cancellation,
562566
)
563567

564568
def _on_start_of_speech(ev: vad.VADEvent) -> None:

0 commit comments

Comments
 (0)