noise cancellation plugin support in VPA & MMA (#1551)

bcherry · web-flow · commit af36ef0e9636 · 2025-03-04T17:21:54.000-08:00
diff --git a/.changeset/mighty-dryers-mate.md b/.changeset/mighty-dryers-mate.md
@@ -0,0 +1,5 @@
+---
+"livekit-agents": patch
+---
+
+support for livekit noise cancellation plugin in VoicePipelineAgent and MultimodalAgent
diff --git a/livekit-agents/livekit/agents/multimodal/multimodal_agent.py b/livekit-agents/livekit/agents/multimodal/multimodal_agent.py
@@ -165,6 +165,7 @@ def __init__(
         transcription: AgentTranscriptionOptions = AgentTranscriptionOptions(),
         max_text_response_retries: int = 5,
         loop: asyncio.AbstractEventLoop | None = None,
+        noise_cancellation: rtc.NoiseCancellationOptions | None = None,
     ):
         """Create a new MultimodalAgent.
 
@@ -211,6 +212,8 @@ def __init__(
         self._text_response_retries = 0
         self._max_text_response_retries = max_text_response_retries
 
+        self._noise_cancellation = noise_cancellation
+
     @property
     def vad(self) -> vad.VAD | None:
         return self._vad
@@ -505,7 +508,12 @@ async def _micro_task(self, track: rtc.LocalAudioTrack) -> None:
         if sample_rate is None:
             sample_rate = 24000
 
-        input_stream = rtc.AudioStream(track, sample_rate=sample_rate, num_channels=1)
+        input_stream = rtc.AudioStream(
+            track,
+            sample_rate=sample_rate,
+            num_channels=1,
+            noise_cancellation=self._noise_cancellation,
+        )
         async for ev in input_stream:
             self._input_audio_ch.send_nowait(ev.frame)
 
diff --git a/livekit-agents/livekit/agents/pipeline/human_input.py b/livekit-agents/livekit/agents/pipeline/human_input.py
@@ -28,6 +28,7 @@ def __init__(
         stt: speech_to_text.STT,
         participant: rtc.RemoteParticipant,
         transcription: bool,
+        noise_cancellation: rtc.NoiseCancellationOptions | None = None,
     ) -> None:
         super().__init__()
         self._room, self._vad, self._stt, self._participant, self._transcription = (
@@ -37,6 +38,7 @@ def __init__(
             participant,
             transcription,
         )
+        self._noise_cancellation = noise_cancellation
         self._subscribed_track: rtc.RemoteAudioTrack | None = None
         self._recognize_atask: asyncio.Task[None] | None = None
 
@@ -87,7 +89,13 @@ def _subscribe_to_microphone(self, *args, **kwargs) -> None:
                     self._recognize_atask.cancel()
 
                 self._recognize_atask = asyncio.create_task(
-                    self._recognize_task(rtc.AudioStream(track, sample_rate=16000))
+                    self._recognize_task(
+                        rtc.AudioStream(
+                            track,
+                            sample_rate=16000,
+                            noise_cancellation=self._noise_cancellation,
+                        )
+                    )
                 )
                 break
 
diff --git a/livekit-agents/livekit/agents/pipeline/pipeline_agent.py b/livekit-agents/livekit/agents/pipeline/pipeline_agent.py
@@ -184,6 +184,7 @@ def __init__(
         stt: stt.STT,
         llm: LLM,
         tts: tts.TTS,
+        noise_cancellation: rtc.NoiseCancellationOptions | None = None,
         turn_detector: _TurnDetector | None = None,
         chat_ctx: ChatContext | None = None,
         fnc_ctx: FunctionContext | None = None,
@@ -310,6 +311,8 @@ def __init__(
         self._last_final_transcript_time: float | None = None
         self._last_speech_time: float | None = None
 
+        self._noise_cancellation = noise_cancellation
+
     @property
     def fnc_ctx(self) -> FunctionContext | None:
         return self._fnc_ctx
@@ -559,6 +562,7 @@ def _link_participant(self, identity: str) -> None:
             stt=self._stt,
             participant=participant,
             transcription=self._opts.transcription.user_transcription,
+            noise_cancellation=self._noise_cancellation,
         )
 
         def _on_start_of_speech(ev: vad.VADEvent) -> None:

-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
 +---
 +"livekit-agents": patch
 +---
 +
 +support for livekit noise cancellation plugin in VoicePipelineAgent and MultimodalAgent