code
diff --git a/homeassistant/components/assist_pipeline/audio_enhancer.py b/homeassistant/components/assist_pipeline/audio_enhancer.py
Lines changed: 39 additions & 10 deletions
diff --git a/homeassistant/components/assist_pipeline/manifest.json b/homeassistant/components/assist_pipeline/manifest.json
Lines changed: 1 addition & 1 deletion
diff --git a/homeassistant/components/assist_pipeline/pipeline.py b/homeassistant/components/assist_pipeline/pipeline.py
Lines changed: 2 additions & 2 deletions
diff --git a/homeassistant/package_constraints.txt b/homeassistant/package_constraints.txt
Lines changed: 1 addition & 1 deletion
diff --git a/requirements_all.txt b/requirements_all.txt
Lines changed: 3 additions & 3 deletions
diff --git a/requirements_test_all.txt b/requirements_test_all.txt
Lines changed: 3 additions & 3 deletions
diff --git a/script/hassfest/docker/Dockerfile b/script/hassfest/docker/Dockerfile
Lines changed: 1 addition & 1 deletion
@@ -3,8 +3,9 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 import logging
+import math
 
-from pymicro_vad import MicroVad
+from pysilero_vad import SileroVoiceActivityDetector
 from pyspeex_noise import AudioProcessor
 
 from .const import BYTES_PER_CHUNK
@@ -42,8 +43,8 @@ def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
         """Enhance chunk of PCM audio @ 16Khz with 16-bit mono samples."""
 
 
-class MicroVadSpeexEnhancer(AudioEnhancer):
-    """Audio enhancer that runs microVAD and speex."""
+class SileroVadSpeexEnhancer(AudioEnhancer):
+    """Audio enhancer that runs Silero VAD and speex."""
 
     def __init__(
         self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
@@ -69,21 +70,49 @@ def __init__(
                 self.noise_suppression,
             )
 
-        self.vad: MicroVad | None = None
+        self.vad: SileroVoiceActivityDetector | None = None
+
+        # We get 10ms chunks but Silero works on 32ms chunks, so we have to
+        # buffer audio. The previous speech probability is used until enough
+        # audio has been buffered.
+        self._vad_buffer: bytearray | None = None
+        self._vad_buffer_chunks = 0
+        self._vad_buffer_chunk_idx = 0
+        self._last_speech_probability: float | None = None
 
         if self.is_vad_enabled:
-            self.vad = MicroVad()
-            _LOGGER.debug("Initialized microVAD")
+            self.vad = SileroVoiceActivityDetector()
+
+            # VAD buffer is a multiple of 10ms, but Silero VAD needs 32ms.
+            self._vad_buffer_chunks = int(
+                math.ceil(self.vad.chunk_bytes() / BYTES_PER_CHUNK)
+            )
+            self._vad_leftover_bytes = self.vad.chunk_bytes() - BYTES_PER_CHUNK
+            self._vad_buffer = bytearray(self.vad.chunk_bytes())
+            _LOGGER.debug("Initialized Silero VAD")
 
     def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
         """Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples."""
-        speech_probability: float | None = None
-
         assert len(audio) == BYTES_PER_CHUNK
 
         if self.vad is not None:
             # Run VAD
-            speech_probability = self.vad.Process10ms(audio)
+            assert self._vad_buffer is not None
+            start_idx = self._vad_buffer_chunk_idx * BYTES_PER_CHUNK
+            self._vad_buffer[start_idx : start_idx + BYTES_PER_CHUNK] = audio
+
+            self._vad_buffer_chunk_idx += 1
+            if self._vad_buffer_chunk_idx >= self._vad_buffer_chunks:
+                # We have enough data to run Silero VAD (32 ms)
+                self._last_speech_probability = self.vad.process_chunk(
+                    self._vad_buffer[: self.vad.chunk_bytes()]
+                )
+
+                # Copy leftover audio that wasn't processed to the start of the buffer
+                self._vad_buffer[: self._vad_leftover_bytes] = self._vad_buffer[
+                    -self._vad_leftover_bytes :
+                ]
+                self._vad_buffer_chunk_idx = 0
 
         if self.audio_processor is not None:
             # Run noise suppression and auto gain
@@ -92,5 +121,5 @@ def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
         return EnhancedAudioChunk(
             audio=audio,
             timestamp_ms=timestamp_ms,
-            speech_probability=speech_probability,
+            speech_probability=self._last_speech_probability,
         )
@@ -8,5 +8,5 @@
   "integration_type": "system",
   "iot_class": "local_push",
   "quality_scale": "internal",
-  "requirements": ["pymicro-vad==1.0.1", "pyspeex-noise==1.0.2"]
+  "requirements": ["pysilero-vad==3.0.0", "pyspeex-noise==1.0.2"]
 }
@@ -55,7 +55,7 @@
 from homeassistant.util.hass_dict import HassKey
 from homeassistant.util.limited_size_dict import LimitedSizeDict
 
-from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, MicroVadSpeexEnhancer
+from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, SileroVadSpeexEnhancer
 from .const import (
     ACKNOWLEDGE_PATH,
     BYTES_PER_CHUNK,
@@ -633,7 +633,7 @@ def __post_init__(self) -> None:
         # Initialize with audio settings
         if self.audio_settings.needs_processor and (self.audio_enhancer is None):
             # Default audio enhancer
-            self.audio_enhancer = MicroVadSpeexEnhancer(
+            self.audio_enhancer = SileroVadSpeexEnhancer(
                 self.audio_settings.auto_gain_dbfs,
                 self.audio_settings.noise_suppression_level,
                 self.audio_settings.is_vad_enabled,
 
@@ -53,10 +53,10 @@ Pillow==12.0.0
 propcache==0.4.1
 psutil-home-assistant==0.0.1
 PyJWT==2.10.1
-pymicro-vad==1.0.1
 PyNaCl==1.6.0
 pyOpenSSL==25.3.0
 pyserial==3.5
+pysilero-vad==3.0.0
 pyspeex-noise==1.0.2
 python-slugify==8.0.4
 PyTurboJPEG==1.8.0
Original file line number	Diff line number	Diff line change
`@@ -8,5 +8,5 @@`
`8`	`8`	`"integration_type": "system",`
`9`	`9`	`"iot_class": "local_push",`
`10`	`10`	`"quality_scale": "internal",`
`11`		`- "requirements": ["pymicro-vad==1.0.1", "pyspeex-noise==1.0.2"]`
	`11`	`+ "requirements": ["pysilero-vad==3.0.0", "pyspeex-noise==1.0.2"]`
`12`	`12`	`}`