Fix model selection for non-English (#4362)

synesthesiam · web-flow · commit e56d47a79d73 · 2026-01-30T16:34:52.000-05:00
* Fix model selection for language

* Fix model selection for non-English
diff --git a/whisper/CHANGELOG.md b/whisper/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## 3.1.0
+
+- Fix model selection for language
+- Prefer Parakeet only for English (detection fails for other languages)
+- Add missing `onnx_asr` dependency
+
 ## 3.0.1
 
 - Add support for `sherpa-onnx` and Nvidia's parakeet model
diff --git a/whisper/Dockerfile b/whisper/Dockerfile
@@ -22,6 +22,7 @@ RUN \
         "wyoming[zeroconf]==1.8.0" \
         "wyoming-faster-whisper[sherpa] @ https://github.com/rhasspy/wyoming-faster-whisper/archive/refs/tags/v${WYOMING_WHISPER_VERSION}.tar.gz" \
         'transformers==4.52.4' \
+        'onnx-asr[cpu,hub]==0.7.0' \
     \
     && pip3 install --no-cache-dir \
         --index-url 'https://download.pytorch.org/whl/cpu' \
diff --git a/whisper/build.yaml b/whisper/build.yaml
@@ -3,4 +3,4 @@ build_from:
   amd64: ghcr.io/home-assistant/amd64-base-debian:bookworm
   aarch64: ghcr.io/home-assistant/aarch64-base-debian:bookworm
 args:
-  WYOMING_WHISPER_VERSION: 3.0.1
+  WYOMING_WHISPER_VERSION: 3.1.0
diff --git a/whisper/config.yaml b/whisper/config.yaml
@@ -1,5 +1,5 @@
 ---
-version: 3.0.1
+version: 3.1.0
 slug: whisper
 name: Whisper
 description: Speech-to-text with Whisper
diff --git a/whisper/translations/en.yaml b/whisper/translations/en.yaml
@@ -3,18 +3,17 @@ configuration:
   beam_size:
     name: Beam size
     description: >-
-      Number of candidates to consider simultaneously during transcription.
-      Increasing the beam size will increase accuracy at the cost of
+      Number of candidates to consider simultaneously during transcription (0 =
+      auto). Increasing the beam size will increase accuracy at the cost of
       performance.
   language:
     name: Language
     description: >-
-      Language that you will speak to the app. If you select "auto",
-      the model will run much slower but will auto-detect the spoken language.
+      Language for which the model is pre-loaded during start-up.
   model:
     name: Model
     description: |
-      Whisper model that will be used for transcription.
+      Whisper model that will be used for transcription (faster-whisper only).
 
       The default model is `tiny-int8`, a compressed version of the smallest
       Whisper model which is able to run on a Raspberry Pi 4. Compressed models

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@ RUN \`
`22`	`22`	`"wyoming[zeroconf]==1.8.0" \`
`23`	`23`	`"wyoming-faster-whisper[sherpa] @ https://github.com/rhasspy/wyoming-faster-whisper/archive/refs/tags/v${WYOMING_WHISPER_VERSION}.tar.gz" \`
`24`	`24`	`'transformers==4.52.4' \`
	`25`	`+ 'onnx-asr[cpu,hub]==0.7.0' \`
`25`	`26`	`\`
`26`	`27`	`&& pip3 install --no-cache-dir \`
`27`	`28`	`--index-url 'https://download.pytorch.org/whl/cpu' \`