OpenVoiceOS · github-actions · Apr 5, 2025 · Apr 5, 2025 · Apr 5, 2025 · Apr 5, 2025
diff --git a/.env.example b/.env.example
@@ -0,0 +1 @@
+CORS_ORIGINS=https://example.com,http://localhost:3000
diff --git a/.github/workflows/notify_matrix.yml b/.github/workflows/notify_matrix.yml
@@ -11,7 +11,7 @@ jobs:
     if: github.event.pull_request.merged == true
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v6
       - name: Send message to Matrix bots channel
         id: matrix-chat-message
         uses: fadenb/matrix-chat-message@v0.0.6

diff --git a/.github/workflows/publish_stable.yml b/.github/workflows/publish_stable.yml
@@ -19,12 +19,12 @@ jobs:
     if: success()  # Ensure this job only runs if the previous job succeeds
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v6
         with:
-          ref: dev
+          ref: master
           fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
       - name: Setup Python
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v6
         with:
           python-version: "3.11"
       - name: Install Build Tools
@@ -47,7 +47,7 @@ jobs:
     if: success()  # Ensure this job only runs if the previous job succeeds
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
           ref: master

diff --git a/.github/workflows/release_workflow.yml b/.github/workflows/release_workflow.yml
@@ -1,13 +1,13 @@
 name: Release Alpha and Propose Stable
 
 on:
+  workflow_dispatch:
   pull_request:
     types: [closed]
     branches: [dev]
 
 jobs:
   publish_alpha:
-    if: github.event.pull_request.merged == true
     uses: TigreGotico/gh-automations/.github/workflows/publish-alpha.yml@master
     secrets: inherit
     with:
@@ -23,7 +23,7 @@ jobs:
     needs: publish_alpha
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v6
       - name: Send message to Matrix bots channel
         id: matrix-chat-message
         uses: fadenb/matrix-chat-message@v0.0.6
@@ -39,12 +39,12 @@ jobs:
     if: success()  # Ensure this job only runs if the previous job succeeds
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v6
         with:
           ref: dev
           fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
       - name: Setup Python
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v6
         with:
           python-version: "3.11"
       - name: Install Build Tools
@@ -68,14 +68,14 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout dev branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v6
         with:
           ref: dev
 
       - name: Setup Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v6
         with:
-          python-version: '3.10'
+          python-version: '3.11'
 
       - name: Get version from setup.py
         id: get_version

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,12 +1,59 @@
 # Changelog
 
-## [0.1.4a1](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.4a1) (2025-03-15)
+## [0.1.5a10](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a10) (2026-01-09)
 
-[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.3...0.1.4a1)
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a8...0.1.5a10)
+
+**Closed issues:**
+
+- ffmpeg requirement? [\#44](https://github.com/OpenVoiceOS/ovos-stt-http-server/issues/44)
+
+## [0.1.5a8](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a8) (2026-01-09)
+
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a7...0.1.5a8)
 
 **Merged pull requests:**
 
-- Fix invalid reference in `gradio_app` [\#28](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/28) ([NeonDaniel](https://github.com/NeonDaniel))
+- modernize: dont write audio to tmp file [\#45](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/45) ([JarbasAl](https://github.com/JarbasAl))
+
+## [0.1.5a7](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a7) (2025-12-19)
+
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a4...0.1.5a7)
+
+**Merged pull requests:**
+
+- Update dependency ovos-plugin-manager to v2 [\#43](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/43) ([renovate[bot]](https://github.com/apps/renovate))
+
+## [0.1.5a4](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a4) (2025-12-18)
+
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a5...0.1.5a4)
+
+## [0.1.5a5](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a5) (2025-12-18)
+
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a3...0.1.5a5)
+
+## [0.1.5a3](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a3) (2025-12-18)
+
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a2...0.1.5a3)
+
+**Merged pull requests:**
+
+- Update actions/setup-python action to v6 [\#41](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/41) ([renovate[bot]](https://github.com/apps/renovate))
+- Update actions/checkout action to v6 [\#38](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/38) ([renovate[bot]](https://github.com/apps/renovate))
+- Update dependency python to 3.14 [\#37](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/37) ([renovate[bot]](https://github.com/apps/renovate))
+
+## [0.1.5a2](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a2) (2025-12-18)
+
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a1...0.1.5a2)
+
+**Merged pull requests:**
+
+- Configure Renovate [\#36](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/36) ([renovate[bot]](https://github.com/apps/renovate))
+- Add CORS middleware for STT web services [\#35](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/35) ([suvanbanerjee](https://github.com/suvanbanerjee))
+
+## [0.1.5a1](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a1) (2025-04-05)
+
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.4...0.1.5a1)
 
 
 

diff --git a/ovos_stt_http_server/__init__.py b/ovos_stt_http_server/__init__.py
@@ -10,16 +10,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
 from tempfile import NamedTemporaryFile
 
 from typing import List, Tuple, Optional, Set, Union
 from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import PlainTextResponse
 from ovos_config import Configuration
 from ovos_plugin_manager.audio_transformers import load_audio_transformer_plugin, AudioLanguageDetector
 from ovos_plugin_manager.stt import load_stt_plugin
+from ovos_plugin_manager.utils.audio import AudioFile, AudioData
 from ovos_utils.log import LOG
-from speech_recognition import AudioData, Recognizer, AudioFile
 from starlette.requests import Request
 
 LOG.set_level("ERROR")  # avoid server side logs
@@ -90,21 +92,48 @@ def unload_engine(self, lang: str):
             self.engines.pop(lang)
 
     def process_audio(self, audio: AudioData, lang: str):
+        """
+        Transcribes the provided audio using the engine for the specified language.
+
+        Parameters:
+            audio (AudioData): Audio content to transcribe.
+            lang (str): Language code identifying which engine to use.
+
+        Returns:
+            str: Transcribed text for the audio, or an empty string if no transcription is produced.
+        """
         engine = self.get_engine(lang)
         return engine.execute(audio, language=lang) or ""
 
 
-def bytes2audiodata(data: bytes) -> AudioData:
-    recognizer = Recognizer()
-    with NamedTemporaryFile() as fp:
-        fp.write(data)
-        with AudioFile(fp.name) as source:
-            audio = recognizer.record(source)
-    return audio
-
-
 def create_app(stt_plugin, lang_plugin=None, multi=False, has_gradio=False):
+    """
+    Create and configure a FastAPI app that exposes STT and language-detection endpoints and returns the app with its model container.
+
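+    Configures CORS from the CORS_ORIGINS environment variable (a comma-separated list of allowed origins; when unset or "*", every origin is allowed) with credentials, all methods and all headers enabled, initializes either a single-model or multi-model container using the provided plugins, and registers three endpoints: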
+    - GET /status: returns service and plugin metadata.
+    - POST /stt: accepts raw audio bytes in the request body (query params: `lang`, `sample_rate`, `sample_width`), optionally performs language detection when `lang=auto`, and returns transcribed text.
+    - POST /lang_detect: accepts raw audio bytes and returns detected language and confidence (supports `valid_langs` query param).
+
+    Parameters:
+        stt_plugin (str): Name or identifier of the STT plugin to load.
+        lang_plugin (str, optional): Name or identifier of an optional language-detection plugin. Defaults to None.
+        multi (bool, optional): If True, use a MultiModelContainer (one engine per language); otherwise use a single ModelContainer. Defaults to False.
+        has_gradio (bool, optional): Flag included in the /status response indicating whether a Gradio UI is available. Defaults to False.
+
+    Returns:
+        tuple: (app, model) where `app` is the configured FastAPI application and `model` is the initialized ModelContainer or MultiModelContainer instance.
+    """
     app = FastAPI()
+    cors_origins = os.environ.get("CORS_ORIGINS", "*")
+    origins = [origin.strip() for origin in cors_origins.split(",")] if cors_origins != "*" else ["*"]
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=origins,
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
     if multi:
         model = MultiModelContainer(stt_plugin, lang_plugin)
     else:
@@ -119,9 +148,23 @@ def stats(request: Request):
 
     @app.post("/stt", response_class=PlainTextResponse)
     async def get_stt(request: Request):
+        """
+        Handle an STT request: read audio from the request body, determine language if requested, and return the transcription.
+
+        Parameters:
+            request (Request): HTTP request whose body contains raw audio bytes. Query parameters:
+                - lang: language code or "auto" (default from Configuration().get("lang", "auto")).
+                - sample_rate: sample rate in Hz for the audio (default 16000).
+                - sample_width: sample width in bytes (default 2).
+
+        Returns:
+            str: Transcribed text from the provided audio, or an empty string if no transcription is produced.
+        """
         lang = str(request.query_params.get("lang", Configuration().get("lang", "auto"))).lower()
+        sr = int(request.query_params.get("sample_rate", 16000))
+        sw = int(request.query_params.get("sample_width", 2))
         audio_bytes = await request.body()
-        audio = bytes2audiodata(audio_bytes)
+        audio = AudioData(audio_bytes, sr, sw)
         if lang == "auto":
             lang, prob = model.detect_language(audio_bytes)
         return model.process_audio(audio, lang)
@@ -143,4 +186,4 @@ def start_stt_server(engine: str,
                      multi: bool = False,
                      has_gradio: bool = False) -> (FastAPI, ModelContainer):
     app, engine = create_app(engine, lang_engine, multi, has_gradio)
-    return app, engine
+    return app, engine
diff --git a/ovos_stt_http_server/gradio_app.py b/ovos_stt_http_server/gradio_app.py
@@ -1,18 +1,30 @@
-
 import gradio as gr
 
 from os.path import join, dirname, basename, splitext, isfile
 from ovos_utils.log import LOG
-from ovos_stt_http_server import ModelContainer, bytes2audiodata
+from ovos_stt_http_server import ModelContainer
+from ovos_plugin_manager.utils.audio import AudioData
 
 STT = None
 
 
-def transcribe(audio_file, language: str):
+def transcribe(audio_file, language: str, sample_rate: int = 16000, sample_width: int = 2):
+    """
+    Transcribe an audio file into text using the configured STT engine.
+
+    Parameters:
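+        audio_file (str): Path to the audio file to transcribe; its bytes are read and wrapped in AudioData as-is (this function performs no container/header decoding).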
+        language (str): Language code to use for transcription.
+        sample_rate (int): Sample rate in Hz for the provided audio (default 16000).
+        sample_width (int): Sample width in bytes for the provided audio (default 2).
+
+    Returns:
+        transcription (str): The transcribed text, or `None` if the file is missing or invalid.
+    """
     try:
         with open(audio_file, 'rb') as f:
             audio = f.read()
-        return STT.process_audio(bytes2audiodata(audio), language)
+        return STT.process_audio(AudioData(audio, sample_rate, sample_width), language)
     except TypeError:
         LOG.error(f"Requested file not valid: {audio_file}")
     except FileNotFoundError:
@@ -21,7 +33,23 @@ def transcribe(audio_file, language: str):
 def bind_gradio_service(app, stt_engine: ModelContainer,
                         title, description, info, badge,
                         default_lang="en", cache=True):
+    """
+    Create and mount a Gradio-based transcription UI at /gradio using the provided STT engine.
+
+    Initializes the module STT with the given ModelContainer, prepares available language choices and example audio files, constructs a Gradio Interface configured to call the transcribe function, and mounts that interface to the supplied app at path "/gradio". This function logs a deprecation warning for the Gradio interface.
+
+    Parameters:
+        app: The web application or framework instance to which the Gradio interface will be mounted.
+        stt_engine (ModelContainer): Speech-to-text engine container used to perform transcriptions and to obtain available languages.
+        title (str): Title to display in the Gradio UI.
+        description (str): Short description shown in the Gradio UI.
+        info (str): Additional informational HTML or text displayed in the Gradio UI article section.
+        badge: UI badge metadata (present for API compatibility; not used by this function).
+        default_lang (str): Preferred default language code; if not available it will be adjusted or replaced with the first available language.
+        cache (bool): Whether to cache example executions to speed up runtime after initial initialization.
+    """
     global STT
+    LOG.warning("gradio interface is deprecated and will be removed in a follow up release")
     STT = stt_engine
     languages = list(stt_engine.engine.available_languages or [default_lang])
     languages.sort()

diff --git a/ovos_stt_http_server/version.py b/ovos_stt_http_server/version.py
@@ -1,6 +1,6 @@
 # START_VERSION_BLOCK
 VERSION_MAJOR = 0
 VERSION_MINOR = 1
-VERSION_BUILD = 4
-VERSION_ALPHA = 0
+VERSION_BUILD = 5
+VERSION_ALPHA = 10
 # END_VERSION_BLOCK
diff --git a/renovate.json b/renovate.json
@@ -0,0 +1,6 @@
+{
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  "extends": [
+    "config:recommended"
+  ]
+}
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -1,4 +1,4 @@
-ovos-plugin-manager>=0.7.0,<1.0.0
+ovos-plugin-manager>=2.1.1,<3.0.0
 fastapi~=0.95
 uvicorn~=0.22
 gradio~=3.28

diff --git a/setup.py b/setup.py
@@ -47,7 +47,7 @@ def get_version():
 setup(
     name='ovos-stt-http-server',
     version=get_version(),
-    description='simple aiohttp server to host OpenVoiceOS stt plugins as a service',
+    description='simple fastapi server to host OpenVoiceOS stt plugins as a service',
     long_description=long_description,
     long_description_content_type="text/markdown",
     url='https://github.com/OpenVoiceOS/ovos-stt-http-server',
@@ -61,19 +61,7 @@ def get_version():
     classifiers=[
         'Development Status :: 3 - Alpha',
         'Intended Audience :: Developers',
-        'Topic :: Text Processing :: Linguistic',
         'License :: OSI Approved :: Apache Software License',
-
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.0',
-        'Programming Language :: Python :: 3.1',
-        'Programming Language :: Python :: 3.2',
-        'Programming Language :: Python :: 3.3',
-        'Programming Language :: Python :: 3.4',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
     ],
     keywords='plugin STT OVOS OpenVoiceOS',
     entry_points={
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		CORS_ORIGINS=https://example.com,http://localhost:3000