diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..74771c1 --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +CORS_ORIGINS=https://example.com,http://localhost:3000 diff --git a/.github/workflows/notify_matrix.yml b/.github/workflows/notify_matrix.yml index 8e6543f..14ef802 100644 --- a/.github/workflows/notify_matrix.yml +++ b/.github/workflows/notify_matrix.yml @@ -11,7 +11,7 @@ jobs: if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Send message to Matrix bots channel id: matrix-chat-message uses: fadenb/matrix-chat-message@v0.0.6 diff --git a/.github/workflows/publish_stable.yml b/.github/workflows/publish_stable.yml index 107aa64..7cc14cf 100644 --- a/.github/workflows/publish_stable.yml +++ b/.github/workflows/publish_stable.yml @@ -19,12 +19,12 @@ jobs: if: success() # Ensure this job only runs if the previous job succeeds runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 with: - ref: dev + ref: master fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. - name: Setup Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v6 with: python-version: "3.11" - name: Install Build Tools @@ -47,7 +47,7 @@ jobs: if: success() # Ensure this job only runs if the previous job succeeds runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 with: fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. ref: master diff --git a/.github/workflows/release_workflow.yml b/.github/workflows/release_workflow.yml index fe6fa96..6435da4 100644 --- a/.github/workflows/release_workflow.yml +++ b/.github/workflows/release_workflow.yml @@ -1,13 +1,13 @@ name: Release Alpha and Propose Stable on: + workflow_dispatch: pull_request: types: [closed] branches: [dev] jobs: publish_alpha: - if: github.event.pull_request.merged == true uses: TigreGotico/gh-automations/.github/workflows/publish-alpha.yml@master secrets: inherit with: @@ -23,7 +23,7 @@ jobs: needs: publish_alpha runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Send message to Matrix bots channel id: matrix-chat-message uses: fadenb/matrix-chat-message@v0.0.6 @@ -39,12 +39,12 @@ jobs: if: success() # Ensure this job only runs if the previous job succeeds runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 with: ref: dev fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. - name: Setup Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v6 with: python-version: "3.11" - name: Install Build Tools @@ -68,14 +68,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout dev branch - uses: actions/checkout@v3 + uses: actions/checkout@v6 with: ref: dev - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: - python-version: '3.10' + python-version: '3.11' - name: Get version from setup.py id: get_version diff --git a/CHANGELOG.md b/CHANGELOG.md index db71651..05b1d76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,59 @@ # Changelog -## [0.1.4a1](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.4a1) (2025-03-15) +## [0.1.5a10](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a10) (2026-01-09) -[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.3...0.1.4a1) +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a8...0.1.5a10) + +**Closed issues:** + +- ffmpeg requirement? [\#44](https://github.com/OpenVoiceOS/ovos-stt-http-server/issues/44) + +## [0.1.5a8](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a8) (2026-01-09) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a7...0.1.5a8) **Merged pull requests:** -- Fix invalid reference in `gradio_app` [\#28](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/28) ([NeonDaniel](https://github.com/NeonDaniel)) +- modernize: dont write audio to tmp file [\#45](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/45) ([JarbasAl](https://github.com/JarbasAl)) + +## [0.1.5a7](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a7) (2025-12-19) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a4...0.1.5a7) + +**Merged pull requests:** + +- Update dependency ovos-plugin-manager to v2 [\#43](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/43) ([renovate[bot]](https://github.com/apps/renovate)) + +## [0.1.5a4](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a4) (2025-12-18) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a5...0.1.5a4) + +## [0.1.5a5](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a5) (2025-12-18) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a3...0.1.5a5) + +## [0.1.5a3](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a3) (2025-12-18) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a2...0.1.5a3) + +**Merged pull requests:** + +- Update actions/setup-python action to v6 [\#41](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/41) ([renovate[bot]](https://github.com/apps/renovate)) +- Update actions/checkout action to v6 [\#38](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/38) ([renovate[bot]](https://github.com/apps/renovate)) +- Update dependency python to 3.14 [\#37](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/37) ([renovate[bot]](https://github.com/apps/renovate)) + +## [0.1.5a2](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a2) (2025-12-18) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.5a1...0.1.5a2) + +**Merged pull requests:** + +- Configure Renovate [\#36](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/36) ([renovate[bot]](https://github.com/apps/renovate)) +- Add CORS middleware for STT web services [\#35](https://github.com/OpenVoiceOS/ovos-stt-http-server/pull/35) ([suvanbanerjee](https://github.com/suvanbanerjee)) + +## [0.1.5a1](https://github.com/OpenVoiceOS/ovos-stt-http-server/tree/0.1.5a1) (2025-04-05) + +[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-http-server/compare/0.1.4...0.1.5a1) diff --git a/ovos_stt_http_server/__init__.py b/ovos_stt_http_server/__init__.py index 27bb99d..87a667c 100644 --- a/ovos_stt_http_server/__init__.py +++ b/ovos_stt_http_server/__init__.py @@ -10,16 +10,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import os from tempfile import NamedTemporaryFile from typing import List, Tuple, Optional, Set, Union from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import PlainTextResponse from ovos_config import Configuration from ovos_plugin_manager.audio_transformers import load_audio_transformer_plugin, AudioLanguageDetector from ovos_plugin_manager.stt import load_stt_plugin +from ovos_plugin_manager.utils.audio import AudioFile, AudioData from ovos_utils.log import LOG -from speech_recognition import AudioData, Recognizer, AudioFile from starlette.requests import Request LOG.set_level("ERROR") # avoid server side logs @@ -90,21 +92,48 @@ def unload_engine(self, lang: str): self.engines.pop(lang) def process_audio(self, audio: AudioData, lang: str): + """ + Transcribes the provided audio using the engine for the specified language. + + Parameters: + audio (AudioData): Audio content to transcribe. + lang (str): Language code identifying which engine to use. + + Returns: + str: Transcribed text for the audio, or an empty string if no transcription is produced. + """ engine = self.get_engine(lang) return engine.execute(audio, language=lang) or "" -def bytes2audiodata(data: bytes) -> AudioData: - recognizer = Recognizer() - with NamedTemporaryFile() as fp: - fp.write(data) - with AudioFile(fp.name) as source: - audio = recognizer.record(source) - return audio - - def create_app(stt_plugin, lang_plugin=None, multi=False, has_gradio=False): + """ + Create and configure a FastAPI app that exposes STT and language-detection endpoints and returns the app with its model container. + + Configures CORS origins from the CORS_ORIGINS environment variable, initializes either a single-model or multi-model container using the provided plugins, and registers three endpoints: + - GET /status: returns service and plugin metadata. + - POST /stt: accepts raw audio bytes in the request body (query params: `lang`, `sample_rate`, `sample_width`), optionally performs language detection when `lang=auto`, and returns transcribed text. + - POST /lang_detect: accepts raw audio bytes and returns detected language and confidence (supports `valid_langs` query param). + + Parameters: + stt_plugin (str): Name or identifier of the STT plugin to load. + lang_plugin (str, optional): Name or identifier of an optional language-detection plugin. Defaults to None. + multi (bool, optional): If True, use a MultiModelContainer (one engine per language); otherwise use a single ModelContainer. Defaults to False. + has_gradio (bool, optional): Flag included in the /status response indicating whether a Gradio UI is available. Defaults to False. + + Returns: + tuple: (app, model) where `app` is the configured FastAPI application and `model` is the initialized ModelContainer or MultiModelContainer instance. + """ app = FastAPI() + cors_origins = os.environ.get("CORS_ORIGINS", "*") + origins = [origin.strip() for origin in cors_origins.split(",")] if cors_origins != "*" else ["*"] + app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) if multi: model = MultiModelContainer(stt_plugin, lang_plugin) else: @@ -119,9 +148,23 @@ def stats(request: Request): @app.post("/stt", response_class=PlainTextResponse) async def get_stt(request: Request): + """ + Handle an STT request: read audio from the request body, determine language if requested, and return the transcription. + + Parameters: + request (Request): HTTP request whose body contains raw audio bytes. Query parameters: + - lang: language code or "auto" (default from Configuration().get("lang", "auto")). + - sample_rate: sample rate in Hz for the audio (default 16000). + - sample_width: sample width in bytes (default 2). + + Returns: + str: Transcribed text from the provided audio, or an empty string if no transcription is produced. + """ lang = str(request.query_params.get("lang", Configuration().get("lang", "auto"))).lower() + sr = int(request.query_params.get("sample_rate", 16000)) + sw = int(request.query_params.get("sample_width", 2)) audio_bytes = await request.body() - audio = bytes2audiodata(audio_bytes) + audio = AudioData(audio_bytes, sr, sw) if lang == "auto": lang, prob = model.detect_language(audio_bytes) return model.process_audio(audio, lang) @@ -143,4 +186,4 @@ def start_stt_server(engine: str, multi: bool = False, has_gradio: bool = False) -> (FastAPI, ModelContainer): app, engine = create_app(engine, lang_engine, multi, has_gradio) - return app, engine + return app, engine \ No newline at end of file diff --git a/ovos_stt_http_server/gradio_app.py b/ovos_stt_http_server/gradio_app.py index 09009c0..7f39d5a 100644 --- a/ovos_stt_http_server/gradio_app.py +++ b/ovos_stt_http_server/gradio_app.py @@ -1,18 +1,30 @@ - import gradio as gr from os.path import join, dirname, basename, splitext, isfile from ovos_utils.log import LOG -from ovos_stt_http_server import ModelContainer, bytes2audiodata +from ovos_stt_http_server import ModelContainer +from ovos_plugin_manager.utils.audio import AudioData STT = None -def transcribe(audio_file, language: str): +def transcribe(audio_file, language: str, sample_rate: int = 16000, sample_width: int = 2): + """ + Transcribe an audio file into text using the configured STT engine. + + Parameters: + audio_file (str): Path to the audio file to transcribe. + language (str): Language code to use for transcription. + sample_rate (int): Sample rate in Hz for the provided audio (default 16000). + sample_width (int): Sample width in bytes for the provided audio (default 2). + + Returns: + transcription (str): The transcribed text, or `None` if the file is missing or invalid. + """ try: with open(audio_file, 'rb') as f: audio = f.read() - return STT.process_audio(bytes2audiodata(audio), language) + return STT.process_audio(AudioData(audio, sample_rate, sample_width), language) except TypeError: LOG.error(f"Requested file not valid: {audio_file}") except FileNotFoundError: @@ -21,7 +33,23 @@ def transcribe(audio_file, language: str): def bind_gradio_service(app, stt_engine: ModelContainer, title, description, info, badge, default_lang="en", cache=True): + """ + Create and mount a Gradio-based transcription UI at /gradio using the provided STT engine. + + Initializes the module STT with the given ModelContainer, prepares available language choices and example audio files, constructs a Gradio Interface configured to call the transcribe function, and mounts that interface to the supplied app at path "/gradio". This function logs a deprecation warning for the Gradio interface. + + Parameters: + app: The web application or framework instance to which the Gradio interface will be mounted. + stt_engine (ModelContainer): Speech-to-text engine container used to perform transcriptions and to obtain available languages. + title (str): Title to display in the Gradio UI. + description (str): Short description shown in the Gradio UI. + info (str): Additional informational HTML or text displayed in the Gradio UI article section. + badge: UI badge metadata (present for API compatibility; not used by this function). + default_lang (str): Preferred default language code; if not available it will be adjusted or replaced with the first available language. + cache (bool): Whether to cache example executions to speed up runtime after initial initialization. + """ global STT + LOG.warning("gradio interface is deprecated and will be removed in a follow up release") STT = stt_engine languages = list(stt_engine.engine.available_languages or [default_lang]) languages.sort() diff --git a/ovos_stt_http_server/version.py b/ovos_stt_http_server/version.py index 7b0dd49..7d14e01 100644 --- a/ovos_stt_http_server/version.py +++ b/ovos_stt_http_server/version.py @@ -1,6 +1,6 @@ # START_VERSION_BLOCK VERSION_MAJOR = 0 VERSION_MINOR = 1 -VERSION_BUILD = 4 -VERSION_ALPHA = 0 +VERSION_BUILD = 5 +VERSION_ALPHA = 10 # END_VERSION_BLOCK diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..5db72dd --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:recommended" + ] +} diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c9010eb..9fa87fa 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,4 +1,4 @@ -ovos-plugin-manager>=0.7.0,<1.0.0 +ovos-plugin-manager>=2.1.1,<3.0.0 fastapi~=0.95 uvicorn~=0.22 gradio~=3.28 diff --git a/setup.py b/setup.py index 5b220f6..7daa4d3 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def get_version(): setup( name='ovos-stt-http-server', version=get_version(), - description='simple aiohttp server to host OpenVoiceOS stt plugins as a service', + description='simple fastapi server to host OpenVoiceOS stt plugins as a service', long_description=long_description, long_description_content_type="text/markdown", url='https://github.com/OpenVoiceOS/ovos-stt-http-server', @@ -61,19 +61,7 @@ def get_version(): classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', - 'Topic :: Text Processing :: Linguistic', 'License :: OSI Approved :: Apache Software License', - - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.0', - 'Programming Language :: Python :: 3.1', - 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', ], keywords='plugin STT OVOS OpenVoiceOS', entry_points={