Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions ovos_stt_http_server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from ovos_config import Configuration
from ovos_plugin_manager.audio_transformers import load_audio_transformer_plugin, AudioLanguageDetector
from ovos_plugin_manager.stt import load_stt_plugin
from ovos_plugin_manager.utils.audio import AudioFile, AudioData
from ovos_utils.log import LOG
from speech_recognition import AudioData, Recognizer, AudioFile
from starlette.requests import Request

LOG.set_level("ERROR") # avoid server side logs
Expand Down Expand Up @@ -96,15 +96,6 @@ def process_audio(self, audio: AudioData, lang: str):
return engine.execute(audio, language=lang) or ""


def bytes2audiodata(data: bytes) -> AudioData:
    """Decode the raw bytes of an audio file into an ``AudioData`` object.

    The bytes are spooled to a temporary file, which is then reopened by
    name through ``AudioFile`` and read in full with ``Recognizer.record``.

    Args:
        data: complete contents of an audio file, in a container format
            that ``AudioFile`` is able to open.

    Returns:
        The ``AudioData`` recorded from the whole file.
    """
    recognizer = Recognizer()
    with NamedTemporaryFile() as fp:
        fp.write(data)
        # The file is reopened by *name* below, through a separate handle.
        # Without an explicit flush, small payloads can still be sitting in
        # this handle's userspace buffer, so the reader would see an empty
        # or truncated file.
        fp.flush()
        with AudioFile(fp.name) as source:
            audio = recognizer.record(source)
    return audio


def create_app(stt_plugin, lang_plugin=None, multi=False, has_gradio=False):
app = FastAPI()
cors_origins = os.environ.get("CORS_ORIGINS", "*")
Expand All @@ -131,8 +122,10 @@ def stats(request: Request):
@app.post("/stt", response_class=PlainTextResponse)
async def get_stt(request: Request):
lang = str(request.query_params.get("lang", Configuration().get("lang", "auto"))).lower()
sr = int(request.query_params.get("sample_rate", 16000))
sw = int(request.query_params.get("sample_width", 2))
audio_bytes = await request.body()
audio = bytes2audiodata(audio_bytes)
audio = AudioData(audio_bytes, sr, sw)
if lang == "auto":
lang, prob = model.detect_language(audio_bytes)
return model.process_audio(audio, lang)
Expand Down
8 changes: 5 additions & 3 deletions ovos_stt_http_server/gradio_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@

from os.path import join, dirname, basename, splitext, isfile
from ovos_utils.log import LOG
from ovos_stt_http_server import ModelContainer, bytes2audiodata
from ovos_stt_http_server import ModelContainer
from ovos_plugin_manager.utils.audio import AudioData

STT = None


def transcribe(audio_file, language: str):
def transcribe(audio_file, language: str, sample_rate: int = 16000, sample_width: int = 2):
try:
with open(audio_file, 'rb') as f:
audio = f.read()
return STT.process_audio(bytes2audiodata(audio), language)
return STT.process_audio(AudioData(audio, sample_rate, sample_width), language)
except TypeError:
LOG.error(f"Requested file not valid: {audio_file}")
except FileNotFoundError:
Expand All @@ -22,6 +23,7 @@ def bind_gradio_service(app, stt_engine: ModelContainer,
title, description, info, badge,
default_lang="en", cache=True):
global STT
LOG.warning("gradio interface is deprecated and will be removed in a follow up release")
STT = stt_engine
languages = list(stt_engine.engine.available_languages or [default_lang])
languages.sort()
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ovos-plugin-manager>=2.1.0,<2.2.0
ovos-plugin-manager>=2.1.1,<3.0.0
fastapi~=0.95
uvicorn~=0.22
gradio~=3.28
Expand Down
14 changes: 1 addition & 13 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def get_version():
setup(
name='ovos-stt-http-server',
version=get_version(),
description='simple aiohttp server to host OpenVoiceOS stt plugins as a service',
description='simple fastapi server to host OpenVoiceOS stt plugins as a service',
long_description=long_description,
long_description_content_type="text/markdown",
url='https://github.com/OpenVoiceOS/ovos-stt-http-server',
Expand All @@ -61,19 +61,7 @@ def get_version():
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
'Topic :: Text Processing :: Linguistic',
'License :: OSI Approved :: Apache Software License',

'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.0',
'Programming Language :: Python :: 3.1',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
],
keywords='plugin STT OVOS OpenVoiceOS',
entry_points={
Expand Down