Skip to content

Commit 12a1748

Browse files
committed
quick api
1 parent 4000a42 commit 12a1748

File tree

7 files changed

+155
-1
lines changed

7 files changed

+155
-1
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
FROM python:3.10.19-slim
2+
3+
WORKDIR /app
4+
5+
# System dependencies
6+
RUN apt-get update && apt-get install -y --no-install-recommends \
7+
ffmpeg \
8+
espeak-ng \
9+
libportaudio2 \
10+
python3-pyaudio \
11+
&& rm -rf /var/lib/apt/lists/*
12+
13+
# Python dependencies
14+
COPY browser_tests/run_models/requirements.txt requirements.txt
15+
RUN pip install --no-cache-dir -r requirements.txt
16+
17+
# Copy the rest of the files
18+
COPY models/ models/
19+
COPY scripts/ scripts/
20+
COPY browser_tests/run_models/main.py main.py
21+
22+
EXPOSE 8080
23+
24+
CMD ["gunicorn", "--workers=2", "--threads=1", "--bind=0.0.0.0:8080", "--timeout", "6000", "main:app"]

browser_tests/run_models/main.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import os
2+
import sys
3+
import uuid
4+
from functools import wraps
5+
6+
# Materialize the Google credentials file from the environment when it does
# not exist on disk yet: GOOGLE_APPLICATION_CREDENTIALS holds the target path
# and GOOGLE_APPLICATION_CREDENTIALS_FILE holds the content to write there.
#
# Both variables are read with .get() so that importing this module does not
# raise KeyError when they are unset, and the file is only opened once the
# content is known to be available -- otherwise an empty file would be
# created and every later start would skip the write because the path exists.
_credentials_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
_credentials_body = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS_FILE")
if (
    _credentials_path
    and _credentials_body
    and not os.path.exists(_credentials_path)
):
    with open(_credentials_path, "w", encoding="utf-8") as f:
        f.write(_credentials_body)
9+
10+
from flask import Flask, request
11+
from werkzeug.utils import secure_filename
12+
13+
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
14+
from scripts.asr.deepspeech import deepspeech_transcribe_from_file
15+
from scripts.asr.google_speech import google_transcribe_from_file
16+
17+
app = Flask(__name__)
18+
19+
20+
def api_key_required(f):
    """Decorate a Flask view so it only runs for requests with a valid API key.

    The key is taken from the last space-separated token of the
    ``Authorization`` header (e.g. ``Bearer <key>``) and compared against the
    ``API_KEY`` environment variable.

    Returns from the wrapped view:
        ``("API Key is missing", 401)`` when the header is absent or carries
        no token, ``("Invalid API Key", 403)`` when the token does not match,
        otherwise whatever ``f`` returns.
    """
    import hmac

    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Do not stringify a missing header: str(None) is the truthy text
        # "None", which would skip the 401 branch below.
        header = request.headers.get("Authorization")
        api_key = header.split(" ")[-1] if header else ""
        if not api_key:
            return "API Key is missing", 401

        # NOTE(review): the "secret" fallback means a deployment without
        # API_KEY set accepts a well-known key -- confirm this is intended.
        expected = os.environ.get("API_KEY", "secret")

        # Constant-time comparison; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        if not hmac.compare_digest(
            api_key.encode("utf-8"), expected.encode("utf-8")
        ):
            return "Invalid API Key", 403
        return f(*args, **kwargs)

    return decorated_function
31+
32+
33+
@app.route("/api/v1/asr/<model>", methods=["POST"])
@api_key_required
def run_asr(model: str):
    """Transcribe an uploaded audio file with the requested ASR backend.

    The upload is read from the ``audio_file`` multipart field, written to a
    uniquely named file next to this module, passed to the selected
    transcriber, and removed again before the response is returned.

    Args:
        model: Backend name from the URL; ``"deepspeech"`` or ``"google"``.

    Returns:
        ``(body, status)``: the transcriber's result with 200 on success,
        a message with 400 for an unknown model or a missing upload, or a
        message with 500 when saving or transcription raises.
    """
    MODELS = {
        "deepspeech": deepspeech_transcribe_from_file,
        # Only the first alternative of the first result is returned; an
        # empty result list raises and is reported as a 500 below.
        "google": lambda f: google_transcribe_from_file(f)
        .results[0]
        .alternatives[0]
        .transcript,
    }

    if model not in MODELS:
        # List the plain names instead of the dict_keys([...]) repr.
        choices = ", ".join(sorted(MODELS))
        return f"Invalid model '{model}', must choose one from: {choices}", 400

    upload = request.files.get("audio_file")
    if upload is None or not upload.filename:
        return "No audio file part in the request", 400

    # The UUID prefix keeps concurrent uploads with the same name apart.
    filename = secure_filename(upload.filename)
    filepath = os.path.join(
        os.path.dirname(__file__), f"{uuid.uuid4()}_{filename}"
    )

    try:
        # Saving happens inside the try so a partially written file is still
        # cleaned up when the write itself fails.
        upload.save(filepath)
        return MODELS[model](filepath), 200
    except Exception as e:
        # Request boundary: record the traceback server-side, then report.
        app.logger.exception("Transcription with model %r failed", model)
        return f"Transcription failed: {e}", 500
    finally:
        # The file may not exist (failed save); a missing file must not
        # replace the response with an unhandled error.
        try:
            os.remove(filepath)
        except FileNotFoundError:
            pass
63+
64+
65+
if __name__ == "__main__":
    # Local development entry point: python-dotenv is imported only here, so
    # it is not needed when the module is loaded by a WSGI server instead.
    from dotenv import load_dotenv

    # Read settings from the .env file that sits beside this module.
    env_file = os.path.join(os.path.dirname(__file__), ".env")
    load_dotenv(env_file)

    # run dev server with: python browser_tests/run_models/main.py
    # test with: curl -X POST -F "audio_file=@data/ExamplesWithComments/TIMIT_sample_0.wav" http://127.0.0.1:5000/api/v1/asr/deepspeech -H "Authorization: Bearer secret"
    app.run(debug=True)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
flask==3.1.2
2+
stt==1.4.0
3+
google-cloud-speech==2.34.0
4+
google-cloud-storage==2.19.0
5+
ffmpeg-python==0.2.0
6+
sounddevice==0.5.3
7+
scipy==1.15.3
8+
gunicorn==23.0.0
9+
numpy==1.24.4
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
# Build the koel-api image and run it locally on port 8080.

# Stop at the first failing command so a broken build never falls through to
# running a stale image.
set -euo pipefail

# Resolve paths relative to this script so it works from any working
# directory; the build context is two levels above the Dockerfile.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

docker build --platform=linux/amd64 --tag 'koel-api' -f "$script_dir/Dockerfile" "$script_dir/../.."
docker run -t -i -p 8080:8080 'koel-api'

browser_tests/run_models/test.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import os
2+
import requests
3+
from dotenv import load_dotenv
4+
5+
load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
6+
7+
model = "google"  # or "deepspeech"

# url = f"http://127.0.0.1:5000/api/v1/asr/{model}"
url = f"https://koel-api.fly.dev/api/v1/asr/{model}"
headers = {"Authorization": f"Bearer {os.environ['API_KEY']}"}

# Open the sample inside a context manager so the handle is closed even when
# the request raises, and bound the wait so an unresponsive server cannot
# hang the script forever (requests applies no timeout by default).
with open("data/ExamplesWithComments/TIMIT_sample_0.wav", "rb") as audio:
    response = requests.post(
        url,
        headers=headers,
        files={"audio_file": audio},
        timeout=600,
    )

print(response.status_code)
print(response.text)

fly.toml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# fly.toml app configuration file generated for koel-api on 2025-11-28T18:33:04-08:00
2+
#
3+
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
4+
#
5+
6+
app = 'koel-api'
7+
primary_region = 'sjc'
8+
9+
[build]
10+
dockerfile = './browser_tests/run_models/Dockerfile'
11+
12+
[http_service]
13+
internal_port = 8080
14+
force_https = true
15+
auto_stop_machines = 'stop'
16+
auto_start_machines = true
17+
min_machines_running = 0
18+
processes = ['app']
19+
20+
[[vm]]
21+
memory = '4gb'
22+
cpu_kind = 'performance'
23+
cpus = 2

scripts/core/load_secrets.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import os
2-
from dotenv import load_dotenv
2+
3+
try:
    from dotenv import load_dotenv
except ImportError:

    def load_dotenv(*args, **kwargs):
        """Stand-in used when python-dotenv is not installed.

        Accepts any positional or keyword arguments, so calls written for the
        real ``load_dotenv`` (including a bare ``load_dotenv()``) keep
        working, and returns ``False`` because no file was loaded.
        """
        return False
37

48

59
def load_secrets():

0 commit comments

Comments
 (0)