quick api

SanderGi · SanderGi · commit 12a1748c8ac5 · 2025-11-28T19:57:37.000-08:00
diff --git a/browser_tests/run_models/Dockerfile b/browser_tests/run_models/Dockerfile
@@ -0,0 +1,24 @@
+# Runtime image for the ASR HTTP API (the Flask app in main.py, served by gunicorn).
+# COPY source paths below are relative to the build context, not to this file.
+FROM python:3.10.19-slim
+
+WORKDIR /app
+
+# System dependencies, installed via apt without recommended extras; the apt
+# package lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg \
+    espeak-ng \
+    libportaudio2 \
+    python3-pyaudio \
+ && rm -rf /var/lib/apt/lists/*
+
+# Python dependencies (copied and installed before the sources so this layer
+# can be reused from cache when only application code changes)
+COPY browser_tests/run_models/requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the files: the models/ and scripts/ directories plus the
+# Flask entry point, which ends up at /app/main.py
+COPY models/ models/
+COPY scripts/ scripts/
+COPY browser_tests/run_models/main.py main.py
+
+# Port gunicorn binds to below
+EXPOSE 8080
+
+# Serve main:app with 2 worker processes of 1 thread each. --timeout is in
+# seconds: a worker may stay busy for up to 6000 s before gunicorn restarts it.
+CMD ["gunicorn", "--workers=2", "--threads=1", "--bind=0.0.0.0:8080", "--timeout", "6000", "main:app"]
diff --git a/browser_tests/run_models/main.py b/browser_tests/run_models/main.py
@@ -0,0 +1,72 @@
+import os
+import sys
+import uuid
+from functools import wraps
+
+if not os.path.exists(os.environ["GOOGLE_APPLICATION_CREDENTIALS"]):
+    with open(os.environ["GOOGLE_APPLICATION_CREDENTIALS"], "w") as f:
+        f.write(os.environ["GOOGLE_APPLICATION_CREDENTIALS_FILE"])
+
+from flask import Flask, request
+from werkzeug.utils import secure_filename
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
+from scripts.asr.deepspeech import deepspeech_transcribe_from_file
+from scripts.asr.google_speech import google_transcribe_from_file
+
+app = Flask(__name__)
+
+
+def api_key_required(f):
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        api_key = str(request.headers.get("Authorization")).split(" ")[-1]
+        if not api_key:
+            return "API Key is missing", 401
+        if api_key != os.environ.get("API_KEY", "secret"):
+            return "Invalid API Key", 403
+        return f(*args, **kwargs)
+
+    return decorated_function
+
+
+@app.route("/api/v1/asr/<model>", methods=["POST"])
+@api_key_required
+def run_asr(model: str):
+    MODELS = {
+        "deepspeech": deepspeech_transcribe_from_file,
+        "google": lambda f: google_transcribe_from_file(f)
+        .results[0]
+        .alternatives[0]
+        .transcript,
+    }
+
+    if model not in MODELS:
+        return f"Invalid model '{model}', must choose one from {MODELS.keys()}", 400
+
+    if "audio_file" not in request.files or not request.files["audio_file"].filename:
+        return "No audio file part in the request", 400
+
+    file = request.files["audio_file"]
+    filename = secure_filename(file.filename)  # type: ignore
+    filepath = os.path.join(
+        os.path.dirname(__file__), str(uuid.uuid4()) + "_" + filename
+    )
+    file.save(filepath)
+
+    try:
+        return MODELS[model](filepath), 200
+    except Exception as e:
+        return f"Transcription failed: {e}", 500
+    finally:
+        os.remove(filepath)
+
+
+if __name__ == "__main__":
+    from dotenv import load_dotenv
+
+    load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
+
+    # run dev server with: python browser_tests/run_models/main.py
+    # test with: curl -X POST -F "audio_file=@data/ExamplesWithComments/TIMIT_sample_0.wav" http://127.0.0.1:5000/api/v1/asr/deepspeech -H "Authorization: Bearer secret"
+    app.run(debug=True)
diff --git a/browser_tests/run_models/requirements.txt b/browser_tests/run_models/requirements.txt
@@ -0,0 +1,9 @@
+flask==3.1.2
+stt==1.4.0
+google-cloud-speech==2.34.0
+google-cloud-storage==2.19.0
+ffmpeg-python==0.2.0
+sounddevice==0.5.3
+scipy==1.15.3
+gunicorn==23.0.0
+numpy==1.24.4
diff --git a/browser_tests/run_models/run-prod.sh b/browser_tests/run_models/run-prod.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+docker build --platform=linux/amd64 --tag 'koel-api' -f ./Dockerfile ../..
+docker run -t -i -p 8080:8080 'koel-api'
diff --git a/browser_tests/run_models/test.py b/browser_tests/run_models/test.py
@@ -0,0 +1,18 @@
+import os
+import requests
+from dotenv import load_dotenv
+
+load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
+
+model = "google"  # or "deepspeech"
+
+# url = f"http://127.0.0.1:5000/api/v1/asr/{model}"
+url = f"https://koel-api.fly.dev/api/v1/asr/{model}"
+headers = {"Authorization": f"Bearer {os.environ['API_KEY']}"}
+
+files = {"audio_file": open("data/ExamplesWithComments/TIMIT_sample_0.wav", "rb")}
+
+response = requests.post(url, headers=headers, files=files)
+
+print(response.status_code)
+print(response.text)
diff --git a/fly.toml b/fly.toml
@@ -0,0 +1,23 @@
+# fly.toml app configuration file generated for koel-api on 2025-11-28T18:33:04-08:00
+#
+# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
+#
+
+app = 'koel-api'
+primary_region = 'sjc'
+
+[build]
+  dockerfile = './browser_tests/run_models/Dockerfile'
+
+[http_service]
+  internal_port = 8080
+  force_https = true
+  auto_stop_machines = 'stop'
+  auto_start_machines = true
+  min_machines_running = 0
+  processes = ['app']
+
+[[vm]]
+  memory = '4gb'
+  cpu_kind = 'performance'
+  cpus = 2
diff --git a/scripts/core/load_secrets.py b/scripts/core/load_secrets.py
@@ -1,5 +1,9 @@
 import os
-from dotenv import load_dotenv
+
+try:
+    from dotenv import load_dotenv
+except ImportError:
+    load_dotenv = lambda _: None
 
 
 def load_secrets():