Commit 3bbfc8e: "docker file"
1 parent 675b635

File tree: 4 files changed, +855 −0 lines changed
Dockerfile: 38 additions & 0 deletions

# Base image with CUDA 12.1 and cuDNN runtime (for H100/A100 GPUs);
# Python itself is installed below via apt
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV TRANSFORMERS_CACHE=/root/.cache/huggingface/transformers

# Install system packages, then install cuDNN 9.10.1 manually (for CUDA 12)
RUN apt-get update && apt-get install -y \
    python3 python3-pip python3-venv python3-dev \
    ffmpeg git curl wget ca-certificates gnupg libsndfile1 && \
    mkdir -p /tmp/cudnn && cd /tmp/cudnn && \
    wget https://developer.download.nvidia.com/compute/cudnn/9.10.1/local_installers/cudnn-local-repo-ubuntu2204-9.10.1_1.0-1_amd64.deb && \
    dpkg -i cudnn-local-repo-ubuntu2204-9.10.1_1.0-1_amd64.deb && \
    cp /var/cudnn-local-repo-ubuntu2204-9.10.1/cudnn-*-keyring.gpg /usr/share/keyrings/ && \
    apt-get update && \
    apt-get -y install cudnn-cuda-12 && \
    rm -rf /var/lib/apt/lists/* /tmp/cudnn

# Upgrade pip and install torch separately for CUDA 12.1
RUN python3 -m pip install --upgrade pip && \
    pip install torch==2.2.2+cu121 torchaudio==2.2.2+cu121 -f https://download.pytorch.org/whl/torch_stable.html

# Copy requirements and install them
COPY requirements.txt /app/requirements.txt
WORKDIR /app
RUN pip install -r requirements.txt

# Copy application code
COPY whisper_code.py whisper_code.py
COPY whisper_api_server.py whisper_api_server.py

# Expose port
EXPOSE 8000

# Run API
CMD ["uvicorn", "whisper_api_server:app", "--host", "0.0.0.0", "--port", "8000"]
requirements.txt: 37 additions & 0 deletions
# Core frameworks
fastapi==0.115.12
uvicorn==0.34.2

# Whisper + speech processing
faster-whisper==1.1.1
librosa==0.11.0
pydub==0.25.1
soundfile==0.13.1
noisereduce==3.0.3
demucs==4.0.1
ffmpeg-python==0.2.0

# Diarization (PyAnnote)
pyannote.audio==3.3.2
pyannote.core==5.0.0
pyannote.database==5.1.3
pyannote.metrics==3.2.1
pyannote.pipeline==3.0.1

# Transformers + Summarization
transformers==4.51.3
huggingface-hub==0.31.4
sentencepiece==0.2.0

# Evaluation
jiwer==3.1.0

# Utilities
numpy==1.26.4
scikit-learn==1.6.1
requests==2.32.3
tqdm==4.67.1
typing_extensions==4.13.2
pydantic==2.11.4
python-multipart==0.0.20
accelerate==1.7.0
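
Note that torch is pinned in the Dockerfile rather than here. A quick sanity check inside the built container can confirm that the CUDA wheel and the pinned audio stack resolve together (an assumed verification step, not part of the commit):

import torch                                   # installed by the Dockerfile, not requirements.txt
from faster_whisper import WhisperModel        # pinned above as faster-whisper==1.1.1

print(torch.__version__, torch.version.cuda)   # expect 2.2.2+cu121 and 12.1
print(torch.cuda.is_available())               # True on a GPU host with working drivers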
whisper_api_server.py: 129 additions & 0 deletions
import os
import tempfile
import json
import threading
import glob
from queue import Queue
from threading import Lock
from typing import Optional
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import StreamingResponse, JSONResponse
from whisper_code import transcribe_and_summarize, load_whisper_model_faster

app = FastAPI()

# Global model cache and lock
model_cache = {}
model_lock = Lock()

@app.post("/transcribe")
async def transcribe_audio_api(
    audio_file: UploadFile = File(...),
    model: str = Form("base"),
    summarized_model: str = Form("mistralai/Mistral-7B-Instruct-v0.1"),
    denoise: bool = Form(False),
    prop_decrease: float = Form(0.7),
    summary: bool = Form(True),
    speaker: bool = Form(False),
    hf_token: Optional[str] = Form(None),
    max_speakers: Optional[int] = Form(None),
    streaming: bool = Form(False)
):
    # Persist the upload to a temporary file (mkstemp avoids the race
    # condition of the deprecated tempfile.mktemp)
    fd, temp_audio_path = tempfile.mkstemp(suffix=f"_{audio_file.filename}")
    with os.fdopen(fd, "wb") as f:
        f.write(await audio_file.read())

    output_dir = tempfile.mkdtemp()

    # Ensure the model is loaded once; take the lock so concurrent
    # requests cannot load the same model twice
    with model_lock:
        if model not in model_cache:
            model_cache[model] = load_whisper_model_faster(model)

    whisper_model = model_cache[model]

    if streaming:
        def generator():
            q = Queue()

            def api_callback(result):
                q.put(json.dumps(result) + "\n")

            def run_pipeline():
                try:
                    with model_lock:
                        transcribe_and_summarize(
                            path=temp_audio_path,
                            model_name=model,
                            output_dir=output_dir,
                            summarized_model_id=summarized_model,
                            denoise=denoise,
                            prop_decrease=prop_decrease,
                            summary=summary,
                            speaker=speaker,
                            hf_token=hf_token,
                            max_speakers=max_speakers,
                            streaming=True,
                            api_callback=api_callback,
                            model_instance=whisper_model
                        )
                except Exception as e:
                    import traceback
                    traceback.print_exc()
                    q.put(json.dumps({"error": f"Streaming transcription failed: {str(e)}"}))
                finally:
                    q.put(None)  # sentinel: tells the generator to stop

            threading.Thread(target=run_pipeline).start()

            # Drain the queue, yielding newline-delimited JSON chunks
            while True:
                chunk = q.get()
                if chunk is None:
                    break
                yield chunk

        return StreamingResponse(generator(), media_type="application/json")

    else:
        try:
            with model_lock:
                transcribe_and_summarize(
                    path=temp_audio_path,
                    model_name=model,
                    output_dir=output_dir,
                    summarized_model_id=summarized_model,
                    denoise=denoise,
                    prop_decrease=prop_decrease,
                    summary=summary,
                    speaker=speaker,
                    hf_token=hf_token,
                    max_speakers=max_speakers,
                    streaming=False,
                    api_callback=None,
                    model_instance=whisper_model
                )
        except Exception as e:
            import traceback
            traceback.print_exc()
            return JSONResponse(
                content={"error": f"Transcription failed: {str(e)}"},
                status_code=500
            )

    # Return the most recently written JSON result from the output directory
    try:
        json_files = sorted(
            glob.glob(os.path.join(output_dir, "*.json")),
            key=os.path.getmtime,
            reverse=True
        )
        if not json_files:
            raise FileNotFoundError("No output JSON file found.")

        with open(json_files[0]) as f:
            return JSONResponse(content=json.load(f))

    except Exception as e:
        import traceback
        traceback.print_exc()
        return JSONResponse(
            content={"error": f"Failed to read output JSON: {str(e)}"},
            status_code=500
        )
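
A client can exercise the endpoint along these lines (a sketch: localhost:8000 follows the EXPOSE/CMD in the Dockerfile, while meeting.wav and the form values are placeholders). With streaming=true the server emits newline-delimited JSON, which requests can consume via iter_lines:

import json
import requests

URL = "http://localhost:8000/transcribe"  # host/port from the Dockerfile CMD

# Blocking request: a single JSON document with the full result
with open("meeting.wav", "rb") as audio:  # placeholder file name
    resp = requests.post(
        URL,
        files={"audio_file": audio},
        data={"model": "base", "summary": "true", "streaming": "false"},
    )
print(resp.json())

# Streaming request: newline-delimited JSON chunks as they are produced
with open("meeting.wav", "rb") as audio:
    resp = requests.post(
        URL,
        files={"audio_file": audio},
        data={"model": "base", "streaming": "true"},
        stream=True,
    )
    for line in resp.iter_lines():
        if line:
            print(json.loads(line))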
