Commit 8f53754

fxfitz authored and rishikanthc committed
feat: add RTX 5090 Blackwell GPU support (sm_120)
Add support for NVIDIA RTX 50-series GPUs (Blackwell architecture), which require CUDA 12.8+ and PyTorch cu128 wheels due to the new sm_120 compute capability.

Changes:
- Add a configurable PYTORCH_CUDA_VERSION environment variable to control the PyTorch wheel version at runtime (cu126 for legacy GPUs, cu128 for Blackwell)
- Update all model adapters to use the dynamic CUDA version instead of hardcoded cu126 URLs
- Update Dockerfile.cuda.12.9 for Blackwell with the CUDA 12.9.1 base image, PYTORCH_CUDA_VERSION=cu128, and the previously missing WHISPERX_ENV and yt-dlp
- Update Dockerfile.cuda with an explicit PYTORCH_CUDA_VERSION=cu126
- Add docker-compose.blackwell.yml for the pre-built Blackwell image
- Add docker-compose.build.blackwell.yml for local Blackwell builds
- Add GPU compatibility documentation to the README

Fixes: #104
1 parent 718cb74 commit 8f53754

File tree

11 files changed (+146, −21 lines)


Dockerfile.cuda

Lines changed: 3 additions & 1 deletion
```diff
@@ -59,7 +59,9 @@ ENV PYTHONUNBUFFERED=1 \
     PUID=1000 \
     PGID=1000 \
     NVIDIA_VISIBLE_DEVICES=all \
-    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
+    # PyTorch CUDA wheel version: cu126 for legacy GPUs (GTX 10-series through RTX 40-series)
+    PYTORCH_CUDA_VERSION=cu126
 
 WORKDIR /app
```

Dockerfile.cuda.12.9

Lines changed: 13 additions & 2 deletions
```diff
@@ -1,4 +1,6 @@
-# Multi-stage build for Scriberr with CUDA support
+# Multi-stage build for Scriberr with CUDA 12.9 support (Blackwell GPUs)
+# This Dockerfile targets NVIDIA RTX 50-series (Blackwell architecture, sm_120)
+# For legacy GPUs (GTX 10-series through RTX 40-series), use Dockerfile.cuda instead
 # Builds React UI and Go server, then ships with NVIDIA CUDA runtime
 
 ########################
@@ -54,10 +56,14 @@ ENV PYTHONUNBUFFERED=1 \
     PORT=8080 \
     DATABASE_PATH=/app/data/scriberr.db \
     UPLOAD_DIR=/app/data/uploads \
+    WHISPERX_ENV=/app/whisperx-env \
+    APP_ENV=production \
     PUID=1000 \
     PGID=1000 \
     NVIDIA_VISIBLE_DEVICES=all \
-    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
+    # PyTorch CUDA wheel version: cu128 for Blackwell GPUs (RTX 50-series, sm_120)
+    PYTORCH_CUDA_VERSION=cu128
 
 WORKDIR /app
 
@@ -76,6 +82,11 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
     && chmod 755 /usr/local/bin/uv \
     && uv --version
 
+# Install yt-dlp standalone binary (required by API handlers for YouTube URL processing)
+RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp \
+    && chmod a+rx /usr/local/bin/yt-dlp \
+    && yt-dlp --version
+
 # Install Deno (JavaScript runtime required for yt-dlp YouTube downloads)
 # YouTube now requires JS execution for video cipher decryption
 # See: https://github.com/yt-dlp/yt-dlp/issues/14404
```

README.md

Lines changed: 24 additions & 0 deletions
````diff
@@ -275,6 +275,30 @@ volumes:
 docker compose -f docker-compose.cuda.yml up -d
 ```
 
+#### GPU Compatibility
+
+Scriberr provides separate Docker images for different NVIDIA GPU generations due to CUDA/PyTorch compatibility requirements:
+
+| GPU Generation | Compute Capability | Docker Image | Docker Compose File |
+|:---|:---|:---|:---|
+| GTX 10-series (Pascal) | sm_61 | `scriberr-cuda` | `docker-compose.cuda.yml` |
+| RTX 20-series (Turing) | sm_75 | `scriberr-cuda` | `docker-compose.cuda.yml` |
+| RTX 30-series (Ampere) | sm_86 | `scriberr-cuda` | `docker-compose.cuda.yml` |
+| RTX 40-series (Ada Lovelace) | sm_89 | `scriberr-cuda` | `docker-compose.cuda.yml` |
+| **RTX 50-series (Blackwell)** | sm_120 | `scriberr-cuda-blackwell` | `docker-compose.blackwell.yml` |
+
+**RTX 50-series users (RTX 5080, 5090, etc.):** You must use the Blackwell-specific image. The standard CUDA image will not work due to PyTorch CUDA compatibility requirements. Use:
+
+```bash
+docker compose -f docker-compose.blackwell.yml up -d
+```
+
+Or for local builds:
+
+```bash
+docker compose -f docker-compose.build.blackwell.yml up -d
+```
+
 ### App Startup
 
 When you run Scriberr for the first time, it may take several minutes to start. This is normal!
````

docker-compose.blackwell.yml

Lines changed: 34 additions & 0 deletions
```diff
@@ -0,0 +1,34 @@
+# Docker Compose for Scriberr with Blackwell GPU support (RTX 50-series)
+# Uses pre-built image from GitHub Container Registry
+# For legacy GPUs (GTX 10-series through RTX 40-series), use docker-compose.cuda.yml instead
+version: "3.9"
+services:
+  scriberr:
+    image: ghcr.io/rishikanthc/scriberr-cuda-blackwell:latest
+    ports:
+      - "8080:8080"
+    volumes:
+      - scriberr_data:/app/data
+      - env_data:/app/whisperx-env
+    restart: unless-stopped
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities:
+                - gpu
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - PUID=${PUID:-1000}
+      - PGID=${PGID:-1000}
+      # Security: already set in container, but can be overridden
+      - APP_ENV=production
+      # CORS: comma-separated list of allowed origins for production
+      # - ALLOWED_ORIGINS=https://your-domain.com
+
+volumes:
+  scriberr_data: {}
+  env_data: {}
```

docker-compose.build.blackwell.yml

Lines changed: 37 additions & 0 deletions
```diff
@@ -0,0 +1,37 @@
+# Docker Compose for building Scriberr with Blackwell GPU support locally
+# Uses Dockerfile.cuda.12.9 for RTX 50-series (Blackwell architecture, sm_120)
+# For legacy GPUs (GTX 10-series through RTX 40-series), use docker-compose.build.cuda.yml instead
+services:
+  scriberr:
+    build:
+      context: .
+      dockerfile: Dockerfile.cuda.12.9
+    image: scriberr:local-blackwell
+    container_name: scriberr-blackwell
+    ports:
+      - "8080:8080"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities:
+                - gpu
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - PUID=${PUID:-1000}
+      - PGID=${PGID:-1000}
+      # Security: already set in container, but can be overridden
+      - APP_ENV=production
+      # CORS: comma-separated list of allowed origins for production
+      # - ALLOWED_ORIGINS=https://your-domain.com
+    volumes:
+      - ./scriberr_data:/app/data
+      - ./env-data:/app/whisperx-env
+    restart: unless-stopped
+
+volumes:
+  scriberr_data:
+  env-data:
```

internal/transcription/adapters/base_adapter.go

Lines changed: 16 additions & 0 deletions
```diff
@@ -27,6 +27,22 @@ var (
 	requestGroup singleflight.Group
 )
 
+// GetPyTorchCUDAVersion returns the PyTorch CUDA wheel version to use.
+// This is configurable via the PYTORCH_CUDA_VERSION environment variable.
+// Defaults to "cu126" for CUDA 12.6 (legacy GPUs: GTX 10-series through RTX 40-series).
+// Set to "cu128" for CUDA 12.8 (Blackwell GPUs: RTX 50-series).
+func GetPyTorchCUDAVersion() string {
+	if cudaVersion := os.Getenv("PYTORCH_CUDA_VERSION"); cudaVersion != "" {
+		return cudaVersion
+	}
+	return "cu126" // Default to CUDA 12.6 for legacy compatibility
+}
+
+// GetPyTorchWheelURL returns the full PyTorch wheel URL for the configured CUDA version.
+func GetPyTorchWheelURL() string {
+	return fmt.Sprintf("https://download.pytorch.org/whl/%s", GetPyTorchCUDAVersion())
+}
+
 // CheckEnvironmentReady checks if a UV environment is ready with caching and singleflight
 func CheckEnvironmentReady(envPath, importStatement string) bool {
 	cacheKey := fmt.Sprintf("%s:%s", envPath, importStatement)
```

internal/transcription/adapters/canary_adapter.go

Lines changed: 4 additions & 4 deletions
```diff
@@ -207,8 +207,8 @@ func (c *CanaryAdapter) setupCanaryEnvironment() error {
 		return nil
 	}
 
-	// Create pyproject.toml (same as Parakeet since they share environment)
-	pyprojectContent := `[project]
+	// Create pyproject.toml with configurable PyTorch CUDA version
+	pyprojectContent := fmt.Sprintf(`[project]
 name = "parakeet-transcription"
 version = "0.1.0"
 description = "Audio transcription using NVIDIA Parakeet models"
@@ -241,14 +241,14 @@ triton = [
 
 [[tool.uv.index]]
 name = "pytorch"
-url = "https://download.pytorch.org/whl/cu126"
+url = "%s"
 explicit = true
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 explicit = true
-`
+`, GetPyTorchWheelURL())
 	if err := os.WriteFile(pyprojectPath, []byte(pyprojectContent), 0644); err != nil {
 		return fmt.Errorf("failed to write pyproject.toml: %w", err)
 	}
```

internal/transcription/adapters/parakeet_adapter.go

Lines changed: 4 additions & 4 deletions
```diff
@@ -179,8 +179,8 @@ func (p *ParakeetAdapter) setupParakeetEnvironment() error {
 		return fmt.Errorf("failed to create parakeet directory: %w", err)
 	}
 
-	// Create pyproject.toml
-	pyprojectContent := `[project]
+	// Create pyproject.toml with configurable PyTorch CUDA version
+	pyprojectContent := fmt.Sprintf(`[project]
 name = "parakeet-transcription"
 version = "0.1.0"
 description = "Audio transcription using NVIDIA Parakeet models"
@@ -213,14 +213,14 @@ triton = [
 
 [[tool.uv.index]]
 name = "pytorch"
-url = "https://download.pytorch.org/whl/cu126"
+url = "%s"
 explicit = true
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 explicit = true
-`
+`, GetPyTorchWheelURL())
 	pyprojectPath := filepath.Join(p.envPath, "pyproject.toml")
 	if err := os.WriteFile(pyprojectPath, []byte(pyprojectContent), 0644); err != nil {
 		return fmt.Errorf("failed to write pyproject.toml: %w", err)
```

internal/transcription/adapters/pyannote_adapter.go

Lines changed: 4 additions & 4 deletions
```diff
@@ -216,10 +216,10 @@ func (p *PyAnnoteAdapter) setupPyAnnoteEnvironment() error {
 		return fmt.Errorf("failed to create pyannote directory: %w", err)
 	}
 
-	// Create pyproject.toml for PyAnnote
+	// Create pyproject.toml with configurable PyTorch CUDA version
 	// Note: We explicitly pin torch and torchaudio to 2.1.2 to ensure compatibility with pyannote.audio 3.1
 	// Newer versions of torchaudio (2.2+) removed AudioMetaData which causes crashes
-	pyprojectContent := `[project]
+	pyprojectContent := fmt.Sprintf(`[project]
 name = "pyannote-diarization"
 version = "0.1.0"
 description = "Audio diarization using PyAnnote"
@@ -245,14 +245,14 @@ torchaudio = [
 
 [[tool.uv.index]]
 name = "pytorch"
-url = "https://download.pytorch.org/whl/cu126"
+url = "%s"
 explicit = true
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 explicit = true
-`
+`, GetPyTorchWheelURL())
 	pyprojectPath := filepath.Join(p.envPath, "pyproject.toml")
 	if err := os.WriteFile(pyprojectPath, []byte(pyprojectContent), 0644); err != nil {
 		return fmt.Errorf("failed to write pyproject.toml: %w", err)
```

internal/transcription/adapters/sortformer_adapter.go

Lines changed: 4 additions & 4 deletions
```diff
@@ -195,8 +195,8 @@ func (s *SortformerAdapter) setupSortformerEnvironment() error {
 		return fmt.Errorf("failed to create sortformer directory: %w", err)
 	}
 
-	// Create pyproject.toml (same as other NVIDIA models)
-	pyprojectContent := `[project]
+	// Create pyproject.toml with configurable PyTorch CUDA version
+	pyprojectContent := fmt.Sprintf(`[project]
 name = "parakeet-transcription"
 version = "0.1.0"
 description = "Audio transcription using NVIDIA Parakeet models"
@@ -230,14 +230,14 @@ triton = [
 
 [[tool.uv.index]]
 name = "pytorch"
-url = "https://download.pytorch.org/whl/cu126"
+url = "%s"
 explicit = true
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 explicit = true
-`
+`, GetPyTorchWheelURL())
 	pyprojectPath := filepath.Join(s.envPath, "pyproject.toml")
 	if err := os.WriteFile(pyprojectPath, []byte(pyprojectContent), 0644); err != nil {
 		return fmt.Errorf("failed to write pyproject.toml: %w", err)
```
