Skip to content

Commit 2ab3dfa

Browse files
kennethreitzclaude
andcommitted
Add static site generation with nginx + hardened FastAPI sidecar
Pre-render ~1,277 high-traffic HTML pages (homepage, books, chapters) at build time and serve them directly via nginx. All other routes (verses, search, API, PDFs, Strong's) fall through to a FastAPI sidecar. If the sidecar crashes, nginx continues serving static pages and health checks. Also harden the FastAPI app against the memory/crash issues: - Switch from bare uvicorn to gunicorn with uvicorn workers - Add --max-requests worker recycling to prevent memory leaks - Add --timeout to kill hung workers - Add per-IP rate limiting middleware (10 req/s, burst of 50) - Add request timeout middleware (30s max per request) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent c082570 commit 2ab3dfa

File tree

9 files changed

+662
-4
lines changed

9 files changed

+662
-4
lines changed

Dockerfile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,9 @@ COPY . .
4949
# Build search index at image build time for fast searches
5050
RUN python3 -c "from kjvstudy_org.utils.search_index import init_search_index; init_search_index()"
5151

52-
# Run uvicorn directly (no nginx sidecar)
53-
CMD ["sh", "-c", "uv run uvicorn kjvstudy_org.server:app --host ${HOST:-0.0.0.0} --port ${PORT:-8000} --workers ${WORKERS:-1} --proxy-headers"]
52+
# Run with gunicorn + uvicorn workers for production resilience:
#   exec                  : replace the wrapper shell so signals sent to the
#                           container (e.g. SIGTERM) are not stopped by `sh`
#   --max-requests        : recycle workers after N requests (prevents memory leaks)
#   --max-requests-jitter : stagger recycling so workers don't all restart at once
#   --timeout             : kill workers that hang for >60s
#   --graceful-timeout    : give workers 10s to finish after SIGTERM
#   --forwarded-allow-ips : accept X-Forwarded-* headers from any peer
#                           NOTE(review): '*' is only safe while the container is
#                           reachable exclusively through a trusted proxy.
# gunicorn's --proxy-protocol flag is intentionally not passed: it enables the
# PROXY protocol and is not the counterpart of uvicorn's --proxy-headers.
CMD ["sh", "-c", "exec uv run gunicorn kjvstudy_org.server:app --worker-class uvicorn.workers.UvicornWorker --bind ${HOST:-0.0.0.0}:${PORT:-8000} --workers ${WORKERS:-2} --max-requests 2000 --max-requests-jitter 500 --timeout 60 --graceful-timeout 10 --forwarded-allow-ips='*' --access-logfile -"]

Dockerfile.static

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# =============================================================================
# Stage 1: Builder — install deps, generate static HTML pages
# =============================================================================
FROM python:3.13 AS builder

# NOTE(review): `:latest` makes the build non-reproducible; pin a uv version
# (or digest) once the version the lockfile was produced with is confirmed.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy

WORKDIR /app

# Install third-party dependencies first so this layer stays cached until
# pyproject.toml or uv.lock change.
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen --no-install-project --no-dev

COPY . .

# `--frozen` keeps `uv run` from re-resolving uv.lock, matching the
# `uv sync --frozen` step above.
# NOTE(review): `uv run` syncs the environment before running; confirm whether
# `--no-dev` should also be passed here so the copied .venv stays dev-free.

# Build search index (needed by app startup)
RUN uv run --frozen python3 -c "from kjvstudy_org.utils.search_index import init_search_index; init_search_index()"

# Generate static HTML pages into /app/dist (no PDFs or API JSON)
# NOTE(review): an earlier comment said "~50K files"; confirm the real count.
RUN uv run --frozen python scripts/generate_static_site.py --output /app/dist --workers 4
25+
26+
# =============================================================================
# Stage 2: Runtime — nginx for static files + FastAPI sidecar for dynamic routes
# =============================================================================
FROM python:3.13-slim

# Install nginx + runtime deps for WeasyPrint (PDF generation in sidecar).
# curl is used by the HEALTHCHECK below. Packages are kept sorted for diffs.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        fonts-dejavu-core \
        libffi8 \
        libgdk-pixbuf-2.0-0 \
        libharfbuzz0b \
        libpango-1.0-0 \
        libpangoft2-1.0-0 \
        nginx \
        shared-mime-info \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): `:latest` is unpinned; pin a uv version for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH="/app" \
    PATH="/app/.venv/bin:$PATH"

WORKDIR /app

# Copy virtualenv from builder
COPY --from=builder /app/.venv /app/.venv

# Copy application code (needed by the sidecar)
COPY --from=builder /app/kjvstudy_org /app/kjvstudy_org
COPY --from=builder /app/scripts/search_api.py /app/scripts/search_api.py

# Copy pre-rendered static site
COPY --from=builder /app/dist /app/dist

# Copy nginx config
COPY nginx.conf /etc/nginx/nginx.conf

# Entrypoint: start FastAPI sidecar + nginx.
# --chmod marks the script executable in the same layer (no extra RUN chmod).
COPY --chmod=755 <<'ENTRY' /app/start.sh
#!/bin/sh
set -e

# Run the FastAPI sidecar in the background under a small restart loop.
# It handles: search, API, PDFs, OG images, and any uncached pages.
# nginx's /health endpoint is static, so a dead sidecar would otherwise go
# unnoticed and dynamic routes would stay down until the next deploy.
(
    while true; do
        python3 /app/scripts/search_api.py || true
        echo "start.sh: sidecar exited; restarting in 1s" >&2
        sleep 1
    done
) &

# Wait briefly for sidecar to be ready
sleep 1

# Start nginx in the foreground; exec makes it the container's main process
exec nginx -g 'daemon off;'
ENTRY

EXPOSE 8000

HEALTHCHECK --interval=15s --timeout=5s --start-period=10s \
    CMD curl -f http://localhost:8000/health || exit 1

CMD ["/app/start.sh"]

fly.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ primary_region = 'iad'
1010
strategy = "bluegreen"
1111

1212
[build]
13+
dockerfile = "Dockerfile.static"
1314

1415
[http_service]
1516
internal_port = 8000
@@ -45,5 +46,5 @@ PYTHONDONTWRITEBYTECODE = "1"
4546
# Lazy-load interlinear data to reduce memory usage
4647
PRELOAD_INTERLINEAR = "false"
4748

48-
# Number of Uvicorn workers
49-
WORKERS = "2"
49+
# Sidecar workers (gunicorn)
50+
SIDECAR_WORKERS = "1"

kjvstudy_org/server.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import re
55
import random
6+
import time
67
from contextlib import asynccontextmanager
78
from datetime import datetime, timedelta
89
from pathlib import Path as PathLib
@@ -236,6 +237,70 @@ async def dispatch(self, request: Request, call_next):
236237
return response
237238

238239

240+
# Rate limiting middleware — per-IP request throttle
class RateLimitMiddleware(BaseHTTPMiddleware):
    """In-memory per-IP rate limiter implemented as a token bucket.

    Each client IP owns a bucket that refills at ``requests_per_second``
    tokens per second up to ``requests_per_second * burst_multiplier``
    tokens. Every request spends one token; a request that finds less than
    one token available is answered with HTTP 429 and ``Retry-After: 1``.
    Requests to ``/health`` are never throttled.

    The bucket table is a plain dict on the middleware instance, so the
    limit applies per process rather than across processes.

    Args:
        app: The ASGI application to wrap.
        requests_per_second: Sustained refill rate per IP.
        burst_multiplier: Bucket capacity as a multiple of the refill rate
            (the default of 5 gives a burst of 50 at 10 req/s).
    """

    # Idle buckets are only evicted once the table holds more than this many IPs.
    _CLEANUP_THRESHOLD = 5000
    # A bucket untouched for this many seconds is dropped during cleanup.
    _STALE_AFTER_SECONDS = 60.0
    # Minimum spacing between cleanup passes. Without it, a table that stays
    # above the threshold would be rebuilt in full on every single request.
    _CLEANUP_INTERVAL_SECONDS = 1.0

    def __init__(
        self,
        app,
        requests_per_second: float = 10.0,
        burst_multiplier: float = 5.0,
    ):
        super().__init__(app)
        self.rate = requests_per_second
        # {ip: (token_count, last_refill_time)}
        self._buckets: dict[str, tuple[float, float]] = {}
        self._max_tokens = requests_per_second * burst_multiplier  # burst allowance
        # -inf so the first cleanup is never delayed by the interval gate.
        self._last_cleanup = float("-inf")

    def _evict_stale(self, now: float) -> None:
        """Drop buckets idle for longer than the stale window, at most once per interval."""
        if len(self._buckets) <= self._CLEANUP_THRESHOLD:
            return
        if now - self._last_cleanup < self._CLEANUP_INTERVAL_SECONDS:
            return
        self._last_cleanup = now
        cutoff = now - self._STALE_AFTER_SECONDS
        self._buckets = {
            ip: bucket for ip, bucket in self._buckets.items() if bucket[1] > cutoff
        }

    async def dispatch(self, request: Request, call_next):
        """Spend one token for the caller's IP, or reject the request with 429."""
        # Skip rate limiting for health checks
        if request.url.path == "/health":
            return await call_next(request)

        ip = request.client.host if request.client else "unknown"
        now = time.monotonic()

        # Unknown IPs start with a full bucket; known ones are topped up for
        # the time elapsed since their last accepted request.
        tokens, last = self._buckets.get(ip, (self._max_tokens, now))
        tokens = min(self._max_tokens, tokens + (now - last) * self.rate)

        if tokens < 1.0:
            # The stored bucket is left untouched so it keeps refilling from
            # the time of the last accepted request.
            return JSONResponse(
                {"detail": "Too many requests"},
                status_code=429,
                headers={"Retry-After": "1"},
            )

        self._buckets[ip] = (tokens - 1.0, now)
        self._evict_stale(now)

        return await call_next(request)
280+
281+
282+
# Request timeout middleware — kill requests that take too long
class TimeoutMiddleware(BaseHTTPMiddleware):
    """Answer with HTTP 504 when the wrapped app does not respond in time.

    Args:
        app: The ASGI application to wrap.
        timeout_seconds: Maximum time to wait for ``call_next`` to return.
    """

    def __init__(self, app, timeout_seconds: float = 30.0):
        super().__init__(app)
        self.timeout = timeout_seconds

    async def dispatch(self, request: Request, call_next):
        """Await the downstream response under a deadline; return a 504 JSON body on expiry."""
        import asyncio

        downstream = call_next(request)
        try:
            response = await asyncio.wait_for(downstream, timeout=self.timeout)
        except asyncio.TimeoutError:
            response = JSONResponse(
                {"detail": "Request timeout"},
                status_code=504,
            )
        return response
302+
303+
239304
# Add GZip compression middleware (compress responses > 500 bytes)
240305
app.add_middleware(GZipMiddleware, minimum_size=500)
241306

@@ -245,6 +310,12 @@ async def dispatch(self, request: Request, call_next):
245310
# Add bot logging middleware
246311
app.add_middleware(BotLoggerMiddleware)
247312

313+
# Add rate limiting (10 req/s per IP, burst of 50)
314+
app.add_middleware(RateLimitMiddleware, requests_per_second=10.0)
315+
316+
# Add request timeout (30 seconds max, 60 for PDFs handled by route-level timeout)
317+
app.add_middleware(TimeoutMiddleware, timeout_seconds=30.0)
318+
248319

249320
# Set up Jinja2 templates and static files
250321
current_dir = PathLib(__file__).parent

nginx.conf

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
worker_processes auto;
# Log to the container's stderr/stdout instead of files under /var/log/nginx,
# so output reaches the platform's log stream and no log files accumulate
# inside the container.
error_log stderr warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    log_format main '$remote_addr - [$time_local] "$request" $status $body_bytes_sent "$http_user_agent"';
    access_log /dev/stdout main;

    sendfile on;
    keepalive_timeout 65;

    # Gzip
    gzip on;
    gzip_vary on;
    gzip_proxied any;
    gzip_comp_level 6;
    gzip_min_length 500;
    gzip_types text/plain text/css text/xml text/javascript
               application/json application/javascript application/xml
               application/rss+xml image/svg+xml;

    # Pass through the X-Forwarded-Proto set by an upstream proxy when present;
    # fall back to this connection's own scheme otherwise. nginx listens on
    # plain HTTP here, so $scheme alone would always report "http".
    map $http_x_forwarded_proto $forwarded_proto {
        default $http_x_forwarded_proto;
        ""      $scheme;
    }

    # Upstream: FastAPI sidecar for dynamic routes
    upstream sidecar {
        server 127.0.0.1:8001;
    }

    server {
        listen 8000;
        server_name _;
        root /app/dist;

        # Security headers.
        # nginx inherits add_header from this level ONLY into locations that
        # define no add_header of their own, so the locations below that add
        # Cache-Control repeat these two lines.
        add_header X-Content-Type-Options nosniff always;
        add_header X-Frame-Options SAMEORIGIN always;

        # Forwarding headers for every proxied location. No location defines
        # its own proxy_set_header, so all of them inherit this set.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $forwarded_proto;

        # -----------------------------------------------------------
        # Dynamic routes — proxy to FastAPI sidecar
        # -----------------------------------------------------------

        # Search (dynamic query results)
        location = /search {
            proxy_pass http://sidecar;
        }

        # All API endpoints, including the OpenAPI docs under /api/docs,
        # /api/redoc and /api/openapi.json
        location /api/ {
            proxy_pass http://sidecar;
        }

        # PDF generation (on-demand)
        location ~ /pdf$ {
            proxy_pass http://sidecar;
            proxy_read_timeout 30s;
        }

        # Verse of the day redirect (needs server-side date logic)
        location = /verse-of-the-day {
            proxy_pass http://sidecar;
        }

        # OG images (dynamically generated)
        location /og/ {
            proxy_pass http://sidecar;
        }

        # Family tree search (dynamic query)
        location = /family-tree/search {
            proxy_pass http://sidecar;
        }

        # Family tree SVG (dynamically rendered)
        location = /family-tree/lineage.svg {
            proxy_pass http://sidecar;
        }

        # -----------------------------------------------------------
        # Health check — static (no sidecar dependency)
        # -----------------------------------------------------------
        location = /health {
            default_type application/json;
            return 200 '{"status":"healthy","service":"kjv-study"}';
        }

        # -----------------------------------------------------------
        # Static assets — aggressive caching
        # -----------------------------------------------------------
        location /static/ {
            expires 1y;
            add_header Cache-Control "public, immutable";
            # Repeated: the add_header above disables inheritance here.
            add_header X-Content-Type-Options nosniff always;
            add_header X-Frame-Options SAMEORIGIN always;
            try_files $uri =404;
        }

        # -----------------------------------------------------------
        # Robots / sitemaps
        # -----------------------------------------------------------
        location = /robots.txt {
            default_type text/plain;
            expires 1d;
        }
        location ~ ^/sitemap.*\.xml$ {
            default_type application/xml;
            expires 1d;
        }

        # Random verse list JSON
        location = /random-verse-list.json {
            default_type application/json;
            expires 7d;
        }

        # -----------------------------------------------------------
        # Default — serve pre-rendered HTML with clean URLs
        # -----------------------------------------------------------
        location / {
            # A bare "$uri/" candidate is deliberately omitted: it would match
            # a directory that has no index.html and produce a 403 instead of
            # letting the sidecar render the page.
            try_files $uri $uri/index.html @sidecar;
            expires 7d;
            add_header Cache-Control "public";
            # Repeated: the add_header above disables inheritance here.
            add_header X-Content-Type-Options nosniff always;
            add_header X-Frame-Options SAMEORIGIN always;
        }

        # Fallback: if no static file exists, proxy to sidecar
        location @sidecar {
            proxy_pass http://sidecar;
        }

        # Custom 404
        error_page 404 /404.html;
        location = /404.html {
            internal;
        }
    }
}

0 commit comments

Comments
 (0)