Skip to content

Commit 7e4db08

Browse files
mihow and claude committed
feat: configurable NATS tuning and gunicorn worker management
Rebase onto main after RolnickLab#1135 merge. Keep only the additions unique to this branch:
- Make TASK_TTR configurable via NATS_TASK_TTR Django setting (default 30s)
- Make max_ack_pending configurable via NATS_MAX_ACK_PENDING setting (default 100)
- Local dev: switch to gunicorn+UvicornWorker by default for production parity, with USE_UVICORN=1 escape hatch for raw uvicorn
- Production: auto-detect WEB_CONCURRENCY from CPU cores (capped at 8) when not explicitly set in the environment

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 17bf10f commit 7e4db08

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

ami/ml/orchestration/nats_queue.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ async def get_connection(nats_url: str) -> tuple[nats.NATS, JetStreamContext]:
4242
return nc, js
4343

4444

45-
TASK_TTR = 300 # Default Time-To-Run (visibility timeout) in seconds
45+
TASK_TTR = getattr(settings, "NATS_TASK_TTR", 30) # Visibility timeout in seconds (configurable)
4646

4747

4848
class TaskQueueManager:
@@ -56,8 +56,9 @@ class TaskQueueManager:
5656
await manager.acknowledge_task(tasks[0].reply_subject)
5757
"""
5858

59-
def __init__(self, nats_url: str | None = None):
59+
def __init__(self, nats_url: str | None = None, max_ack_pending: int | None = None):
6060
self.nats_url = nats_url or getattr(settings, "NATS_URL", "nats://nats:4222")
61+
self.max_ack_pending = max_ack_pending or getattr(settings, "NATS_MAX_ACK_PENDING", 100)
6162
self.nc: nats.NATS | None = None
6263
self.js: JetStreamContext | None = None
6364

@@ -141,7 +142,7 @@ async def _ensure_consumer(self, job_id: int):
141142
ack_wait=TASK_TTR, # Visibility timeout (TTR)
142143
max_deliver=5, # Max retry attempts
143144
deliver_policy=DeliverPolicy.ALL,
144-
max_ack_pending=100, # Max unacked messages
145+
max_ack_pending=self.max_ack_pending,
145146
filter_subject=subject,
146147
),
147148
),

compose/local/django/start

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,26 @@ set -o nounset
66

77
python manage.py migrate
88

9-
# Launch VS Code debug server if DEBUGGER environment variable is set to 1
10-
# Note that the --reload flag is not compatible with debugpy, so manually restart the server when code changes
9+
# Set USE_UVICORN=1 to use the original raw uvicorn dev server instead of gunicorn
10+
if [ "${USE_UVICORN:-0}" = "1" ]; then
11+
if [ "${DEBUGGER:-0}" = "1" ]; then
12+
exec python -Xfrozen_modules=off -m debugpy --listen 0.0.0.0:5678 -m uvicorn config.asgi:application --host 0.0.0.0
13+
else
14+
exec uvicorn config.asgi:application --host 0.0.0.0 --reload --reload-include '*.html'
15+
fi
16+
fi
17+
18+
# Gunicorn with UvicornWorker (production-parity mode, now the default)
19+
# WEB_CONCURRENCY controls worker count (default: 1 for dev with auto-reload)
20+
WORKERS=${WEB_CONCURRENCY:-1}
21+
1122
if [ "${DEBUGGER:-0}" = "1" ]; then
12-
exec python -Xfrozen_modules=off -m debugpy --listen 0.0.0.0:5678 -m uvicorn config.asgi:application --host 0.0.0.0
23+
echo "Starting Gunicorn with debugpy (1 worker)..."
24+
exec python -Xfrozen_modules=off -m debugpy --listen 0.0.0.0:5678 -m gunicorn config.asgi --bind 0.0.0.0:8000 --workers 1 -k uvicorn.workers.UvicornWorker
25+
elif [ "$WORKERS" -eq 1 ]; then
26+
echo "Starting Gunicorn with 1 worker (auto-reload enabled)..."
27+
exec gunicorn config.asgi --bind 0.0.0.0:8000 --workers 1 -k uvicorn.workers.UvicornWorker --reload
1328
else
14-
exec uvicorn config.asgi:application --host 0.0.0.0 --reload --reload-include '*.html'
29+
echo "Starting Gunicorn with $WORKERS workers..."
30+
exec gunicorn config.asgi --bind 0.0.0.0:8000 --workers "$WORKERS" -k uvicorn.workers.UvicornWorker
1531
fi

compose/production/django/start

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,13 @@ set -o nounset
66

77
python /app/manage.py collectstatic --noinput
88

9+
# Gunicorn natively reads WEB_CONCURRENCY as its --workers default.
10+
# If not set, auto-detect based on CPU cores (capped at 8 for async ASGI workers).
11+
if [ -z "${WEB_CONCURRENCY:-}" ]; then
12+
CPU_CORES=$(nproc)
13+
export WEB_CONCURRENCY=$(( CPU_CORES > 8 ? 8 : CPU_CORES ))
14+
fi
15+
16+
echo "Starting Gunicorn with $WEB_CONCURRENCY worker(s)..."
17+
918
exec newrelic-admin run-program /usr/local/bin/gunicorn config.asgi --bind 0.0.0.0:5000 --chdir=/app -k uvicorn.workers.UvicornWorker

0 commit comments

Comments
 (0)