diff --git a/.agents/AGENTS.md b/.agents/AGENTS.md index 356077f3d..8acceeb4d 100644 --- a/.agents/AGENTS.md +++ b/.agents/AGENTS.md @@ -13,6 +13,16 @@ Every call to the AI model API incurs a cost and requires electricity. Be smart - Always prefer command line tools to avoid expensive API requests (e.g., use git and jq instead of reading whole files) - Use bulk operations and prefetch patterns to minimize database queries +**Performance Optimization:** +- django-cachalot handles automatic query caching - don't add manual caching layers on top +- Focus on optimizing cold queries first before adding caching +- When ordering by annotated fields, pagination COUNT queries include those annotations - use `.values('pk')` to strip them +- For large tables (>10k rows), consider fuzzy counting using PostgreSQL's pg_class.reltuples + +**Git Commit Guidelines:** +- Do NOT include "Generated with Claude Code" in commit messages +- ALWAYS include "Co-Authored-By: Claude <noreply@anthropic.com>" at the end of commit messages + ## Project Overview Antenna is an Automated Monitoring of Insects ML Platform. It's a collaborative platform for processing and reviewing images from automated insect monitoring stations, maintaining metadata, and orchestrating multiple machine learning pipelines for analysis. 
@@ -273,7 +283,7 @@ Location: `processing_services/` directory contains example implementations ### Celery Task Queue -**Broker & Result Backend:** Redis +**Broker & Result Backend:** RabbitMQ **Key Tasks:** - `ami.jobs.tasks.run_job` - Main ML processing workflow diff --git a/.envs/.ci/.django b/.envs/.ci/.django index f0adf722b..833bb7ec7 100644 --- a/.envs/.ci/.django +++ b/.envs/.ci/.django @@ -19,3 +19,9 @@ MINIO_BROWSER_REDIRECT_URL=http://minio:9001 DEFAULT_PROCESSING_SERVICE_NAME=Test Processing Service DEFAULT_PROCESSING_SERVICE_ENDPOINT=http://ml_backend:2000 + +# RabbitMQ +# ------------------------------------------------------------------------------ +CELERY_BROKER_URL=amqp://rabbituser:rabbitpass@rabbitmq:5672/ +RABBITMQ_DEFAULT_USER=rabbituser +RABBITMQ_DEFAULT_PASS=rabbitpass diff --git a/.envs/.ci/.postgres b/.envs/.ci/.postgres index f5b543e75..9d24e90fd 100644 --- a/.envs/.ci/.postgres +++ b/.envs/.ci/.postgres @@ -3,3 +3,5 @@ POSTGRES_PORT=5432 POSTGRES_DB=ami-ci POSTGRES_USER=4JXkOnTAeDmDyIapSRrGEE POSTGRES_PASSWORD=d4xojpnJU3OzPQ0apSCLP1oHR1TYvyMzAlF5KpE9HFL6MPlnbDibwI +# DATABASE_URL is for optional convenience with certain tools (e.g., psql), not used by Django +DATABASE_URL=postgresql://4JXkOnTAeDmDyIapSRrGEE:d4xojpnJU3OzPQ0apSCLP1oHR1TYvyMzAlF5KpE9HFL6MPlnbDibwI@postgres:5432/ami-ci diff --git a/.envs/.local/.django b/.envs/.local/.django index 76a69e68e..59586664e 100644 --- a/.envs/.local/.django +++ b/.envs/.local/.django @@ -16,6 +16,11 @@ REDIS_URL=redis://redis:6379/0 CELERY_FLOWER_USER=QSocnxapfMvzLqJXSsXtnEZqRkBtsmKT CELERY_FLOWER_PASSWORD=BEQgmCtgyrFieKNoGTsux9YIye0I7P5Q7vEgfJD2C4jxmtHDetFaE2jhS7K7rxaf +# RabbitMQ +CELERY_BROKER_URL=amqp://rabbituser:rabbitpass@rabbitmq:5672/ +RABBITMQ_DEFAULT_USER=rabbituser +RABBITMQ_DEFAULT_PASS=rabbitpass + # Attempting to keep Flower from showing workers as offline # FLOWER_BROKER_API=REDIS_URL FLOWER_PERSISTENT=True diff --git a/.envs/.local/.postgres b/.envs/.local/.postgres index 
7fd1e8e22..1654270bd 100644 --- a/.envs/.local/.postgres +++ b/.envs/.local/.postgres @@ -3,3 +3,5 @@ POSTGRES_PORT=5432 POSTGRES_DB=ami POSTGRES_USER=xekSryPnqczJXkOnTAeDmDyIapSRrGEE POSTGRES_PASSWORD=iMRQjJEGflj5xojpnJU3OzPQ0apSCLP1oHR1TYvyMzAlF5KpE9HFL6MPlnbDibwI +# DATABASE_URL is for optional convenience with certain tools (e.g., psql), not used by Django +DATABASE_URL=postgres://xekSryPnqczJXkOnTAeDmDyIapSRrGEE:iMRQjJEGflj5xojpnJU3OzPQ0apSCLP1oHR1TYvyMzAlF5KpE9HFL6MPlnbDibwI@postgres:5432/ami diff --git a/compose/local/django/celery/worker/start b/compose/local/django/celery/worker/start index 1bbfb5f31..0d4f67362 100755 --- a/compose/local/django/celery/worker/start +++ b/compose/local/django/celery/worker/start @@ -8,7 +8,7 @@ set -o nounset # Note that auto reloading is disabled when debugging, manual restart required for code changes. if [ "${DEBUGGER:-0}" = "1" ]; then # exec watchfiles --filter python 'python -m debugpy --listen 0.0.0.0:5679 -m celery -A config.celery_app worker -l INFO' - exec python -Xfrozen_modules=off -m debugpy --listen 0.0.0.0:5679 -m celery -A config.celery_app worker -l INFO + exec python -Xfrozen_modules=off -m debugpy --listen 0.0.0.0:5679 -m celery -A config.celery_app worker --queues=antenna -l INFO else - exec watchfiles --filter python celery.__main__.main --args '-A config.celery_app worker -l INFO' + exec watchfiles --filter python celery.__main__.main --args '-A config.celery_app worker --queues=antenna -l INFO' fi diff --git a/compose/production/django/celery/worker/start b/compose/production/django/celery/worker/start index 9d41926e7..6b372e854 100644 --- a/compose/production/django/celery/worker/start +++ b/compose/production/django/celery/worker/start @@ -4,4 +4,4 @@ set -o errexit set -o pipefail set -o nounset -exec newrelic-admin run-program celery -A config.celery_app worker -l INFO +exec newrelic-admin run-program celery -A config.celery_app worker --queues=antenna -l INFO diff --git a/config/settings/base.py 
b/config/settings/base.py index aa2783c7f..68f5d4130 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -260,6 +260,7 @@ }, } } +REDIS_URL = env("REDIS_URL", default=None) # ADMIN # ------------------------------------------------------------------------------ @@ -299,6 +300,8 @@ if USE_TZ: # https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-timezone CELERY_TIMEZONE = TIME_ZONE + +CELERY_TASK_DEFAULT_QUEUE = "antenna" # https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-broker_url CELERY_BROKER_URL = env("CELERY_BROKER_URL") # https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_backend diff --git a/docker-compose.ci.yml b/docker-compose.ci.yml index c622cdbc1..8e93b684d 100644 --- a/docker-compose.ci.yml +++ b/docker-compose.ci.yml @@ -1,14 +1,27 @@ +# Ensure this launches a separate stack for CI testing to avoid conflicts with local dev setup: +name: antenna-ci + +volumes: + ami_ci_postgres_data: + driver: local + minio_ci_data: + driver: local + services: django: &django build: context: . dockerfile: ./compose/local/django/Dockerfile - image: ami_ci_django + volumes: + - .:/app:z + extra_hosts: + - "host.docker.internal:host-gateway" depends_on: - postgres - redis - minio-init - ml_backend + - rabbitmq env_file: - ./.envs/.ci/.django - ./.envs/.ci/.postgres @@ -18,6 +31,8 @@ services: build: context: . 
dockerfile: ./compose/local/postgres/Dockerfile + volumes: + - ami_ci_postgres_data:/var/lib/postgresql/data env_file: - ./.envs/.ci/.postgres @@ -26,12 +41,20 @@ services: celeryworker: <<: *django - image: ami_ci_celeryworker + depends_on: + - rabbitmq command: /start-celeryworker + rabbitmq: + image: rabbitmq:3-management + env_file: + - ./.envs/.ci/.django + minio: image: minio/minio:RELEASE.2024-11-07T00-52-20Z command: minio server --console-address ":9001" /data + volumes: + - "minio_ci_data:/data" env_file: - ./.envs/.ci/.django healthcheck: @@ -45,7 +68,8 @@ services: env_file: - ./.envs/.ci/.django depends_on: - - minio + minio: + condition: service_healthy volumes: - ./compose/local/minio/init.sh:/etc/minio/init.sh entrypoint: /etc/minio/init.sh diff --git a/docker-compose.production.yml b/docker-compose.production.yml index 099464277..83e2799da 100644 --- a/docker-compose.production.yml +++ b/docker-compose.production.yml @@ -16,8 +16,9 @@ services: - ./config:/app/config ports: - "5001:5000" - extra_hosts: + extra_hosts: # hostname aliases to the IPs of persistent services running outside the Docker network - "db:${DATABASE_IP}" + - "rabbitmq:${RABBITMQ_IP}" - "redis:${REDIS_IP}" command: /start scale: 1 # Can't scale until the load balancer is within the compose config diff --git a/docker-compose.yml b/docker-compose.yml index f5cee4a85..cf82d7ed4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,6 +24,7 @@ services: - redis - minio-init - ml_backend + - rabbitmq volumes: - .:/app:z env_file: @@ -92,6 +93,8 @@ services: scale: 1 ports: [] command: /start-celeryworker + depends_on: + - rabbitmq celerybeat: <<: *django @@ -108,6 +111,16 @@ services: volumes: - ./data/flower/:/data/ + rabbitmq: + image: rabbitmq:3-management + ports: + - "5672:5672" + - "15672:15672" + env_file: + - ./.envs/.local/.django + networks: + - antenna_network + minio: image: minio/minio:RELEASE.2024-11-07T00-52-20Z command: minio server --console-address ":9001" /data