Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
29979ec
feat: clean up when events are regrouped for a deployment
mihow Jul 25, 2025
2b8f78d
feat: add created & updated at columns to sessions/events list
mihow Jul 25, 2025
32fc496
feat: configure default related models with new projects
mihow Jul 22, 2025
ad26bb5
feat: allow specifying which pipelines are enabled by default
mihow Jul 25, 2025
bcf205c
feat: add denmark/uk model to default pipelines
mihow Jul 25, 2025
c00f6a7
feat: add tests for default enabled pipelines
mihow Jul 25, 2025
27cb97e
chore: rename default station
mihow Jul 25, 2025
49d6e47
fix: undefined variables in certain cases
mihow Jul 25, 2025
34badc1
chore: cleanup typos and comments
mihow Jul 25, 2025
d4f7047
fix: update default sampling method for collections
mihow Jul 25, 2025
8b9a3a8
Configure default related models for new projects (#905)
mihow Jul 28, 2025
747ebad
feat: process images immediately after uploading (prototype)
mihow Jul 29, 2025
e984197
feat: read timestamp from EXIF data in special cases
mihow Jul 29, 2025
efcadda
Merge branch 'main' of github.com:RolnickLab/antenna into feat/quicks…
mihow Jul 31, 2025
e18e37a
chore: clean up comments and unused
mihow Jul 31, 2025
28352de
feat: query method to select pipelines enabled for project
mihow Jul 31, 2025
e29c16e
feat: process_single_image function in a new home
mihow Jul 31, 2025
e7df08c
fix: default pipeline query
mihow Jul 31, 2025
84dc4a4
feat: fallback to the current datetime for test uploads
mihow Jul 31, 2025
978ef0d
chore: disable auto-processing manual uploads by default
mihow Jul 31, 2025
f5c55b8
fix: cleanup
mihow Aug 9, 2025
346a9c3
Merge branch 'main' of github.com:RolnickLab/antenna into feat/quicks…
mihow Aug 9, 2025
50999d5
chore: cleanup
mihow Aug 9, 2025
76ac612
fix: remove duplicate migration after merge
mihow Aug 9, 2025
5d2294b
fix: select only pipelines with an avail processor for the project
mihow Aug 9, 2025
b49666b
feat: move the create method to the view
mihow Aug 9, 2025
7fc8c59
fix: allow the current project to be passed in post / form data
mihow Aug 9, 2025
b5be1f7
feat: require project in source image upload
mihow Aug 9, 2025
306252a
feat: use project feature flag for auto processing
mihow Aug 9, 2025
6c03080
fix: pass project ID when creating source image in test
mihow Aug 13, 2025
6a35c10
fix: separate titles for source images & source image collections
mihow Aug 13, 2025
2d44bf2
fix: typo in property name, require projects for source image uploads
mihow Aug 13, 2025
6bc4cad
Merge branch 'main' of github.com:RolnickLab/antenna into feat/quicks…
mihow Aug 13, 2025
a82078b
feat: use default pipeline in project settings first
mihow Aug 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 10 additions & 17 deletions ami/main/api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import datetime

from django.core.exceptions import ValidationError as DjangoValidationError
from django.db.models import QuerySet
from guardian.shortcuts import get_perms
from rest_framework import serializers
Expand Down Expand Up @@ -32,7 +31,6 @@
SourceImageUpload,
TaxaList,
Taxon,
validate_filename_timestamp,
)


Expand Down Expand Up @@ -1013,28 +1011,23 @@ class Meta:

def create(self, validated_data):
# Add the user to the validated data
request = self.context.get("request")
request: Request = self.context["request"]
user = get_current_user(request)
# @TODO IMPORTANT ensure current user is a member of the deployment's project
obj = SourceImageUpload.objects.create(user=user, **validated_data)
process_now = request.data.get("process_now", False)
source_image = create_source_image_from_upload(
obj.image,
obj.deployment,
request,
image=obj.image,
deployment=obj.deployment,
request=request,
process_now=process_now,
)
if source_image is not None:
obj.source_image = source_image # type: ignore
obj.save()
if source_image is None:
raise serializers.ValidationError("Failed to create source image from upload.")
obj.source_image = source_image # type: ignore
obj.save()
return obj

def validate_image(self, value):
# Ensure that image filename contains a timestamp
try:
validate_filename_timestamp(value.name)
except DjangoValidationError as e:
raise serializers.ValidationError(str(e))
return value


class SourceImageCollectionCommonKwargsSerializer(serializers.Serializer):
# The most common kwargs for the sampling methods
Expand Down
33 changes: 33 additions & 0 deletions ami/main/migrations/0061_alter_sourceimagecollection_method.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 4.2.10 on 2025-07-24 21:26

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration for SourceImageCollection.method:
    # - the default sampling method changes from "common_combined" to "full"
    # - the choice list is reordered so "full" is listed first and the
    #   deprecated "common_combined" method is listed last
    dependencies = [
        ("main", "0060_alter_sourceimagecollection_method"),
    ]

    operations = [
        migrations.AlterField(
            model_name="sourceimagecollection",
            name="method",
            field=models.CharField(
                choices=[
                    ("full", "full"),
                    ("random", "random"),
                    ("stratified_random", "stratified_random"),
                    ("interval", "interval"),
                    ("manual", "manual"),
                    ("starred", "starred"),
                    ("random_from_each_event", "random_from_each_event"),
                    ("last_and_random_from_each_event", "last_and_random_from_each_event"),
                    ("greatest_file_size_from_each_event", "greatest_file_size_from_each_event"),
                    ("detections_only", "detections_only"),
                    ("common_combined", "common_combined"),
                ],
                default="full",
                max_length=255,
            ),
        ),
    ]
76 changes: 58 additions & 18 deletions ami/main/models.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import collections
import datetime
import functools
import hashlib
import logging
import textwrap
import time
import typing
import urllib.parse
from io import BytesIO
from typing import Final, final # noqa: F401

import PIL.Image
import pydantic
from django.apps import apps
from django.conf import settings
Expand All @@ -30,11 +31,12 @@
from ami.base.models import BaseModel
from ami.main import charts
from ami.users.models import User
from ami.utils.media import calculate_file_checksum, extract_timestamp
from ami.utils.schemas import OrderedEnum

if typing.TYPE_CHECKING:
from ami.jobs.models import Job
from ami.ml.models import ProcessingService
from ami.ml.models import Pipeline, ProcessingService

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -119,10 +121,16 @@ def get_or_create_default_deployment(


def get_or_create_default_collection(project: "Project") -> "SourceImageCollection":
"""Create a default collection for a project for all images, updated dynamically."""
"""
Create a default collection for a project for all images.

@TODO Consider ways to update this collection automatically. With a query-only collection
or a periodic task that runs the populate_collection method.
"""
collection, _created = SourceImageCollection.objects.get_or_create(
name="All Images",
project=project,
method="full",
)
logger.info(f"Created default collection for project {project}")
return collection
Expand Down Expand Up @@ -213,6 +221,9 @@ class Project(BaseModel):
jobs: models.QuerySet["Job"]
sourceimage_collections: models.QuerySet["SourceImageCollection"]
processing_services: models.QuerySet["ProcessingService"]
pipelines: models.QuerySet["Pipeline"]
sourceimage_collections: models.QuerySet["SourceImageCollection"]
processing_services: models.QuerySet["ProcessingService"]

objects = ProjectManager()

Expand Down Expand Up @@ -1352,35 +1363,64 @@ def validate_filename_timestamp(filename: str) -> None:
raise ValidationError("Image filename does not contain a valid timestamp (e.g. YYYYMMDDHHMMSS-snapshot.jpg).")


def create_source_image_from_upload(image: ImageFieldFile, deployment: Deployment, request=None) -> "SourceImage":
def create_source_image_from_upload(
image: ImageFieldFile,
deployment: Deployment,
request=None,
process_now=True,
) -> "SourceImage":
"""Create a complete SourceImage from an uploaded file."""
# md5 checksum from file
checksum = hashlib.md5(image.read()).hexdigest()
checksum_algorithm = "md5"

# Read file content once
image.seek(0)
file_content = image.read()

# Calculate a checksum for the image content
checksum, checksum_algorithm = calculate_file_checksum(file_content)

# Create PIL image from file content (no additional file reads)
image_stream = BytesIO(file_content)
pil_image = PIL.Image.open(image_stream)

timestamp = extract_timestamp(filename=image.name, image=pil_image)
if not timestamp:
logger.warning(
"A valid timestamp could not be found in the image's EXIF data or filename. "
"Please rename the file to include a timestamp "
"(e.g. YYYYMMDDHHMMSS-snapshot.jpg). "
"Falling back to the current time for the image captured timestamp."
)
timestamp = timezone.now()
width = pil_image.width
height = pil_image.height
size = len(file_content)

# get full public media url of image:
if request:
base_url = request.build_absolute_uri(settings.MEDIA_URL)
else:
base_url = settings.MEDIA_URL

source_image = SourceImage(
source_image = SourceImage.objects.create(
path=image.name, # Includes relative path from MEDIA_ROOT
public_base_url=base_url, # @TODO how to merge this with the data source?
project=deployment.project,
deployment=deployment,
timestamp=None, # Will be calculated from filename or EXIF data on save
timestamp=timestamp,
event=None, # Will be assigned when the image is grouped into events
size=image.size,
size=size,
checksum=checksum,
checksum_algorithm=checksum_algorithm,
width=image.width,
height=image.height,
width=width,
height=height,
test_image=True,
uploaded_by=request.user if request else None,
)
source_image.save()
deployment.save()
deployment.save(regroup_async=False)
if process_now:
from ami.ml.orchestration.processing import process_single_source_image

process_single_source_image(source_image=source_image)
return source_image


Expand All @@ -1397,7 +1437,7 @@ class SourceImageUpload(BaseModel):
The SourceImageViewSet will create a SourceImage from the uploaded file and delete the upload.
"""

image = models.ImageField(upload_to=upload_to_with_deployment, validators=[validate_filename_timestamp])
image = models.ImageField(upload_to=upload_to_with_deployment)
user = models.ForeignKey(User, on_delete=models.SET_NULL, null=True, blank=True)
deployment = models.ForeignKey(Deployment, on_delete=models.CASCADE, related_name="manually_uploaded_captures")
source_image = models.OneToOneField(
Expand Down Expand Up @@ -3110,7 +3150,7 @@ def html(self) -> str:


_SOURCE_IMAGE_SAMPLING_METHODS = [
"common_combined", # Deprecated
"full",
"random",
"stratified_random",
"interval",
Expand All @@ -3120,7 +3160,7 @@ def html(self) -> str:
"last_and_random_from_each_event",
"greatest_file_size_from_each_event",
"detections_only",
"full",
"common_combined", # Deprecated
]


Expand Down Expand Up @@ -3203,7 +3243,7 @@ class SourceImageCollection(BaseModel):
method = models.CharField(
max_length=255,
choices=as_choices(_SOURCE_IMAGE_SAMPLING_METHODS),
default="common_combined",
default="full",
)
# @TODO this should be a JSON field with a schema, use a pydantic model
kwargs = models.JSONField(
Expand Down
28 changes: 28 additions & 0 deletions ami/ml/models/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
if TYPE_CHECKING:
from ami.ml.models import ProcessingService, ProjectPipelineConfig
from ami.jobs.models import Job
from ami.main.models import Project

import collections
import dataclasses
Expand Down Expand Up @@ -886,6 +887,30 @@ class PipelineStage(ConfigurableStage):
"""A configurable stage of a pipeline."""


class PipelineQuerySet(models.QuerySet):
    """QuerySet with project-scoped helpers for the Pipeline model."""

    def enabled(self, project: Project) -> PipelineQuerySet:
        """
        Filter to the pipelines that are enabled for ``project``.

        A pipeline qualifies when it is linked to the project AND has a
        project pipeline config row for that same project with
        ``enabled=True``. The config conditions are deliberately kept in a
        single ``filter()`` call: chaining separate filters on a
        multi-valued relation would let each condition match a *different*
        config row.

        # @TODO how can this automatically filter based on the pipeline's projects
        # or the current query without having to specify the project? (e.g. with OuterRef?)
        """
        criteria = {
            "projects": project,
            "project_pipeline_configs__enabled": True,
            "project_pipeline_configs__project": project,
        }
        # distinct() collapses duplicate rows produced by the joins above.
        return self.filter(**criteria).distinct()


class PipelineManager(models.Manager):
    """Custom Manager for Pipeline model."""

    def get_queryset(self) -> PipelineQuerySet:
        # Base all manager queries on the custom queryset so chained calls
        # (e.g. Pipeline.objects.all().enabled(...)) expose its helpers.
        return PipelineQuerySet(self.model, using=self._db)


@typing.final
class Pipeline(BaseModel):
"""A pipeline of algorithms"""
Expand Down Expand Up @@ -917,6 +942,9 @@ class Pipeline(BaseModel):
"and the processing service."
),
)

objects = PipelineManager()

processing_services: models.QuerySet[ProcessingService]
project_pipeline_configs: models.QuerySet[ProjectPipelineConfig]
jobs: models.QuerySet[Job]
Expand Down
1 change: 1 addition & 0 deletions ami/ml/orchestration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .processing import * # noqa: F401, F403
22 changes: 22 additions & 0 deletions ami/ml/orchestration/pipelines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from django.db import models

from ami.main.models import Project
from ami.ml.models.pipeline import Pipeline


def get_default_pipeline(project: Project) -> Pipeline | None:
    """
    Select a default pipeline to use for processing images in a project.

    Placeholder heuristic: of the pipelines enabled for the project, pick
    the one whose algorithms cover the most category-map labels, breaking
    ties by most recently created. Returns None when no pipeline is
    enabled for the project.

    @TODO use project settings to determine the default pipeline
    """
    # NOTE: .all() is load-bearing — enabled() is defined on the custom
    # PipelineQuerySet, not on the manager itself.
    enabled_for_project = Pipeline.objects.all().enabled(project=project)  # type: ignore
    ranked = enabled_for_project.annotate(
        num_categories=models.Count("algorithms__category_map__labels"),
    ).order_by("-num_categories", "-created_at")
    return ranked.first()
45 changes: 45 additions & 0 deletions ami/ml/orchestration/processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import typing

from ami.jobs.models import Job
from ami.ml.models import Pipeline
from ami.ml.orchestration.pipelines import get_default_pipeline

if typing.TYPE_CHECKING:
from ami.main.models import SourceImage


def process_single_source_image(
source_image: "SourceImage",
pipeline: "Pipeline | None" = None,
run_async=True,
) -> "Job":
"""
Process a single SourceImage immediately.
"""

assert source_image.deployment is not None, "SourceImage must belong to a deployment"

if not source_image.event:
source_image.deployment.save(regroup_async=False)
source_image.refresh_from_db()
assert source_image.event is not None, "SourceImage must belong to an event"

project = source_image.project
assert project is not None, "SourceImage must belong to a project"

pipeline_choice = pipeline or get_default_pipeline(project)
assert pipeline_choice is not None, "Project must have a pipeline to run"

# @TODO add images to a queue without creatin a job for each image
job = Job.objects.create(
name=f"Capture #{source_image.pk} ({source_image.timestamp}) from {source_image.deployment.name}",
job_type_key="ml",
source_image_single=source_image,
pipeline=pipeline_choice,
project=project,
)
if run_async:
job.enqueue()
else:
job.run()
return job
Loading