Skip to content

Commit f843105

Browse files
authored
Auto-process manually uploaded images (if enabled) (#909)
* feat: clean up when events are regrouped for a deployment
* feat: add created & updated at columns to sessions/events list
* feat: configure default related models with new projects
* feat: allow specifying which pipelines are enabled by default
* feat: add denmark/uk model to default pipelines
* feat: add tests for default enabled pipelines
* chore: rename default station
* fix: undefined variables in certain cases
* chore: cleanup typos and comments
* fix: update default sampling method for collections
* Configure default related models for new projects (#905)
* feat: configure default related models with new projects
* feat: allow specifying which pipelines are enabled by default
* feat: add denmark/uk model to default pipelines
* feat: add tests for default enabled pipelines
* chore: rename default station
* Update ami/ml/models/processing_service.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* fix: live processing endpoint URL
* feat: process images immediately after uploading (prototype)
* feat: read timestamp from EXIF data in special cases
* chore: clean up comments and unused
* feat: query method to select pipelines enabled for project
* feat: process_single_image function in a new home
* fix: default pipeline query
* feat: fallback to the current datetime for test uploads
* chore: disable auto-processing manual uploads by default
* fix: select only pipelines with an avail processor for the project
* feat: move the create method to the view
* fix: allow the current project to be passed in post / form data
* feat: require project in source image upload
* feat: use project feature flag for auto processing
* fix: pass project ID when creating source image in test
* fix: separate titles for source images & source image collections
* feat: use default pipeline in project settings first
1 parent 5bbc13f commit f843105

File tree

16 files changed

+369
-58
lines changed

16 files changed

+369
-58
lines changed

ami/base/views.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,23 @@ class ProjectMixin:
2222
def get_active_project(self) -> Project:
2323
from ami.base.serializers import SingleParamSerializer
2424

25+
param = "project_id"
26+
2527
project_id = None
2628
# Extract from the URL when `/projects/` is in the URL path
2729
if "/projects/" in self.request.path:
2830
project_id = self.kwargs.get("pk")
2931

3032
# If not in URL, try query parameters
3133
if not project_id:
32-
if self.require_project:
33-
project_id = SingleParamSerializer[int].clean(
34-
param_name="project_id",
35-
field=serializers.IntegerField(required=True, min_value=0),
36-
data=self.request.query_params,
37-
)
38-
else:
39-
project_id = self.request.query_params.get("project_id") # No validation
34+
# Look for project_id in GET query parameters or POST data
35+
# POST data returns a list of ints, but QueryDict.get() returns a single value
36+
project_id = self.request.query_params.get(param) or self.request.data.get(param)
37+
38+
project_id = SingleParamSerializer[int].clean(
39+
param_name=param,
40+
field=serializers.IntegerField(required=self.require_project, min_value=0),
41+
data={param: project_id} if project_id else {},
42+
)
4043

4144
return get_object_or_404(Project, id=project_id) if project_id else None

ami/main/api/serializers.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
import datetime
22

3-
from django.core.exceptions import ValidationError as DjangoValidationError
43
from django.db.models import QuerySet
54
from guardian.shortcuts import get_perms
65
from rest_framework import serializers
76
from rest_framework.request import Request
87

98
from ami.base.fields import DateStringField
10-
from ami.base.serializers import DefaultSerializer, MinimalNestedModelSerializer, get_current_user, reverse_with_params
9+
from ami.base.serializers import DefaultSerializer, MinimalNestedModelSerializer, reverse_with_params
1110
from ami.jobs.models import Job
12-
from ami.main.models import Tag, create_source_image_from_upload
11+
from ami.main.models import Tag
1312
from ami.ml.models import Algorithm, Pipeline
1413
from ami.ml.serializers import AlgorithmSerializer, PipelineNestedSerializer
1514
from ami.users.models import User
@@ -33,7 +32,6 @@
3332
SourceImageUpload,
3433
TaxaList,
3534
Taxon,
36-
validate_filename_timestamp,
3735
)
3836

3937

@@ -1085,30 +1083,6 @@ class Meta:
10851083
"created_at",
10861084
]
10871085

1088-
def create(self, validated_data):
1089-
# Add the user to the validated data
1090-
request = self.context.get("request")
1091-
user = get_current_user(request)
1092-
# @TODO IMPORTANT ensure current user is a member of the deployment's project
1093-
obj = SourceImageUpload.objects.create(user=user, **validated_data)
1094-
source_image = create_source_image_from_upload(
1095-
obj.image,
1096-
obj.deployment,
1097-
request,
1098-
)
1099-
if source_image is not None:
1100-
obj.source_image = source_image # type: ignore
1101-
obj.save()
1102-
return obj
1103-
1104-
def validate_image(self, value):
1105-
# Ensure that image filename contains a timestamp
1106-
try:
1107-
validate_filename_timestamp(value.name)
1108-
except DjangoValidationError as e:
1109-
raise serializers.ValidationError(str(e))
1110-
return value
1111-
11121086

11131087
class SourceImageCollectionCommonKwargsSerializer(serializers.Serializer):
11141088
# The most common kwargs for the sampling methods

ami/main/api/views.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,7 @@ class SourceImageUploadViewSet(DefaultViewSet, ProjectMixin):
760760

761761
serializer_class = SourceImageUploadSerializer
762762
permission_classes = [SourceImageUploadCRUDPermission]
763+
require_project = True
763764

764765
def get_queryset(self) -> QuerySet:
765766
# Only allow users to see their own uploads
@@ -772,6 +773,35 @@ def get_queryset(self) -> QuerySet:
772773
# This is the maximum limit for manually uploaded captures
773774
pagination_class.default_limit = 20
774775

776+
def perform_create(self, serializer):
777+
"""
778+
Save the SourceImageUpload with the current user and create the associated SourceImage.
779+
"""
780+
from ami.base.serializers import get_current_user
781+
from ami.main.models import create_source_image_from_upload
782+
783+
# Get current user from request
784+
user = get_current_user(self.request)
785+
project = self.get_active_project()
786+
787+
# Create the SourceImageUpload object with the user
788+
obj = serializer.save(user=user)
789+
790+
# Get process_now flag from project feature flags
791+
process_now = project.feature_flags.auto_processs_manual_uploads
792+
793+
# Create source image from the upload
794+
source_image = create_source_image_from_upload(
795+
image=obj.image,
796+
deployment=obj.deployment,
797+
request=self.request,
798+
process_now=process_now,
799+
)
800+
801+
# Update the source_image reference and save
802+
obj.source_image = source_image
803+
obj.save()
804+
775805

776806
class DetectionViewSet(DefaultViewSet, ProjectMixin):
777807
"""
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Generated by Django 4.2.10 on 2025-08-08 21:53
2+
3+
import ami.main.models
4+
from django.db import migrations, models
5+
import django_pydantic_field.fields
6+
7+
8+
class Migration(migrations.Migration):
9+
dependencies = [
10+
("main", "0065_project_default_filters_exclude_taxa_and_more"),
11+
]
12+
13+
operations = [
14+
migrations.AlterField(
15+
model_name="project",
16+
name="feature_flags",
17+
field=django_pydantic_field.fields.PydanticSchemaField(
18+
blank=True,
19+
config=None,
20+
default={"auto_processs_manual_uploads": False, "tags": False},
21+
schema=ami.main.models.ProjectFeatureFlags,
22+
),
23+
),
24+
migrations.AlterField(
25+
model_name="sourceimageupload",
26+
name="image",
27+
field=models.ImageField(upload_to=ami.main.models.upload_to_with_deployment),
28+
),
29+
]

ami/main/models.py

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import collections
22
import datetime
33
import functools
4-
import hashlib
54
import logging
65
import textwrap
76
import time
87
import typing
98
import urllib.parse
9+
from io import BytesIO
1010
from typing import Final, final # noqa: F401
1111

12+
import PIL.Image
1213
import pydantic
1314
from django.apps import apps
1415
from django.conf import settings
@@ -31,11 +32,12 @@
3132
from ami.main import charts
3233
from ami.main.models_future.projects import ProjectSettingsMixin
3334
from ami.users.models import User
35+
from ami.utils.media import calculate_file_checksum, extract_timestamp
3436
from ami.utils.schemas import OrderedEnum
3537

3638
if typing.TYPE_CHECKING:
3739
from ami.jobs.models import Job
38-
from ami.ml.models import ProcessingService
40+
from ami.ml.models import Pipeline, ProcessingService
3941

4042
logger = logging.getLogger(__name__)
4143

@@ -120,12 +122,16 @@ def get_or_create_default_deployment(
120122

121123

122124
def get_or_create_default_collection(project: "Project") -> "SourceImageCollection":
123-
"""Create a default collection for a project for all images, updated dynamically."""
125+
"""
126+
Create a default collection for a project for all images.
127+
128+
@TODO Consider ways to update this collection automatically, e.g. with a query-only collection
128+
or a periodic task that runs the populate_collection method.
130+
"""
124131
collection, _created = SourceImageCollection.objects.get_or_create(
125132
name="All Images",
126133
project=project,
127134
method="full",
128-
# @TODO make this a dynamic collection that updates automatically
129135
)
130136
logger.info(f"Created default collection for project {project}")
131137
return collection
@@ -196,6 +202,7 @@ class ProjectFeatureFlags(pydantic.BaseModel):
196202
"""
197203

198204
tags: bool = False # Whether the project supports tagging taxa
205+
auto_processs_manual_uploads: bool = False # Whether to automatically process uploaded images
199206

200207

201208
default_feature_flags = ProjectFeatureFlags()
@@ -233,6 +240,7 @@ class Project(ProjectSettingsMixin, BaseModel):
233240
jobs: models.QuerySet["Job"]
234241
sourceimage_collections: models.QuerySet["SourceImageCollection"]
235242
processing_services: models.QuerySet["ProcessingService"]
243+
pipelines: models.QuerySet["Pipeline"]
236244
tags: models.QuerySet["Tag"]
237245

238246
objects = ProjectManager()
@@ -1373,35 +1381,64 @@ def validate_filename_timestamp(filename: str) -> None:
13731381
raise ValidationError("Image filename does not contain a valid timestamp (e.g. YYYYMMDDHHMMSS-snapshot.jpg).")
13741382

13751383

1376-
def create_source_image_from_upload(image: ImageFieldFile, deployment: Deployment, request=None) -> "SourceImage":
1384+
def create_source_image_from_upload(
1385+
image: ImageFieldFile,
1386+
deployment: Deployment,
1387+
request=None,
1388+
process_now=True,
1389+
) -> "SourceImage":
13771390
"""Create a complete SourceImage from an uploaded file."""
1378-
# md5 checksum from file
1379-
checksum = hashlib.md5(image.read()).hexdigest()
1380-
checksum_algorithm = "md5"
1391+
1392+
# Read file content once
1393+
image.seek(0)
1394+
file_content = image.read()
1395+
1396+
# Calculate a checksum for the image content
1397+
checksum, checksum_algorithm = calculate_file_checksum(file_content)
1398+
1399+
# Create PIL image from file content (no additional file reads)
1400+
image_stream = BytesIO(file_content)
1401+
pil_image = PIL.Image.open(image_stream)
1402+
1403+
timestamp = extract_timestamp(filename=image.name, image=pil_image)
1404+
if not timestamp:
1405+
logger.warning(
1406+
"A valid timestamp could not be found in the image's EXIF data or filename. "
1407+
"Please rename the file to include a timestamp "
1408+
"(e.g. YYYYMMDDHHMMSS-snapshot.jpg). "
1409+
"Falling back to the current time for the image captured timestamp."
1410+
)
1411+
timestamp = timezone.now()
1412+
width = pil_image.width
1413+
height = pil_image.height
1414+
size = len(file_content)
13811415

13821416
# get full public media url of image:
13831417
if request:
13841418
base_url = request.build_absolute_uri(settings.MEDIA_URL)
13851419
else:
13861420
base_url = settings.MEDIA_URL
13871421

1388-
source_image = SourceImage(
1422+
source_image = SourceImage.objects.create(
13891423
path=image.name, # Includes relative path from MEDIA_ROOT
13901424
public_base_url=base_url, # @TODO how to merge this with the data source?
13911425
project=deployment.project,
13921426
deployment=deployment,
1393-
timestamp=None, # Will be calculated from filename or EXIF data on save
1427+
timestamp=timestamp,
13941428
event=None, # Will be assigned when the image is grouped into events
1395-
size=image.size,
1429+
size=size,
13961430
checksum=checksum,
13971431
checksum_algorithm=checksum_algorithm,
1398-
width=image.width,
1399-
height=image.height,
1432+
width=width,
1433+
height=height,
14001434
test_image=True,
14011435
uploaded_by=request.user if request else None,
14021436
)
1403-
source_image.save()
1404-
deployment.save()
1437+
deployment.save(regroup_async=False)
1438+
if process_now:
1439+
from ami.ml.orchestration.processing import process_single_source_image
1440+
1441+
process_single_source_image(source_image=source_image)
14051442
return source_image
14061443

14071444

@@ -1418,7 +1455,7 @@ class SourceImageUpload(BaseModel):
14181455
The SourceImageViewSet will create a SourceImage from the uploaded file and delete the upload.
14191456
"""
14201457

1421-
image = models.ImageField(upload_to=upload_to_with_deployment, validators=[validate_filename_timestamp])
1458+
image = models.ImageField(upload_to=upload_to_with_deployment)
14221459
user = models.ForeignKey(User, on_delete=models.SET_NULL, null=True, blank=True)
14231460
deployment = models.ForeignKey(Deployment, on_delete=models.CASCADE, related_name="manually_uploaded_captures")
14241461
source_image = models.OneToOneField(

ami/main/tests.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1618,7 +1618,11 @@ def _test_sourceimageupload_permissions(self, user, permission_map):
16181618
# --- Test Create ---
16191619
response = self.client.post(
16201620
list_url,
1621-
{"image": self._create_source_image_upload_file(), "deployment": self.deployment.id},
1621+
{
1622+
"image": self._create_source_image_upload_file(),
1623+
"deployment": self.deployment.pk,
1624+
"project_id": self.project.pk,
1625+
},
16221626
format="multipart",
16231627
)
16241628

ami/ml/models/pipeline.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
if TYPE_CHECKING:
66
from ami.ml.models import ProcessingService, ProjectPipelineConfig
77
from ami.jobs.models import Job
8+
from ami.main.models import Project
89

910
import collections
1011
import dataclasses
@@ -886,6 +887,40 @@ class PipelineStage(ConfigurableStage):
886887
"""A configurable stage of a pipeline."""
887888

888889

890+
class PipelineQuerySet(models.QuerySet):
891+
"""Custom QuerySet for Pipeline model."""
892+
893+
def enabled(self, project: Project) -> PipelineQuerySet:
894+
"""
895+
Return pipelines that are enabled for a given project.
896+
897+
# @TODO how can this automatically filter based on the pipeline's projects
898+
# or the current query without having to specify the project? (e.g. with OuterRef?)
899+
"""
900+
return self.filter(
901+
projects=project,
902+
project_pipeline_configs__enabled=True,
903+
project_pipeline_configs__project=project,
904+
processing_services__projects=project,
905+
).distinct()
906+
907+
def online(self, project: Project) -> PipelineQuerySet:
908+
"""
909+
Return pipelines that are available on at least one online processing service for the given project.
910+
"""
911+
return self.filter(
912+
processing_services__projects=project,
913+
processing_services__last_checked_live=True,
914+
).distinct()
915+
916+
917+
class PipelineManager(models.Manager):
918+
"""Custom Manager for Pipeline model."""
919+
920+
def get_queryset(self) -> PipelineQuerySet:
921+
return PipelineQuerySet(self.model, using=self._db)
922+
923+
889924
@typing.final
890925
class Pipeline(BaseModel):
891926
"""A pipeline of algorithms"""
@@ -917,6 +952,9 @@ class Pipeline(BaseModel):
917952
"and the processing service."
918953
),
919954
)
955+
956+
objects = PipelineManager()
957+
920958
processing_services: models.QuerySet[ProcessingService]
921959
project_pipeline_configs: models.QuerySet[ProjectPipelineConfig]
922960
jobs: models.QuerySet[Job]

ami/ml/orchestration/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .processing import * # noqa: F401, F403

0 commit comments

Comments
 (0)