Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
132f9dc
feat: Added pgvector extension
mohamedelabbas1996 Apr 14, 2025
dc48ccc
feat: Added features field to the Classification model
mohamedelabbas1996 Apr 14, 2025
8dc0c00
changed taxon and detection to autocomplete fields in the Classificat…
mohamedelabbas1996 Apr 16, 2025
4bf07b3
feat: added similar action to the ClassificationViewset
mohamedelabbas1996 Apr 16, 2025
b258c9b
chore: changed features vector field name to features_2048
mohamedelabbas1996 Apr 16, 2025
9490045
chore: changed features vector field name to features_2048
mohamedelabbas1996 Apr 16, 2025
0ff569f
feat: read features vector from processing service ClassificationResp…
mohamedelabbas1996 Apr 16, 2025
89a3b6c
test: added tests for PGVector distance metrics
mohamedelabbas1996 Apr 16, 2025
9e13cc4
updated docker-compose.ci.yml to use the same postgres image
mohamedelabbas1996 Apr 17, 2025
5a51593
updated docker-compose.ci.yml to use the same postgres image as docke…
mohamedelabbas1996 Apr 17, 2025
9efff5f
updated docker-compose.ci.yml to use the same postgres image as docke…
mohamedelabbas1996 Apr 17, 2025
1c66f34
feat: Added support for clustering detections for source image collec…
mohamedelabbas1996 Apr 29, 2025
99a7f3f
feat: Allowed triggering collection detections clustering from admin …
mohamedelabbas1996 Apr 29, 2025
83f2c08
fix: show unobserved Taxa in view for now
mihow Apr 29, 2025
5420f85
fix: create & update occurrence determinations after clustering
mihow Apr 29, 2025
6b0020d
feat: add unknown species filter to admin
mihow Apr 29, 2025
856035d
Merge branch 'deployments/ood.antenna.insectai.org' of github.com:Rol…
mihow Apr 30, 2025
036d81d
Merge branch 'deployments/ood.antenna.insectai.org' of github.com:Rol…
mihow Apr 30, 2025
4f8b09b
fix: circular import
mihow Apr 30, 2025
d255085
fix: update migration ordering
mihow Apr 30, 2025
2e12b56
Integrated Agglomerative clustering
mohamedelabbas1996 Apr 30, 2025
a301dc7
Merge branch 'feat/add-clustering' of https://github.com/RolnickLab/a…
mohamedelabbas1996 Apr 30, 2025
10820bb
updated clustering request params
mohamedelabbas1996 Apr 30, 2025
225529e
fixed Agglomerative clustering
mohamedelabbas1996 Apr 30, 2025
0423523
fix: disable missing clustering algorithms
mihow May 1, 2025
cb894f4
fix: syntax when creating algorithm entry
mihow May 1, 2025
39d9b6c
feat: command to create clustering job without starting it
mihow May 1, 2025
abd9cf1
feat: increase default batch size
mihow May 1, 2025
b2a7b3f
fix: better algorithm name
mihow May 1, 2025
bf67d06
feat: allow sorting by OOD score
mihow May 1, 2025
ce08f6a
Merge branch 'deployments/ood.antenna.insectai.org' of github.com:Rol…
mihow May 1, 2025
853b69d
feat: add unknown species and other fields to Taxon serializer
mihow May 1, 2025
6586872
fix: remove missing field
mihow May 1, 2025
b242079
fix: migration conflicts
mihow May 1, 2025
fe744f0
feat: fields for investigating occurrence classifications in admin
mihow May 2, 2025
4ac88da
fix: filter by feature extraction algorithm
mohamedelabbas1996 May 5, 2025
6d44bdb
chore: Used a serializer to handle job params instead of reading them…
mohamedelabbas1996 May 5, 2025
12b4ee4
set default ood threshold to 0.0
mohamedelabbas1996 May 5, 2025
2c73795
test: added tests for clustering
mohamedelabbas1996 May 5, 2025
e5d7ff0
chore: migration for new algorithm type
mihow May 6, 2025
9ad77f7
Merge branch 'deployments/ood.antenna.insectai.org' of github.com:Rol…
mihow May 6, 2025
fdbbf75
fix: remove cluster action in Event admin until it's ready
mihow May 6, 2025
0e92904
chore: move algorithm selection to dedicated function
mihow May 7, 2025
b26fbe0
fix: update clustering tests and types
mihow May 7, 2025
4032aff
chore: remove external network config in processing services
mihow May 7, 2025
0f8c544
feat: update GitHub workflows to run tests on other branches
mihow May 7, 2025
5fb5c43
fix: hide unobserved taxa by default
mihow May 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ env:

on:
pull_request:
branches: ["master", "main"]
branches: ["main", "deployments/*", "releases/*"]
paths-ignore: ["docs/**", "ui/**"]

push:
branches: ["master", "main"]
branches: ["main", "deployments/*", "releases/*"]
paths-ignore: ["docs/**", "ui/**"]

concurrency:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test.frontend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ env:

on:
pull_request:
branches: ["master", "main"]
branches: ["main", "deployments/*", "releases/*"]
paths:
- "!./**"
- "ui/**"

push:
branches: ["master", "main"]
branches: ["main", "deployments/*", "releases/*"]
paths:
- "!./**"
- "ui/**"
Expand Down
29 changes: 29 additions & 0 deletions ami/jobs/migrations/0017_alter_job_job_type_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 4.2.10 on 2025-04-24 16:25

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated migration: Django re-declares the full `choices` list for
    # Job.job_type_key whenever a new job type is registered. This one adds
    # the "occurrence_clustering" choice (renamed in the following migration
    # 0018 to "detection_clustering").
    dependencies = [
        ("jobs", "0016_job_data_export_job_params_alter_job_job_type_key"),
    ]

    operations = [
        migrations.AlterField(
            model_name="job",
            name="job_type_key",
            field=models.CharField(
                choices=[
                    ("ml", "ML pipeline"),
                    ("populate_captures_collection", "Populate captures collection"),
                    ("data_storage_sync", "Data storage sync"),
                    ("unknown", "Unknown"),
                    ("data_export", "Data Export"),
                    ("occurrence_clustering", "Occurrence Feature Clustering"),
                ],
                default="unknown",
                max_length=255,
                verbose_name="Job Type",
            ),
        ),
    ]
29 changes: 29 additions & 0 deletions ami/jobs/migrations/0018_alter_job_job_type_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 4.2.10 on 2025-04-28 11:06

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated migration: renames the clustering job type choice from
    # "occurrence_clustering" (added in 0017) to "detection_clustering",
    # matching DetectionClusteringJob.key in ami/jobs/models.py.
    dependencies = [
        ("jobs", "0017_alter_job_job_type_key"),
    ]

    operations = [
        migrations.AlterField(
            model_name="job",
            name="job_type_key",
            field=models.CharField(
                choices=[
                    ("ml", "ML pipeline"),
                    ("populate_captures_collection", "Populate captures collection"),
                    ("data_storage_sync", "Data storage sync"),
                    ("unknown", "Unknown"),
                    ("data_export", "Data Export"),
                    ("detection_clustering", "Detection Feature Clustering"),
                ],
                default="unknown",
                max_length=255,
                verbose_name="Job Type",
            ),
        ),
    ]
44 changes: 42 additions & 2 deletions ami/jobs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,8 @@ def run(cls, job: "Job"):
total_classifications = 0

config = job.pipeline.get_config(project_id=job.project.pk)
chunk_size = config.get("request_source_image_batch_size", 1)
chunk_size = config.get("request_source_image_batch_size", 2)
# @TODO Ensure only images of the same dimensions are processed in a batch
chunks = [images[i : i + chunk_size] for i in range(0, image_count, chunk_size)] # noqa
request_failed_images = []

Expand Down Expand Up @@ -639,6 +640,38 @@ def run(cls, job: "Job"):
job.update_status(JobState.SUCCESS, save=True)


class DetectionClusteringJob(JobType):
    """Job type that clusters detection feature vectors for a source image collection."""

    name = "Detection Feature Clustering"
    key = "detection_clustering"

    @classmethod
    def run(cls, job: "Job"):
        """Execute the clustering job.

        Marks the job as started, registers the progress stages, delegates the
        clustering work to the source image collection, then marks the job as
        finished.

        Raises:
            ValueError: If the job has no source image collection to cluster.
        """
        # Validate before mutating any job state so a misconfigured job is not
        # left persisted in a STARTED state.
        if not job.source_image_collection:
            raise ValueError("No source image collection provided")

        # Mark the job started exactly once (the original did this twice,
        # duplicating update_status/started_at/finished_at/save).
        job.update_status(JobState.STARTED)
        job.started_at = datetime.datetime.now()
        job.finished_at = None
        job.progress.add_stage(name="Collecting Features", key="feature_collection")
        job.progress.add_stage("Clustering", key="clustering")
        job.progress.add_stage("Creating Unknown Taxa", key="create_unknown_taxa")
        job.save()

        job.logger.info(f"Clustering detections for collection {job.source_image_collection}")

        # Delegate the actual clustering work to the collection model.
        job.source_image_collection.cluster_detections(job=job)
        job.logger.info(f"Finished clustering detections for collection {job.source_image_collection}")

        job.finished_at = datetime.datetime.now()
        # Defer the save so the final status and finished_at persist together.
        job.update_status(JobState.SUCCESS, save=False)
        job.save()


class UnknownJobType(JobType):
name = "Unknown"
key = "unknown"
Expand All @@ -648,7 +681,14 @@ def run(cls, job: "Job"):
raise ValueError(f"Unknown job type '{job.job_type()}'")


VALID_JOB_TYPES = [MLJob, SourceImageCollectionPopulateJob, DataStorageSyncJob, UnknownJobType, DataExportJob]
# Registry of all job types resolvable by `get_job_type_by_key`; each entry's
# `key` must match a choice declared for Job.job_type_key.
VALID_JOB_TYPES = [
    MLJob,
    SourceImageCollectionPopulateJob,
    DataStorageSyncJob,
    UnknownJobType,
    DataExportJob,
    DetectionClusteringJob,  # added for the "detection_clustering" job type
]


def get_job_type_by_key(key: str) -> type[JobType] | None:
Expand Down
61 changes: 58 additions & 3 deletions ami/main/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.http.request import HttpRequest
from django.template.defaultfilters import filesizeformat
from django.utils.formats import number_format
from django.utils.html import format_html
from guardian.admin import GuardedModelAdmin

import ami.utils
Expand Down Expand Up @@ -220,7 +221,6 @@ def update_calculated_fields(self, request: HttpRequest, queryset: QuerySet[Even
self.message_user(request, f"Updated {queryset.count()} events.")

list_filter = ("deployment", "project", "start")
actions = [update_calculated_fields]


@admin.register(SourceImage)
Expand Down Expand Up @@ -262,20 +262,27 @@ class ClassificationInline(admin.TabularInline):
model = Classification
extra = 0
fields = (
"view_classification",
"taxon",
"algorithm",
"timestamp",
"terminal",
"created_at",
)
readonly_fields = (
"view_classification",
"taxon",
"algorithm",
"timestamp",
"terminal",
"created_at",
)

@admin.display(description="Classification")
def view_classification(self, obj):
    """Render the row's primary key as a link to the Classification change page."""
    return format_html(
        '<a href="{}">{}</a>',
        f"/admin/main/classification/{obj.pk}/change/",
        obj.pk,
    )

def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
qs = super().get_queryset(request)
return qs.select_related("taxon", "algorithm", "detection")
Expand All @@ -285,20 +292,27 @@ class DetectionInline(admin.TabularInline):
model = Detection
extra = 0
fields = (
"view_detection",
"detection_algorithm",
"source_image",
"timestamp",
"created_at",
"occurrence",
)
readonly_fields = (
"view_detection",
"detection_algorithm",
"source_image",
"timestamp",
"created_at",
"occurrence",
)

@admin.display(description="Detection")
def view_detection(self, obj):
    """Render the row's primary key as a link to the Detection change page."""
    return format_html(
        '<a href="{}">{}</a>',
        f"/admin/main/detection/{obj.pk}/change/",
        obj.pk,
    )


@admin.register(Detection)
class DetectionAdmin(admin.ModelAdmin[Detection]):
Expand Down Expand Up @@ -461,7 +475,7 @@ class TaxonAdmin(admin.ModelAdmin[Taxon]):
"created_at",
"updated_at",
)
list_filter = ("lists", "rank", TaxonParentFilter)
list_filter = ("unknown_species", "lists", "rank", TaxonParentFilter)
search_fields = ("name",)
autocomplete_fields = (
"parent",
Expand Down Expand Up @@ -594,7 +608,48 @@ def populate_collection_async(self, request: HttpRequest, queryset: QuerySet[Sou
f"Populating {len(queued_tasks)} collection(s) background tasks: {queued_tasks}.",
)

actions = [populate_collection, populate_collection_async]
@admin.action(description="Create clustering job (but don't run it)")
def create_clustering_job(self, request: HttpRequest, queryset: QuerySet[SourceImageCollection]) -> None:
    """Create a DetectionClusteringJob for each selected collection without enqueueing it.

    The job can later be started manually (e.g. from the Job admin).
    """
    # NOTE: the original stacked a second bare @admin.action() decorator on top
    # of the descriptive one; the duplicate wrap discarded the description and
    # has been removed.
    # Local import to avoid a circular import between admin and jobs models.
    from ami.jobs.models import DetectionClusteringJob, Job

    for collection in queryset:
        job = Job.objects.create(
            name=f"Clustering detections for collection {collection.pk}",
            project=collection.project,
            source_image_collection=collection,
            job_type_key=DetectionClusteringJob.key,
            # Default clustering parameters; tune per dataset as needed.
            params={
                "ood_threshold": 0.3,
                "algorithm": "agglomerative",
                "algorithm_kwargs": {"distance_threshold": 80},
                "pca": {"n_components": 384},
            },
        )
        self.message_user(request, f"Created clustering job #{job.pk} for collection #{collection.pk}")

@admin.action()
def cluster_detections(self, request: HttpRequest, queryset: QuerySet[SourceImageCollection]) -> None:
    """Create and immediately enqueue a DetectionClusteringJob for each selected collection."""
    # Hoisted out of the loop: the original re-imported on every iteration.
    # Local import to avoid a circular import between admin and jobs models.
    from ami.jobs.models import DetectionClusteringJob, Job

    for collection in queryset:
        job = Job.objects.create(
            name=f"Clustering detections for collection {collection.pk}",
            project=collection.project,
            source_image_collection=collection,
            job_type_key=DetectionClusteringJob.key,
            # Default clustering parameters; tune per dataset as needed.
            params={
                "ood_threshold": 0.3,
                "algorithm": "agglomerative",
                "algorithm_kwargs": {"distance_threshold": 80},
                "pca": {"n_components": 384},
            },
        )
        job.enqueue()

    self.message_user(request, f"Clustered {queryset.count()} collection(s).")

actions = [populate_collection, populate_collection_async, cluster_detections, create_clustering_job]

# Hide images many-to-many field from form. This would list all source images in the database.
exclude = ("images",)
11 changes: 11 additions & 0 deletions ami/main/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,7 @@ class Meta:
"last_detected",
"best_determination_score",
"cover_image_url",
"unknown_species",
"created_at",
"updated_at",
]
Expand Down Expand Up @@ -740,6 +741,8 @@ class Meta:
"fieldguide_id",
"cover_image_url",
"cover_image_credit",
"unknown_species",
"last_detected", # @TODO this has performance impact, review
]


Expand Down Expand Up @@ -1548,3 +1551,11 @@ class Meta:
"total_size",
"last_checked",
]


class ClusterDetectionsSerializer(serializers.Serializer):
    """Validates the parameters for a detection clustering job request.

    Used both by the SourceImageCollection `cluster_detections` API action and
    as the schema for DetectionClusteringJob params.
    """

    ood_threshold = serializers.FloatField(required=False, default=0.0)
    feature_extraction_algorithm = serializers.CharField(required=False, allow_null=True)
    algorithm = serializers.CharField(required=False, default="agglomerative")
    # Use callables for mutable defaults: DRF calls a callable default per use,
    # so a single shared dict instance cannot be mutated across requests.
    algorithm_kwargs = serializers.DictField(required=False, default=lambda: {"distance_threshold": 0.5})
    pca = serializers.DictField(required=False, default=lambda: {"n_components": 384})
26 changes: 24 additions & 2 deletions ami/main/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
)
from ami.base.serializers import FilterParamsSerializer, SingleParamSerializer
from ami.base.views import ProjectMixin
from ami.jobs.models import DetectionClusteringJob, Job
from ami.main.api.serializers import ClusterDetectionsSerializer
from ami.utils.requests import get_active_classification_threshold, project_id_doc_param
from ami.utils.storages import ConnectionTestResult

Expand Down Expand Up @@ -744,6 +746,27 @@ def remove(self, request, pk=None):
}
)

@action(detail=True, methods=["post"], name="cluster detections")
def cluster_detections(self, request, pk=None):
    """Kick off a background job that clusters the detections in this collection."""
    collection: SourceImageCollection = self.get_object()

    # Validate the clustering parameters supplied in the request body.
    params_serializer = ClusterDetectionsSerializer(data=request.data)
    params_serializer.is_valid(raise_exception=True)

    clustering_job = Job.objects.create(
        name=f"Clustering detections for collection {collection.pk}",
        project=collection.project,
        source_image_collection=collection,
        job_type_key=DetectionClusteringJob.key,
        params=params_serializer.validated_data,
    )
    clustering_job.enqueue()

    logger.info(f"Triggered clustering job for collection {collection.pk}")
    return Response({"job_id": clustering_job.pk, "project_id": collection.project.pk})

@extend_schema(parameters=[project_id_doc_param])
def list(self, request, *args, **kwargs):
return super().list(request, *args, **kwargs)
Expand Down Expand Up @@ -1273,8 +1296,7 @@ def get_queryset(self) -> QuerySet:
project = self.get_active_project()

if project:
# Allow showing detail views for unobserved taxa
include_unobserved = True
include_unobserved = True # Show detail views for unobserved taxa instead of 404
if self.action == "list":
include_unobserved = self.request.query_params.get("include_unobserved", False)
qs = self.get_taxa_observed(qs, project, include_unobserved=include_unobserved)
Expand Down
17 changes: 17 additions & 0 deletions ami/main/migrations/0063_taxon_unknown_species.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2.10 on 2025-04-28 11:11

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated migration: adds the boolean `unknown_species` flag to
    # Taxon, used to mark taxa generated by the detection clustering feature.
    dependencies = [
        ("main", "0062_classification_ood_score_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="taxon",
            name="unknown_species",
            field=models.BooleanField(default=False, help_text="Is this a clustering-generated taxon"),
        ),
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class Migration(migrations.Migration):
dependencies = [
("main", "0059_alter_project_options"),
("main", "0063_taxon_unknown_species"),
]

operations = [
Expand Down
Loading