Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
45c1d77
feat: added celery export occurrence task
mohamedelabbas1996 Feb 17, 2025
f6871ea
feat: added export & export_status endpoints
mohamedelabbas1996 Feb 17, 2025
b3e448d
added migration files
mohamedelabbas1996 Feb 17, 2025
bb745f6
fixed migration conflict
mohamedelabbas1996 Feb 17, 2025
518b8df
fix: disabled pagination for export action
mohamedelabbas1996 Feb 18, 2025
b3b4369
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Feb 18, 2025
8d98759
fix: merged migrations
mohamedelabbas1996 Feb 18, 2025
21470b9
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Feb 23, 2025
a8673af
feat: added DataExport Job Type
mohamedelabbas1996 Feb 24, 2025
523d177
Implemented JSON export for occurrence data
mohamedelabbas1996 Mar 4, 2025
ac7cfbc
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Mar 4, 2025
04ab2cf
feat: Added support for csv file format
mohamedelabbas1996 Mar 4, 2025
e4599b9
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Mar 6, 2025
94cc7a3
chore: Moved export actions to a separate view under the exports app
mohamedelabbas1996 Mar 6, 2025
ed3960a
Merge branch 'main' of github.com:RolnickLab/antenna into feat/export…
mihow Mar 7, 2025
c4c9820
chore: ignore unresolvable type errors
mihow Mar 7, 2025
5dbc002
chore: remove dependencies for darwincore export in this PR
mihow Mar 7, 2025
a86a348
fix: use mixin for get_active_project
mihow Mar 7, 2025
57c5905
feat: register export views in api router
mihow Mar 7, 2025
e0df304
feat: Implemented Data Export Framework & Occurrence Exports
mohamedelabbas1996 Mar 10, 2025
8be00cd
Merge branch 'feat/export-occurrences-data' of https://github.com/Rol…
mohamedelabbas1996 Mar 10, 2025
b297a84
feat: Added more fields to the OccurrenceTabularSerializer
mohamedelabbas1996 Mar 11, 2025
d8d3b5d
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Mar 11, 2025
1270fd1
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Mar 17, 2025
44c3ca8
Refactor DataExport Model and API & Admin Integration
mohamedelabbas1996 Mar 17, 2025
95e6e86
Merge branch 'feat/export-occurrences-data' of https://github.com/Rol…
mohamedelabbas1996 Mar 17, 2025
8a02b3d
Removed DataExport status field
mohamedelabbas1996 Mar 17, 2025
c8f5d3e
chore: Raise NotImplemented for abstract methods
mohamedelabbas1996 Mar 17, 2025
349925a
Brought back DataExport file_url field
mohamedelabbas1996 Mar 17, 2025
e0321bd
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Mar 19, 2025
45485b2
Refactor Data Export: Improve Filtering, Naming, and JSON Validity
mohamedelabbas1996 Mar 20, 2025
4105177
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Mar 20, 2025
3c9aca2
Merge branch 'feat/export-occurrences-data' of https://github.com/Rol…
mohamedelabbas1996 Mar 20, 2025
543a142
fix: Added missing migration file
mohamedelabbas1996 Mar 20, 2025
95745d7
fix: Added missing migration file
mohamedelabbas1996 Mar 20, 2025
25896b7
Merge branch 'main' into feat/export-occurrences-data
annavik Mar 21, 2025
f14653f
fix: tweak labels to be sentence case
annavik Mar 21, 2025
43e8835
fix: update CSV export field from verification -> verification_status
annavik Mar 21, 2025
f836bfa
Improve DataExport handling, filtering, and cleanup logic
mohamedelabbas1996 Mar 24, 2025
3d0514a
Merge branch 'feat/export-occurrences-data' of https://github.com/Rol…
mohamedelabbas1996 Mar 24, 2025
1e879e4
test: multiple methods of nesting related obj data for exports
mihow Mar 25, 2025
4d48622
feat: return absolute urls for export files
mihow Mar 25, 2025
747708c
Merge branch 'main' into feat/export-occurrences-data
mihow Mar 25, 2025
2478789
Merge branch 'main' into feat/export-occurrences-data
mohamedelabbas1996 Mar 28, 2025
cd2f57c
Refactor Export Logic and Add Export Stats
mohamedelabbas1996 Mar 28, 2025
4bae6c7
Merge branch 'feat/export-occurrences-data' of https://github.com/Rol…
mohamedelabbas1996 Mar 28, 2025
26181d0
Enhance Export Details
mohamedelabbas1996 Mar 31, 2025
eded961
fix: make summary count consistent with exports
mihow Apr 1, 2025
02dd4b7
feat: update and return total record count before starting export
mihow Apr 1, 2025
058f93e
feat: update total record count before exporting first batch
mihow Apr 2, 2025
b20a851
feat: lower batch size for exports to increase update frequency
mihow Apr 2, 2025
a518a74
chore: reset all migrations to main
mihow Apr 3, 2025
0b06579
chore: recreate migrations
mihow Apr 3, 2025
ee34d2c
chore: moved export format validation logic to the serializer
mohamedelabbas1996 Apr 4, 2025
0900bb0
chore: changed collection filter param name to collection_id
mohamedelabbas1996 Apr 4, 2025
a1eb605
Merge branch 'feat/export-occurrences-data' of https://github.com/Rol…
mohamedelabbas1996 Apr 4, 2025
faeb081
Merge branch 'main' of github.com:RolnickLab/antenna into feat/export…
mihow Apr 8, 2025
6a50eed
chore: fix type hints
mihow Apr 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ami/base/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def add_object_level_permissions(
# Do not return create, view permissions at object-level
filtered_permissions -= {"create", "view"}
permissions.update(filtered_permissions)
response_data["user_permissions"] = permissions
response_data["user_permissions"] = list(permissions)
return response_data


Expand All @@ -86,7 +86,7 @@ def add_collection_level_permissions(user: User | None, response_data: dict, mod

if user and project and f"create_{model.__name__.lower()}" in get_perms(user, project):
permissions.add("create")
response_data["user_permissions"] = permissions
response_data["user_permissions"] = list(permissions)
return response_data


Expand Down
Empty file added ami/exports/__init__.py
Empty file.
64 changes: 64 additions & 0 deletions ami/exports/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from django.contrib import admin
from django.http import HttpRequest

from .models import DataExport


@admin.register(DataExport)
class DataExportAdmin(admin.ModelAdmin):
    """Admin panel for managing DataExport objects."""

    list_display = ("id", "user", "format", "status_display", "project", "created_at", "get_job")
    list_filter = ("format", "project")
    search_fields = ("user__username", "format", "project__name")
    readonly_fields = ("status_display", "file_url_display")

    fieldsets = (
        (
            None,
            {"fields": ("user", "format", "project", "filters")},
        ),
        (
            "Job Info",
            {
                "fields": ("status_display", "file_url_display"),
                # "collapse" renders the job-related fields as a collapsible section
                "classes": ("collapse",),
            },
        ),
    )

    def get_queryset(self, request: HttpRequest):
        """Select related project and job up front to avoid per-row queries."""
        queryset = super().get_queryset(request)
        return queryset.select_related("project", "job")

    @admin.display(description="Status")
    def status_display(self, obj):
        # Delegates to the @property on the model
        return obj.status

    @admin.display(description="File URL")
    def file_url_display(self, obj):
        # Delegates to the @property on the model
        return obj.file_url

    @admin.display(description="Job ID")
    def get_job(self, obj):
        """Display the related job ID, or 'No Job' if none exists."""
        if obj.job:
            return obj.job.id
        return "No Job"

    @admin.action(description="Run export job")
    def run_export_job(self, request: HttpRequest, queryset):
        """Admin action to trigger the export job manually."""
        for export in queryset:
            if export.job:
                export.job.enqueue()

        self.message_user(request, f"Started export job for {queryset.count()} export(s).")

    actions = [run_export_job]
9 changes: 9 additions & 0 deletions ami/exports/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from django.apps import AppConfig


class ExportsConfig(AppConfig):
    """Django app configuration for the ami.exports app."""

    # NOTE(review): this matches DEFAULT_AUTO_FIELD in settings/base.py,
    # so it is redundant but harmless — kept for explicitness.
    default_auto_field = "django.db.models.BigAutoField"
    name = "ami.exports"

    def ready(self):
        # Import signal handlers so they are registered at app startup.
        import ami.exports.signals  # noqa: F401
73 changes: 73 additions & 0 deletions ami/exports/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import logging
import os
from abc import ABC, abstractmethod

from ami.exports.utils import apply_filters

logger = logging.getLogger(__name__)


class BaseExporter(ABC):
    """Base class for all data export handlers.

    Subclasses set ``file_format`` and ``serializer_class`` and implement
    ``get_queryset()`` and ``export()``; ``export()`` writes the output file
    and returns its path.
    """

    file_format = ""  # To be defined in child classes (e.g. "json", "csv")
    serializer_class = None
    filter_backends = []

    def __init__(self, data_export):
        """Build the filtered queryset and seed job progress parameters.

        :param data_export: DataExport instance describing the export
            (project, filters, and optionally a related job).
        """
        self.data_export = data_export
        # The related job may be absent (e.g. export created without a job)
        self.job = data_export.job if hasattr(data_export, "job") else None
        self.project = data_export.project
        self.queryset = apply_filters(
            queryset=self.get_queryset(), filters=data_export.filters, filter_backends=self.get_filter_backends()
        )
        self.total_records = self.queryset.count()
        if self.job:
            self.job.progress.add_stage_param(self.job.job_type_key, "Number of records exported", 0)
            self.job.progress.add_stage_param(self.job.job_type_key, "Total records to export", self.total_records)
            self.job.save()

    @abstractmethod
    def export(self):
        """Perform the export process and return the generated file path."""
        raise NotImplementedError()

    @abstractmethod
    def get_queryset(self):
        """Return the base queryset of records to export."""
        raise NotImplementedError()

    def get_serializer_class(self):
        """Return the serializer class used to render each record."""
        return self.serializer_class

    def get_filter_backends(self):
        # Imported lazily to avoid a circular import with ami.main.api.views
        from ami.main.api.views import OccurrenceCollectionFilter

        return [OccurrenceCollectionFilter]

    def update_export_stats(self, file_temp_path=None):
        """
        Updates record_count based on queryset and file size after export.
        """
        # Set record count from queryset
        self.data_export.record_count = self.queryset.count()

        # Check if temp file path is provided and update file size
        if file_temp_path and os.path.exists(file_temp_path):
            self.data_export.file_size = os.path.getsize(file_temp_path)

        # Save the updated values
        self.data_export.save()

    def update_job_progress(self, records_exported):
        """
        Updates job progress and record count.
        """
        if self.job:
            # Guard against ZeroDivisionError when the filtered queryset is
            # empty: an export of zero records is immediately 100% complete.
            if self.total_records:
                progress = round(records_exported / self.total_records, 2)
            else:
                progress = 1.0
            self.job.progress.update_stage(self.job.job_type_key, progress=progress)
            self.job.progress.add_or_update_stage_param(
                self.job.job_type_key, "Number of records exported", records_exported
            )
            self.job.save()
159 changes: 159 additions & 0 deletions ami/exports/format_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import csv
import json
import logging
import tempfile

from django.core.serializers.json import DjangoJSONEncoder
from rest_framework import serializers

from ami.exports.base import BaseExporter
from ami.exports.utils import get_data_in_batches
from ami.main.models import Occurrence

logger = logging.getLogger(__name__)


def get_export_serializer():
    """Build and return the serializer class used for JSON exports."""
    from ami.main.api.serializers import OccurrenceSerializer

    class OccurrenceExportSerializer(OccurrenceSerializer):
        detection_images = serializers.SerializerMethodField()

        def get_detection_images(self, obj: Occurrence):
            """Materialize the detection_images generator into a list."""
            images = getattr(obj, "detection_images", None)
            if callable(images):
                return list(images())  # Convert generator to list
            return []

        def get_permissions(self, instance_data):
            # Exports carry no per-user permissions; pass the data through.
            return instance_data

        def to_representation(self, instance):
            # Bypass OccurrenceSerializer's representation and use the plain
            # HyperlinkedModelSerializer one.
            return serializers.HyperlinkedModelSerializer.to_representation(self, instance)

    return OccurrenceExportSerializer


class JSONExporter(BaseExporter):
    """Handles JSON export of occurrences."""

    file_format = "json"

    def get_serializer_class(self):
        # Built lazily to avoid importing API serializers at module load time
        return get_export_serializer()

    def get_queryset(self):
        """Return project occurrences with related objects and annotations."""
        return (
            Occurrence.objects.filter(project=self.project)
            .select_related(
                "determination",
                "deployment",
                "event",
            )
            .with_timestamps()  # type: ignore[union-attr] Custom queryset method
            .with_detections_count()
            .with_identifications()
        )

    def export(self):
        """Exports occurrences to JSON format; returns the temp file path.

        Each batch is serialized independently and the pieces are stitched
        into one JSON array, so the full queryset is never held in memory.
        """
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8")
        # Write through the NamedTemporaryFile handle itself. The previous
        # implementation re-opened temp_file.name with open(), leaking the
        # original file descriptor.
        with temp_file as f:
            first = True
            f.write("[")
            records_exported = 0
            for batch in get_data_in_batches(self.queryset, self.get_serializer_class()):
                json_data = json.dumps(batch, cls=DjangoJSONEncoder)
                json_data = json_data[1:-1]  # remove [ and ] from json string
                if not first:
                    f.write(",\n")
                f.write(json_data)
                first = False
                records_exported += len(batch)
                self.update_job_progress(records_exported)
            f.write("]")

        self.update_export_stats(file_temp_path=temp_file.name)
        return temp_file.name  # Return file path


class OccurrenceTabularSerializer(serializers.ModelSerializer):
    """Serializer to format occurrences for tabular data export.

    Flattens related objects (event, deployment, project, determination)
    into scalar id/name columns. The order of ``Meta.fields`` determines
    the column order of the exported file, since exporters read
    ``serializer.fields.keys()`` as the header row.
    """

    # Related objects flattened into id/name column pairs; allow_null because
    # the underlying foreign keys may be unset.
    event_id = serializers.IntegerField(source="event.id", allow_null=True)
    event_name = serializers.CharField(source="event.name", allow_null=True)
    deployment_id = serializers.IntegerField(source="deployment.id", allow_null=True)
    deployment_name = serializers.CharField(source="deployment.name", allow_null=True)
    project_id = serializers.IntegerField(source="project.id", allow_null=True)
    project_name = serializers.CharField(source="project.name", allow_null=True)

    determination_id = serializers.IntegerField(source="determination.id", allow_null=True)
    determination_name = serializers.CharField(source="determination.name", allow_null=True)
    determination_score = serializers.FloatField(allow_null=True)
    # Computed column; see get_verification_status below
    verification_status = serializers.SerializerMethodField()

    class Meta:
        model = Occurrence
        fields = [
            "id",
            "event_id",
            "event_name",
            "deployment_id",
            "deployment_name",
            "project_id",
            "project_name",
            "determination_id",
            "determination_name",
            "determination_score",
            "verification_status",
            # NOTE(review): the fields below presumably come from queryset
            # annotations (with_detections_count / with_timestamps) — confirm
            # the exporter queryset always provides them.
            "detections_count",
            "first_appearance_timestamp",
            "last_appearance_timestamp",
            "duration",
        ]

    def get_verification_status(self, obj) -> str:
        """
        Returns 'Verified' if the occurrence has identifications, otherwise 'Not verified'.
        """
        return "Verified" if obj.identifications.exists() else "Not verified"


class CSVExporter(BaseExporter):
    """Handles CSV export of occurrences."""

    file_format = "csv"

    serializer_class = OccurrenceTabularSerializer

    def get_queryset(self):
        """Return project occurrences with related objects and annotations."""
        return (
            Occurrence.objects.filter(project=self.project)
            .select_related(
                "determination",
                "deployment",
                "event",
            )
            .with_timestamps()  # type: ignore[union-attr] Custom queryset method
            .with_detections_count()
            .with_identifications()
        )

    def export(self):
        """Exports occurrences to CSV format; returns the temp file path."""

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8")

        # Extract field names dynamically from the serializer; their order
        # defines the CSV column order.
        serializer = self.serializer_class()
        field_names = list(serializer.fields.keys())
        records_exported = 0
        # Write through the NamedTemporaryFile handle itself. The previous
        # implementation re-opened temp_file.name with open(), leaking the
        # original file descriptor.
        with temp_file as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=field_names)
            writer.writeheader()

            for batch in get_data_in_batches(self.queryset, self.serializer_class):
                writer.writerows(batch)
                records_exported += len(batch)
                self.update_job_progress(records_exported)
        self.update_export_stats(file_temp_path=temp_file.name)
        return temp_file.name  # Return the file path
57 changes: 57 additions & 0 deletions ami/exports/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Generated by Django 4.2.10 on 2025-04-02 20:12

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Auto-generated initial migration creating the DataExport model for the
    # exports app. Do not edit field definitions by hand; generate a new
    # migration instead.
    initial = True

    dependencies = [
        ("main", "0058_alter_project_options"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name="DataExport",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    # Export format identifier; choices mirror the supported
                    # JSON and CSV occurrence export types.
                    "format",
                    models.CharField(
                        choices=[
                            ("occurrences_simple_json", "occurrences_simple_json"),
                            ("occurrences_simple_csv", "occurrences_simple_csv"),
                        ],
                        max_length=255,
                    ),
                ),
                # Filters applied to the export queryset (raw and display forms)
                ("filters", models.JSONField(blank=True, null=True)),
                ("filters_display", models.JSONField(blank=True, null=True)),
                ("file_url", models.URLField(blank=True, null=True)),
                # Statistics populated after the export completes
                ("record_count", models.PositiveIntegerField(default=0)),
                ("file_size", models.PositiveBigIntegerField(default=0)),
                (
                    "project",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, related_name="exports", to="main.project"
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="exports",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
Empty file.
Loading