Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,12 @@ For each PR made, an entry should be added to this changelog. It should contain
- physics_of_the_cosmos
- stsci_space_telescope_science_institute
- Once the front end has been updated to allow for tag edits, all astrophysics collections will be marked to be run through the pipeline

- 1295-asynchronous-metrics-download-in-admin-panel
- Description: Implemented asynchronous metrics download in Django admin
- Changes:
- Button Addition: Integrated a 'metrics' button
- Task Generation: download_metrics handles button clicks to initiate a Celery task, display a download link and manage redirection.
- Cleanup Mechanism: Cleans up old metrics files in the directory, keeping only current task related files.
- Background Processing: Runs generate_metrics task asynchronously to gather data and generate a CSV in MEDIA_ROOT/metrics/
- File Retrieval: get_metrics_file checks file availability and size, providing a download if ready or status messages for in-progress files.
1 change: 1 addition & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ tenacity==8.2.2
tqdm==4.66.3
unidecode==1.3.8
xmltodict==0.13.0
numpy==1.24.3
87 changes: 85 additions & 2 deletions sde_collections/admin.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import csv
import os
import uuid

from django import forms
from django.conf import settings
from django.contrib import admin, messages
from django.http import HttpResponse
from django.http import FileResponse, HttpResponse
from django.urls import path
from django.utils.safestring import mark_safe

from sde_collections.models.delta_patterns import (
DeltaDivisionPattern,
Expand All @@ -15,7 +20,7 @@
from .models.collection_choice_fields import TDAMMTags
from .models.delta_url import CuratedUrl, DeltaUrl, DumpUrl
from .models.pattern import DivisionPattern, IncludePattern, TitlePattern
from .tasks import fetch_full_text, import_candidate_urls_from_api
from .tasks import fetch_full_text, generate_metrics, import_candidate_urls_from_api


def fetch_and_replace_text_for_server(modeladmin, request, queryset, server_name):
Expand Down Expand Up @@ -296,6 +301,84 @@ def included_curated_urls_count(self, obj) -> int:
]
ordering = ("cleaning_order",)

def changelist_view(self, request, extra_context=None):
    """
    Render the changelist with the context entries used by the metrics button.

    Adds ``show_metrics_button`` (always ``True``) and ``metrics_url`` (the
    current request path followed by ``metrics/``) to the caller-supplied
    context, or to a new dict when none was given, then delegates to the
    parent implementation.
    """
    context = extra_context or {}
    context.update(
        show_metrics_button=True,
        metrics_url=f"{request.path}metrics/",
    )
    return super().changelist_view(request, extra_context=context)

def get_urls(self):
    """
    Return the admin URL patterns with the two metrics endpoints placed first.

    ``metrics/`` is routed to ``download_metrics`` and
    ``metrics/<str:task_id>/`` to ``get_metrics_file``; both views are wrapped
    with ``self.admin_site.admin_view``. The custom routes precede the
    inherited ones in the returned list.
    """
    inherited = super().get_urls()
    wrap = self.admin_site.admin_view

    start_route = path(
        "metrics/",
        wrap(self.download_metrics),
        name="sde_collections_collection_metrics",
    )
    fetch_route = path(
        "metrics/<str:task_id>/",
        wrap(self.get_metrics_file),
        name="sde_collections_get_metrics",
    )
    return [start_route, fetch_route] + inherited

def download_metrics(self, request):
    """
    Start metrics generation in the background and return to the collection list.

    A fresh UUID is used as the task id and passed to
    ``generate_metrics.delay``. An INFO message containing a link to
    ``metrics/<task_id>/`` is queued for the user, and the response is a 303
    redirect back to the changelist URL.

    Args:
        request: The incoming admin request; its path is expected to end with
            ``metrics/`` (the route this view is registered under).

    Returns:
        An ``HttpResponse`` with status 303 and a ``Location`` header pointing
        at the changelist.
    """
    # Function-scope import: keeps this method self-contained without touching
    # the module's import block.
    from django.utils.html import format_html

    task_id = str(uuid.uuid4())
    generate_metrics.delay(task_id)

    # Everything before the final "metrics/" segment is the changelist URL,
    # trailing slash included, so the redirect lands on it directly instead of
    # relying on a second slash-appending redirect.
    changelist_url = request.path.rsplit("metrics/", 1)[0]
    download_url = f"{changelist_url}metrics/{task_id}/"

    messages.add_message(
        request,
        messages.INFO,
        # format_html escapes the interpolated URL; only the literal markup
        # in the template string is treated as safe HTML.
        format_html(
            "Metrics generation started. Please wait a moment and then "
            "<a href='{}'>click here to download</a> when ready.",
            download_url,
        ),
    )
    return HttpResponse(status=303, headers={"Location": changelist_url})

def get_metrics_file(self, request, task_id):
    """
    Serve the generated metrics CSV for ``task_id`` if it exists and is valid.

    The file is looked up at ``MEDIA_ROOT/metrics/metrics_<task_id>.csv`` and
    is considered ready only when it is larger than 100 bytes. When it is not
    ready, a message with a retry link is queued (INFO if a
    ``metrics_<task_id>.tmp`` file is present, WARNING otherwise) and the
    user is redirected to the changelist.

    Args:
        request: The incoming admin request.
        task_id: Identifier captured from the ``metrics/<str:task_id>/`` route.

    Returns:
        A ``FileResponse`` streaming the CSV as an attachment named
        ``metrics.csv``, or an ``HttpResponse`` with status 303 redirecting to
        the changelist.
    """
    # Function-scope import: keeps this method self-contained without touching
    # the module's import block.
    from django.utils.html import format_html

    metrics_dir = os.path.join(settings.MEDIA_ROOT, "metrics")
    file_path = os.path.join(metrics_dir, f"metrics_{task_id}.csv")

    # A single getsize() call covers both "missing" and "too small" without a
    # gap between an existence check and the size check.
    try:
        is_ready = os.path.getsize(file_path) > 100
    except OSError:
        is_ready = False

    if is_ready:
        # The open file handle is handed to FileResponse, which streams it.
        response = FileResponse(open(file_path, "rb"), content_type="text/csv")
        response["Content-Disposition"] = 'attachment; filename="metrics.csv"'
        return response

    # The retry URL includes the request's query string, which the caller
    # controls, so it must be escaped before being embedded in the markup.
    retry_url = request.build_absolute_uri()

    # A temporary file indicates the task is still running.
    temp_file_path = os.path.join(metrics_dir, f"metrics_{task_id}.tmp")
    if os.path.exists(temp_file_path):
        level = messages.INFO
        notice = format_html(
            "The metrics file is still being generated. "
            "<a href='{}'>Click here to try again</a>.",
            retry_url,
        )
    else:
        level = messages.WARNING
        notice = format_html(
            "The metrics file is not ready yet. "
            "<a href='{}'>Click here to try again</a>.",
            retry_url,
        )
    messages.add_message(request, level, notice)

    # Strip only the final "metrics/<task_id>/" segment so the redirect target
    # keeps the changelist's trailing slash.
    changelist_url = request.path.rsplit(f"metrics/{task_id}/", 1)[0]
    return HttpResponse(status=303, headers={"Location": changelist_url})


@admin.action(description="Exclude URL and all children")
def exclude_pattern(modeladmin, request, queryset):
Expand Down
Loading