From cd7bf037f5d67dfd77b0fa1b508eb3ef1fc53f00 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Mon, 31 Mar 2025 13:56:59 -0500
Subject: [PATCH 1/5] Metrics Download Feature_01

---
 sde_collections/admin.py        |  73 ++++-
 sde_collections/tasks.py        | 268 ++++++++++++++++++
 .../collection/change_list.html |  15 +
 3 files changed, 354 insertions(+), 2 deletions(-)
 create mode 100644 sde_collections/templates/admin/sde_collections/collection/change_list.html

diff --git a/sde_collections/admin.py b/sde_collections/admin.py
index 766d8f3f..8f897eea 100644
--- a/sde_collections/admin.py
+++ b/sde_collections/admin.py
@@ -2,7 +2,12 @@
 
 from django import forms
 from django.contrib import admin, messages
-from django.http import HttpResponse
+from django.http import HttpResponse, FileResponse
+from django.utils.safestring import mark_safe
+from django.urls import path
+import uuid
+import os
+from django.conf import settings
 
 from sde_collections.models.delta_patterns import (
     DeltaDivisionPattern,
@@ -15,7 +20,7 @@
 from .models.collection_choice_fields import TDAMMTags
 from .models.delta_url import CuratedUrl, DeltaUrl, DumpUrl
 from .models.pattern import DivisionPattern, IncludePattern, TitlePattern
-from .tasks import fetch_full_text, import_candidate_urls_from_api
+from .tasks import fetch_full_text, import_candidate_urls_from_api, generate_metrics
 
 
 def fetch_and_replace_text_for_server(modeladmin, request, queryset, server_name):
@@ -295,6 +300,70 @@ def included_curated_urls_count(self, obj) -> int:
         fetch_full_text_xli_action,
     ]
     ordering = ("cleaning_order",)
+
+    def changelist_view(self, request, extra_context=None):
+        """
+        To add a button for metrics download
+        """
+        extra_context = extra_context or {}
+        extra_context['show_metrics_button'] = True
+        extra_context['metrics_url'] = request.path + 'metrics/'
+        return super().changelist_view(request, extra_context=extra_context)
+
+    def get_urls(self):
+        """
+        To add custom endpoints for metrics functionality
+        """
+        urls = super().get_urls()
+        custom_urls = [
+            path('metrics/', self.admin_site.admin_view(self.download_metrics), name='sde_collections_collection_metrics'),
+            path('metrics/<str:task_id>/', self.admin_site.admin_view(self.get_metrics_file), name='sde_collections_get_metrics'),
+        ]
+        return custom_urls + urls
+
+    def download_metrics(self, request):
+        """Custom view that starts metrics generation and returns to collection list"""
+        task_id = str(uuid.uuid4())
+        task = generate_metrics.delay(task_id)
+
+        download_url = request.path.rsplit('metrics/', 1)[0] + f'metrics/{task_id}/'
+
+        messages.add_message(
+            request,
+            messages.INFO,
+            mark_safe(f"Metrics generation started. Please wait a moment and then <a href='{download_url}'>click here</a> to download when ready.")
+        )
+        return HttpResponse(status=303, headers={"Location": request.path.replace('/metrics/', '')})
+
+    def get_metrics_file(self, request, task_id):
+        """Serve the generated metrics file if it exists and is valid"""
+
+        file_path = os.path.join(settings.MEDIA_ROOT, 'metrics', f'metrics_{task_id}.csv')
+
+        # Create the retry URL
+        current_url = request.build_absolute_uri()
+
+        # Check if file exists and is not empty (minimum size 100 bytes)
+        if os.path.exists(file_path) and os.path.getsize(file_path) > 100:
+            response = FileResponse(open(file_path, 'rb'), content_type='text/csv')
+            response['Content-Disposition'] = 'attachment; filename="metrics.csv"'
+            return response
+        else:
+            # Also check if there's a temporary file indicating task is still running
+            temp_file_path = os.path.join(settings.MEDIA_ROOT, 'metrics', f'metrics_{task_id}.tmp')
+            if os.path.exists(temp_file_path):
+                messages.add_message(
+                    request,
+                    messages.INFO,
+                    mark_safe(f"The metrics file is still being generated. <a href='{current_url}'>Click here</a> to try again.")
+                )
+            else:
+                messages.add_message(
+                    request,
+                    messages.WARNING,
+                    mark_safe(f"The metrics file is not ready yet. <a href='{current_url}'>Click here</a> to try again.")
+                )
+            return HttpResponse(status=303, headers={"Location": request.path.replace(f'/metrics/{task_id}/', '')})
 
 
 @admin.action(description="Exclude URL and all children")
diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py
index 98f26a34..2ca2de06 100644
--- a/sde_collections/tasks.py
+++ b/sde_collections/tasks.py
@@ -2,6 +2,9 @@
 import json
 import os
 import shutil
+import csv
+import requests
+import numpy as np
 
 import boto3
 from django.apps import apps
@@ -229,3 +232,268 @@ def migrate_dump_to_delta_and_handle_status_transistions(collection_id):
 
     collection.save()
     return f"Successfully migrated DumpUrls to DeltaUrls for collection {collection.name}."
+
+
+@celery_app.task(name="generate_metrics")
+def generate_metrics(task_id):
+    """
+    To asynchronously generate metrics
+    """
+    try:
+        # Generate a file path in the media directory
+        metrics_dir = os.path.join(settings.MEDIA_ROOT, 'metrics')
+        os.makedirs(metrics_dir, exist_ok=True)
+
+        # Use a temporary file during generation
+        temp_file_path = os.path.join(metrics_dir, f'metrics_{task_id}.tmp')
+        final_file_path = os.path.join(metrics_dir, f'metrics_{task_id}.csv')
+
+        # Initialize common variables
+        divArr = ['/Astrophysics/','/Biological and Physical Sciences/','/Earth Science/','/Heliophysics/','/Planetary Science/']
+        docArr = ['Data','Images','Documentation','Software and Tools','Missions and Instruments']
+
+        url = "https://sciencediscoveryengine.nasa.gov/api/v1/search.query"
+
+        # Create a temporary buffer for CSV data
+        with open(temp_file_path, 'w', newline='') as csvfile:
+            writer = csv.writer(csvfile)
+
+            # SECTION 1: DOCUMENT COUNT METRICS
+            writer.writerow(['SECTION 1: DOCUMENT COUNT BY DIVISION AND DOCUMENT TYPE'])
+            writer.writerow([])
+
+            # Initialize document count array
+            recCounter = np.zeros((6,5))
+
+            # Process documents through API
+            pageCount = 1
+            pageSize = 1000
+
+            print("Fetching document count metrics...")
+
+            while True:
+                payload = {
+                    "app": 'nasa-sba-smd',
+                    "query": {
+                        "name": 'query-smd-primary',
+                        "page": pageCount,
+                        "pageSize": pageSize,
+                        "scope": 'All',
+                        "advanced": {},
+                    },
+                }
+
+                print(f"Processing page {pageCount} for document count")
+                response_data = requests.post(url, headers={}, json=payload, verify=False).json()
+
+                num_records = len(response_data.get('records', []))
+                if num_records == 0:
+                    break
+
+                for record in response_data['records']:
+                    if 'sourcestr56' in record:
+                        # Extract division and document type
+                        division = record.get('treepath', [''])[0]
+                        doctype = record['sourcestr56']
+
+                        # Update counters
+                        for j in range(0, len(docArr)):
+                            for k in range(0, len(divArr)):
+                                if doctype == docArr[j]:
+                                    if divArr[k] in division:
+                                        recCounter[k, j] += 1
+                                        recCounter[5, j] += 1
+
+                pageCount += 1
+
+            # Write document count metrics
+            writer.writerow(['Division', 'Document Type', 'Count'])
+
+            # Add All Divisions data
+            writer.writerow(['All Divisions', 'Data', str(int(recCounter[5, 0]))])
+            writer.writerow(['All Divisions', 'Images', str(int(recCounter[5, 1]))])
+            writer.writerow(['All Divisions', 'Documentation', str(int(recCounter[5, 2]))])
+            writer.writerow(['All Divisions', 'Software and Tools', str(int(recCounter[5, 3]))])
+            writer.writerow(['All Divisions', 'Missions and Instruments', str(int(recCounter[5, 4]))])
+            writer.writerow(['All Divisions', 'Total', str(int(np.sum(recCounter[5, :])))])
+
+            # Add data for each division
+            division_names = ['Astrophysics', 'Biological and Physical Sciences', 'Earth Science', 'Heliophysics', 'Planetary Science']
+            for i, div_name in enumerate(division_names):
+                writer.writerow([div_name, 'Data', str(int(recCounter[i, 0]))])
+                writer.writerow([div_name, 'Images', str(int(recCounter[i, 1]))])
+                writer.writerow([div_name, 'Documentation', str(int(recCounter[i, 2]))])
+                writer.writerow([div_name, 'Software and Tools', str(int(recCounter[i, 3]))])
+                writer.writerow([div_name, 'Missions and Instruments', str(int(recCounter[i, 4]))])
+                writer.writerow([div_name, 'Total', str(int(np.sum(recCounter[i, :])))])
+
+            # SECTION 2: SOURCES COUNT
+            writer.writerow([])
+            writer.writerow(['SECTION 2: SOURCES COUNT BY DIVISION'])
+            writer.writerow([])
+
+            # Initialize sources and divisions lists
+            sources = []
+            divs = []
+
+            # Reset page counter for new query
+            pageCount = 1
+
+            print("Fetching sources count metrics...")
+
+            while True:
+                payload = {
+                    "app": 'nasa-sba-smd',
+                    "query": {
+                        "name": 'query-smd-primary',
+                        "page": pageCount,
+                        "pageSize": pageSize,
+                        "scope": 'All',
+                        "advanced": {},
+                    },
+                }
+
+                print(f"Processing page {pageCount} for sources count")
+                response_data = requests.post(url, headers={}, json=payload, verify=False).json()
+
+                num_records = len(response_data.get('records', []))
+                if num_records == 0:
+                    break
+
+                for record in response_data['records']:
+                    if 'sourcestr56' in record:
+                        source_name = record.get('collection', [''])[0]
+                        division = record.get('treepath', [''])[0]
+
+                        if source_name not in sources:
+                            sources.append(source_name)
+                            divs.append(division)
+
+                pageCount += 1
+
+            # Count sources by division
+            division_counts = {div: 0 for div in divArr}
+            total_count = len(sources)
+
+            for i, div_path in enumerate(divs):
+                for div in divArr:
+                    if div in div_path:
+                        division_counts[div] += 1
+
+            # Write sources count metrics
+            writer.writerow(['Division', 'Count'])
+
+            for div in divArr:
+                writer.writerow([div.strip('/'), str(division_counts[div])])
+
+            writer.writerow(['Total', str(total_count)])
+
+            # SECTION 3: DOCUMENTS BY SOURCE
+            writer.writerow([])
+            writer.writerow(['SECTION 3: DOCUMENTS BY SOURCE'])
+            writer.writerow([])
+
+            # List of new sources to track
+            new_sources = [
+                "AIM: Aeronomy of Ice in the Mesosphere",
+                "ASDC: Atmospheric Science Data Center",
+                "G-LiHT",
+                "GENESIS: Global Environmental & Earth Science Information System",
+                "HyTES: Hyperspectral Thermal Emission Spectrometer",
+                "IBM-NASA Prithvi Models Family",
+                "COMET ASTEROID TELESCOPIC CATALOG HUNTER",
+                "Escape and Plasma Acceleration and Dynamics Explorers (ESCAPADE)",
+                "Extreme Ultraviolet Variability Experiment (EVE)",
+                "Crustal Dynamics Data Information System",
+                "Aura Atmospheric Chemistry",
+                "LDOPE: Land Data Operational Products Evaluation",
+                "CPL: Cloud Physics Lidar",
+                "Direct Readout Laboratory",
+                "Center for Near Earth Object Studies (CNEOS)"
+            ]
+
+            # Initialize counter array for source documents
+            nsCounter = np.zeros((len(new_sources), 5, 5))
+
+            # Reset page counter for new query
+            pageCount = 1
+
+            print("Fetching source documents metrics...")
+
+            while True:
+                payload = {
+                    "app": 'nasa-sba-smd',
+                    "query": {
+                        "name": 'query-smd-primary',
+                        "page": pageCount,
+                        "pageSize": pageSize,
+                        "scope": 'All',
+                        "advanced": {},
+                    },
+                }
+
+                print(f"Processing page {pageCount} for source documents")
+                response_data = requests.post(url, headers={}, json=payload, verify=False).json()
+
+                num_records = len(response_data.get('records', []))
+                if num_records == 0:
+                    break
+
+                for record in response_data['records']:
+                    if 'sourcestr56' in record:
+                        source_name = record.get('treepath', [''])[0]
+                        division = record.get('treepath', [''])[0]
+                        doc_type = record.get('sourcestr56', '')
+
+                        for n, new_source in enumerate(new_sources):
+                            if new_source in source_name:
+                                for j, doc in enumerate(docArr):
+                                    if doc_type == doc:
+                                        for k, div in enumerate(divArr):
+                                            if div in division:
+                                                nsCounter[n, k, j] += 1
+
+                pageCount += 1
+
+            # Write source documents metrics
+            for n, source in enumerate(new_sources):
+                total_source_docs = np.sum(nsCounter[n, :, :])
+                writer.writerow([f'Source: {source}'])
+                writer.writerow(['The total number of new documents:', str(int(total_source_docs))])
+
+                for j, doc_type in enumerate(docArr):
+                    doc_count = np.sum(nsCounter[n, :, j])
+                    writer.writerow([f'The total number of new {doc_type.lower()} documents:', str(int(doc_count))])
+                writer.writerow([])
+
+            writer.writerow(['DIVISION BREAKDOWN'])
+            writer.writerow([])
+
+            for d, division in enumerate(divArr):
+                div_name = division.strip('/')
+                total_docs = np.sum(nsCounter[:, d, :])
+                writer.writerow([f'Division: {div_name}'])
+                writer.writerow(['The total number of new documents:', str(int(total_docs))])
+
+                for t, doc_type in enumerate(docArr):
+                    doc_count = np.sum(nsCounter[:, d, t])
+                    writer.writerow([f'The total number of new {docArr[t].lower()} documents:', str(int(doc_count))])
+                writer.writerow([])
+
+            total_overall = np.sum(nsCounter[:, :, :])
+            writer.writerow(['All Divisions'])
+            writer.writerow(['The total number of new documents:', str(int(total_overall))])
+
+            for t, doc_type in enumerate(docArr):
+                total_doc_type = np.sum(nsCounter[:, :, t])
+                writer.writerow([f'The total number of new {docArr[t].lower()} documents:', str(int(total_doc_type))])
+
+        # Move the file to its final location when completed
+        shutil.move(temp_file_path, final_file_path)
+
+        print(f"Metrics generation complete. File saved to {final_file_path}")
+        return True
+
+    except Exception as e:
+        print(f"Error in generate_metrics: {str(e)}")
+        return False
diff --git a/sde_collections/templates/admin/sde_collections/collection/change_list.html b/sde_collections/templates/admin/sde_collections/collection/change_list.html
new file mode 100644
index 00000000..0913672f
--- /dev/null
+++ b/sde_collections/templates/admin/sde_collections/collection/change_list.html
@@ -0,0 +1,15 @@
+{% extends "admin/change_list.html" %}
+{% load i18n %}
+
+{% block object-tools %}
+  {% if show_metrics_button %}
+    <div class="object-tools">
+      <a href="{{ metrics_url }}" class="button">Download Metrics</a>
+    </div>
+  {% endif %}
+
+  {% block object-tools-items %}
+    {{ block.super }}
+  {% endblock %}
+
+{% endblock %}

From 80fd262bacb319abe64eebbb3136f97a0f21cbe0 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Mon, 31 Mar 2025 15:08:18 -0500
Subject: [PATCH 2/5] Added Cleanup Mechanism

---
 sde_collections/tasks.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py
index 2ca2de06..b238abcc 100644
--- a/sde_collections/tasks.py
+++ b/sde_collections/tasks.py
@@ -237,13 +237,25 @@ def migrate_dump_to_delta_and_handle_status_transistions(collection_id):
 @celery_app.task(name="generate_metrics")
 def generate_metrics(task_id):
     """
-    To asynchronously generate metrics
+    Asynchronously generate metrics and save to a downloadable file
     """
     try:
         # Generate a file path in the media directory
         metrics_dir = os.path.join(settings.MEDIA_ROOT, 'metrics')
         os.makedirs(metrics_dir, exist_ok=True)
 
+        # Clean up old metrics files
+        for filename in os.listdir(metrics_dir):
+            if filename.startswith('metrics_') and (filename.endswith('.csv') or filename.endswith('.tmp')):
+                # Skip the current task's files
+                if not filename.startswith(f'metrics_{task_id}'):
+                    file_path = os.path.join(metrics_dir, filename)
+                    try:
+                        os.remove(file_path)
+                        print(f"Deleted old metrics file: {filename}")
+                    except Exception as e:
+                        print(f"Failed to delete {filename}: {str(e)}")
+
         # Use a temporary file during generation
         temp_file_path = os.path.join(metrics_dir, f'metrics_{task_id}.tmp')
         final_file_path = os.path.join(metrics_dir, f'metrics_{task_id}.csv')
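A note on the write path shared by patches 1 and 2: generate_metrics stages the CSV under a `.tmp` name and only moves it to `metrics_<task_id>.csv` once generation finishes, which is what lets get_metrics_file treat "final file present and >100 bytes" as the ready signal and ".tmp present" as the in-progress signal. A minimal standalone sketch of the same pattern (file names hypothetical, not part of the patch):

    import csv
    import os

    def write_csv_atomically(final_path, rows):
        # Stage the data in a sibling .tmp file so readers never see a partial CSV
        tmp_path = final_path + ".tmp"
        with open(tmp_path, "w", newline="") as f:
            csv.writer(f).writerows(rows)
        # os.replace is an atomic rename on POSIX when both paths share a filesystem
        os.replace(tmp_path, final_path)

    write_csv_atomically("metrics_demo.csv", [["Division", "Count"], ["Astrophysics", "42"]])

shutil.move, as used in the task, falls back to copy-plus-delete only when source and destination sit on different filesystems; here both live in MEDIA_ROOT/metrics/, so it behaves as a plain rename.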
From b7e666cabec9ef8ed9ef5281e2b8a58bbbd9ae87 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 31 Mar 2025 20:10:27 +0000
Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 sde_collections/admin.py        |  70 +++--
 sde_collections/tasks.py        | 258 +++++++++---------
 .../collection/change_list.html |   2 +-
 3 files changed, 177 insertions(+), 153 deletions(-)

diff --git a/sde_collections/admin.py b/sde_collections/admin.py
index 8f897eea..0f51135f 100644
--- a/sde_collections/admin.py
+++ b/sde_collections/admin.py
@@ -1,13 +1,13 @@
 import csv
+import os
+import uuid
 
 from django import forms
+from django.conf import settings
 from django.contrib import admin, messages
-from django.http import HttpResponse, FileResponse
-from django.utils.safestring import mark_safe
+from django.http import FileResponse, HttpResponse
 from django.urls import path
-import uuid
-import os
-from django.conf import settings
+from django.utils.safestring import mark_safe
 
 from sde_collections.models.delta_patterns import (
     DeltaDivisionPattern,
@@ -20,7 +20,7 @@
 from .models.collection_choice_fields import TDAMMTags
 from .models.delta_url import CuratedUrl, DeltaUrl, DumpUrl
 from .models.pattern import DivisionPattern, IncludePattern, TitlePattern
-from .tasks import fetch_full_text, import_candidate_urls_from_api, generate_metrics
+from .tasks import fetch_full_text, generate_metrics, import_candidate_urls_from_api
 
 
 def fetch_and_replace_text_for_server(modeladmin, request, queryset, server_name):
@@ -300,70 +300,82 @@ def included_curated_urls_count(self, obj) -> int:
         fetch_full_text_xli_action,
     ]
     ordering = ("cleaning_order",)
-
+
     def changelist_view(self, request, extra_context=None):
         """
         To add a button for metrics download
         """
         extra_context = extra_context or {}
-        extra_context['show_metrics_button'] = True
-        extra_context['metrics_url'] = request.path + 'metrics/'
+        extra_context["show_metrics_button"] = True
+        extra_context["metrics_url"] = request.path + "metrics/"
         return super().changelist_view(request, extra_context=extra_context)
-
+
     def get_urls(self):
         """
         To add custom endpoints for metrics functionality
        """
         urls = super().get_urls()
         custom_urls = [
-            path('metrics/', self.admin_site.admin_view(self.download_metrics), name='sde_collections_collection_metrics'),
-            path('metrics/<str:task_id>/', self.admin_site.admin_view(self.get_metrics_file), name='sde_collections_get_metrics'),
+            path(
+                "metrics/", self.admin_site.admin_view(self.download_metrics), name="sde_collections_collection_metrics"
+            ),
+            path(
+                "metrics/<str:task_id>/",
+                self.admin_site.admin_view(self.get_metrics_file),
+                name="sde_collections_get_metrics",
+            ),
         ]
         return custom_urls + urls
-
+
     def download_metrics(self, request):
         """Custom view that starts metrics generation and returns to collection list"""
         task_id = str(uuid.uuid4())
         task = generate_metrics.delay(task_id)
-
-        download_url = request.path.rsplit('metrics/', 1)[0] + f'metrics/{task_id}/'
-
+
+        download_url = request.path.rsplit("metrics/", 1)[0] + f"metrics/{task_id}/"
+
         messages.add_message(
             request,
             messages.INFO,
-            mark_safe(f"Metrics generation started. Please wait a moment and then <a href='{download_url}'>click here</a> to download when ready.")
+            mark_safe(
+                f"Metrics generation started. Please wait a moment and then <a href='{download_url}'>click here</a> to download when ready."
+            ),
         )
-        return HttpResponse(status=303, headers={"Location": request.path.replace('/metrics/', '')})
-
+        return HttpResponse(status=303, headers={"Location": request.path.replace("/metrics/", "")})
+
     def get_metrics_file(self, request, task_id):
         """Serve the generated metrics file if it exists and is valid"""
-
-        file_path = os.path.join(settings.MEDIA_ROOT, 'metrics', f'metrics_{task_id}.csv')
-
+
+        file_path = os.path.join(settings.MEDIA_ROOT, "metrics", f"metrics_{task_id}.csv")
+
         # Create the retry URL
         current_url = request.build_absolute_uri()
-
+
         # Check if file exists and is not empty (minimum size 100 bytes)
         if os.path.exists(file_path) and os.path.getsize(file_path) > 100:
-            response = FileResponse(open(file_path, 'rb'), content_type='text/csv')
-            response['Content-Disposition'] = 'attachment; filename="metrics.csv"'
+            response = FileResponse(open(file_path, "rb"), content_type="text/csv")
+            response["Content-Disposition"] = 'attachment; filename="metrics.csv"'
             return response
         else:
             # Also check if there's a temporary file indicating task is still running
-            temp_file_path = os.path.join(settings.MEDIA_ROOT, 'metrics', f'metrics_{task_id}.tmp')
+            temp_file_path = os.path.join(settings.MEDIA_ROOT, "metrics", f"metrics_{task_id}.tmp")
             if os.path.exists(temp_file_path):
                 messages.add_message(
                     request,
                     messages.INFO,
-                    mark_safe(f"The metrics file is still being generated. <a href='{current_url}'>Click here</a> to try again.")
+                    mark_safe(
+                        f"The metrics file is still being generated. <a href='{current_url}'>Click here</a> to try again."
+                    ),
                 )
             else:
                 messages.add_message(
                     request,
                     messages.WARNING,
-                    mark_safe(f"The metrics file is not ready yet. <a href='{current_url}'>Click here</a> to try again.")
+                    mark_safe(
+                        f"The metrics file is not ready yet. <a href='{current_url}'>Click here</a> to try again."
+                    ),
                 )
-            return HttpResponse(status=303, headers={"Location": request.path.replace(f'/metrics/{task_id}/', '')})
+            return HttpResponse(status=303, headers={"Location": request.path.replace(f"/metrics/{task_id}/", "")})
 
 
 @admin.action(description="Exclude URL and all children")
diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py
index b238abcc..859bdade 100644
--- a/sde_collections/tasks.py
+++ b/sde_collections/tasks.py
@@ -1,12 +1,12 @@
 # /sde_collections/tasks.py
+import csv
 import json
 import os
 import shutil
-import csv
-import requests
-import numpy as np
 
 import boto3
+import numpy as np
+import requests
 from django.apps import apps
 from django.conf import settings
 from django.core import management
@@ -241,73 +241,79 @@ def generate_metrics(task_id):
     """
     try:
         # Generate a file path in the media directory
-        metrics_dir = os.path.join(settings.MEDIA_ROOT, 'metrics')
+        metrics_dir = os.path.join(settings.MEDIA_ROOT, "metrics")
         os.makedirs(metrics_dir, exist_ok=True)
-
+
         # Clean up old metrics files
         for filename in os.listdir(metrics_dir):
-            if filename.startswith('metrics_') and (filename.endswith('.csv') or filename.endswith('.tmp')):
+            if filename.startswith("metrics_") and (filename.endswith(".csv") or filename.endswith(".tmp")):
                 # Skip the current task's files
-                if not filename.startswith(f'metrics_{task_id}'):
+                if not filename.startswith(f"metrics_{task_id}"):
                     file_path = os.path.join(metrics_dir, filename)
                     try:
                         os.remove(file_path)
                         print(f"Deleted old metrics file: {filename}")
                     except Exception as e:
                         print(f"Failed to delete {filename}: {str(e)}")
-
+
         # Use a temporary file during generation
-        temp_file_path = os.path.join(metrics_dir, f'metrics_{task_id}.tmp')
-        final_file_path = os.path.join(metrics_dir, f'metrics_{task_id}.csv')
-
+        temp_file_path = os.path.join(metrics_dir, f"metrics_{task_id}.tmp")
+        final_file_path = os.path.join(metrics_dir, f"metrics_{task_id}.csv")
+
         # Initialize common variables
-        divArr = ['/Astrophysics/','/Biological and Physical Sciences/','/Earth Science/','/Heliophysics/','/Planetary Science/']
-        docArr = ['Data','Images','Documentation','Software and Tools','Missions and Instruments']
-
+        divArr = [
+            "/Astrophysics/",
+            "/Biological and Physical Sciences/",
+            "/Earth Science/",
+            "/Heliophysics/",
+            "/Planetary Science/",
+        ]
+        docArr = ["Data", "Images", "Documentation", "Software and Tools", "Missions and Instruments"]
+
         url = "https://sciencediscoveryengine.nasa.gov/api/v1/search.query"
-
+
         # Create a temporary buffer for CSV data
-        with open(temp_file_path, 'w', newline='') as csvfile:
+        with open(temp_file_path, "w", newline="") as csvfile:
             writer = csv.writer(csvfile)
-
+
             # SECTION 1: DOCUMENT COUNT METRICS
-            writer.writerow(['SECTION 1: DOCUMENT COUNT BY DIVISION AND DOCUMENT TYPE'])
+            writer.writerow(["SECTION 1: DOCUMENT COUNT BY DIVISION AND DOCUMENT TYPE"])
             writer.writerow([])
-
+
             # Initialize document count array
-            recCounter = np.zeros((6,5))
-
+            recCounter = np.zeros((6, 5))
+
             # Process documents through API
             pageCount = 1
             pageSize = 1000
-
+
             print("Fetching document count metrics...")
-
+
             while True:
                 payload = {
-                    "app": 'nasa-sba-smd',
+                    "app": "nasa-sba-smd",
                     "query": {
-                        "name": 'query-smd-primary',
+                        "name": "query-smd-primary",
                         "page": pageCount,
                         "pageSize": pageSize,
-                        "scope": 'All',
+                        "scope": "All",
                         "advanced": {},
                     },
                 }
-
+
                 print(f"Processing page {pageCount} for document count")
                 response_data = requests.post(url, headers={}, json=payload, verify=False).json()
-
-                num_records = len(response_data.get('records', []))
+
+                num_records = len(response_data.get("records", []))
                 if num_records == 0:
                     break
-
-                for record in response_data['records']:
-                    if 'sourcestr56' in record:
+
+                for record in response_data["records"]:
+                    if "sourcestr56" in record:
                         # Extract division and document type
-                        division = record.get('treepath', [''])[0]
-                        doctype = record['sourcestr56']
-
+                        division = record.get("treepath", [""])[0]
+                        doctype = record["sourcestr56"]
+
                         # Update counters
                         for j in range(0, len(docArr)):
                             for k in range(0, len(divArr)):
@@ -315,96 +321,102 @@ def generate_metrics(task_id):
                                 if divArr[k] in division:
                                     recCounter[k, j] += 1
                                     recCounter[5, j] += 1
-
+
                 pageCount += 1
-
+
             # Write document count metrics
-            writer.writerow(['Division', 'Document Type', 'Count'])
-
+            writer.writerow(["Division", "Document Type", "Count"])
+
             # Add All Divisions data
-            writer.writerow(['All Divisions', 'Data', str(int(recCounter[5, 0]))])
-            writer.writerow(['All Divisions', 'Images', str(int(recCounter[5, 1]))])
-            writer.writerow(['All Divisions', 'Documentation', str(int(recCounter[5, 2]))])
-            writer.writerow(['All Divisions', 'Software and Tools', str(int(recCounter[5, 3]))])
-            writer.writerow(['All Divisions', 'Missions and Instruments', str(int(recCounter[5, 4]))])
-            writer.writerow(['All Divisions', 'Total', str(int(np.sum(recCounter[5, :])))])
-
+            writer.writerow(["All Divisions", "Data", str(int(recCounter[5, 0]))])
+            writer.writerow(["All Divisions", "Images", str(int(recCounter[5, 1]))])
+            writer.writerow(["All Divisions", "Documentation", str(int(recCounter[5, 2]))])
+            writer.writerow(["All Divisions", "Software and Tools", str(int(recCounter[5, 3]))])
+            writer.writerow(["All Divisions", "Missions and Instruments", str(int(recCounter[5, 4]))])
+            writer.writerow(["All Divisions", "Total", str(int(np.sum(recCounter[5, :])))])
+
             # Add data for each division
-            division_names = ['Astrophysics', 'Biological and Physical Sciences', 'Earth Science', 'Heliophysics', 'Planetary Science']
+            division_names = [
+                "Astrophysics",
+                "Biological and Physical Sciences",
+                "Earth Science",
+                "Heliophysics",
+                "Planetary Science",
+            ]
             for i, div_name in enumerate(division_names):
-                writer.writerow([div_name, 'Data', str(int(recCounter[i, 0]))])
-                writer.writerow([div_name, 'Images', str(int(recCounter[i, 1]))])
-                writer.writerow([div_name, 'Documentation', str(int(recCounter[i, 2]))])
-                writer.writerow([div_name, 'Software and Tools', str(int(recCounter[i, 3]))])
-                writer.writerow([div_name, 'Missions and Instruments', str(int(recCounter[i, 4]))])
-                writer.writerow([div_name, 'Total', str(int(np.sum(recCounter[i, :])))])
-
+                writer.writerow([div_name, "Data", str(int(recCounter[i, 0]))])
+                writer.writerow([div_name, "Images", str(int(recCounter[i, 1]))])
+                writer.writerow([div_name, "Documentation", str(int(recCounter[i, 2]))])
+                writer.writerow([div_name, "Software and Tools", str(int(recCounter[i, 3]))])
+                writer.writerow([div_name, "Missions and Instruments", str(int(recCounter[i, 4]))])
+                writer.writerow([div_name, "Total", str(int(np.sum(recCounter[i, :])))])
+
             # SECTION 2: SOURCES COUNT
             writer.writerow([])
-            writer.writerow(['SECTION 2: SOURCES COUNT BY DIVISION'])
+            writer.writerow(["SECTION 2: SOURCES COUNT BY DIVISION"])
             writer.writerow([])
-
+
             # Initialize sources and divisions lists
             sources = []
             divs = []
-
+
             # Reset page counter for new query
             pageCount = 1
-
+
             print("Fetching sources count metrics...")
-
+
             while True:
                 payload = {
-                    "app": 'nasa-sba-smd',
+                    "app": "nasa-sba-smd",
                     "query": {
-                        "name": 'query-smd-primary',
+                        "name": "query-smd-primary",
                         "page": pageCount,
                         "pageSize": pageSize,
-                        "scope": 'All',
+                        "scope": "All",
                         "advanced": {},
                     },
                 }
-
+
                 print(f"Processing page {pageCount} for sources count")
                 response_data = requests.post(url, headers={}, json=payload, verify=False).json()
-
-                num_records = len(response_data.get('records', []))
+
+                num_records = len(response_data.get("records", []))
                 if num_records == 0:
                     break
-
-                for record in response_data['records']:
-                    if 'sourcestr56' in record:
-                        source_name = record.get('collection', [''])[0]
-                        division = record.get('treepath', [''])[0]
-
+
+                for record in response_data["records"]:
+                    if "sourcestr56" in record:
+                        source_name = record.get("collection", [""])[0]
+                        division = record.get("treepath", [""])[0]
+
                         if source_name not in sources:
                             sources.append(source_name)
                             divs.append(division)
-
+
                 pageCount += 1
-
+
             # Count sources by division
             division_counts = {div: 0 for div in divArr}
             total_count = len(sources)
-
+
             for i, div_path in enumerate(divs):
                 for div in divArr:
                     if div in div_path:
                         division_counts[div] += 1
-
+
             # Write sources count metrics
-            writer.writerow(['Division', 'Count'])
-
+            writer.writerow(["Division", "Count"])
+
             for div in divArr:
-                writer.writerow([div.strip('/'), str(division_counts[div])])
-
-            writer.writerow(['Total', str(total_count)])
-
+                writer.writerow([div.strip("/"), str(division_counts[div])])
+
+            writer.writerow(["Total", str(total_count)])
+
             # SECTION 3: DOCUMENTS BY SOURCE
             writer.writerow([])
-            writer.writerow(['SECTION 3: DOCUMENTS BY SOURCE'])
+            writer.writerow(["SECTION 3: DOCUMENTS BY SOURCE"])
             writer.writerow([])
-
+
             # List of new sources to track
             new_sources = [
                 "AIM: Aeronomy of Ice in the Mesosphere",
@@ -421,42 +433,42 @@ def generate_metrics(task_id):
                 "LDOPE: Land Data Operational Products Evaluation",
                 "CPL: Cloud Physics Lidar",
                 "Direct Readout Laboratory",
-                "Center for Near Earth Object Studies (CNEOS)"
+                "Center for Near Earth Object Studies (CNEOS)",
             ]
-
+
             # Initialize counter array for source documents
             nsCounter = np.zeros((len(new_sources), 5, 5))
-
+
             # Reset page counter for new query
             pageCount = 1
-
+
             print("Fetching source documents metrics...")
-
+
             while True:
                 payload = {
-                    "app": 'nasa-sba-smd',
+                    "app": "nasa-sba-smd",
                     "query": {
-                        "name": 'query-smd-primary',
+                        "name": "query-smd-primary",
                         "page": pageCount,
                         "pageSize": pageSize,
-                        "scope": 'All',
+                        "scope": "All",
                         "advanced": {},
                     },
                 }
-
+
                 print(f"Processing page {pageCount} for source documents")
                 response_data = requests.post(url, headers={}, json=payload, verify=False).json()
-
-                num_records = len(response_data.get('records', []))
+
+                num_records = len(response_data.get("records", []))
                 if num_records == 0:
                     break
-
-                for record in response_data['records']:
-                    if 'sourcestr56' in record:
-                        source_name = record.get('treepath', [''])[0]
-                        division = record.get('treepath', [''])[0]
-                        doc_type = record.get('sourcestr56', '')
-
+
+                for record in response_data["records"]:
+                    if "sourcestr56" in record:
+                        source_name = record.get("treepath", [""])[0]
+                        division = record.get("treepath", [""])[0]
+                        doc_type = record.get("sourcestr56", "")
+
                         for n, new_source in enumerate(new_sources):
                             if new_source in source_name:
                                 for j, doc in enumerate(docArr):
@@ -464,48 +476,48 @@ def generate_metrics(task_id):
                                     for k, div in enumerate(divArr):
                                         if div in division:
                                             nsCounter[n, k, j] += 1
-
+
                 pageCount += 1
-
+
             # Write source documents metrics
             for n, source in enumerate(new_sources):
                 total_source_docs = np.sum(nsCounter[n, :, :])
-                writer.writerow([f'Source: {source}'])
-                writer.writerow(['The total number of new documents:', str(int(total_source_docs))])
-
+                writer.writerow([f"Source: {source}"])
+                writer.writerow(["The total number of new documents:", str(int(total_source_docs))])
+
                 for j, doc_type in enumerate(docArr):
                     doc_count = np.sum(nsCounter[n, :, j])
-                    writer.writerow([f'The total number of new {doc_type.lower()} documents:', str(int(doc_count))])
+                    writer.writerow([f"The total number of new {doc_type.lower()} documents:", str(int(doc_count))])
                 writer.writerow([])
-
-            writer.writerow(['DIVISION BREAKDOWN'])
+
+            writer.writerow(["DIVISION BREAKDOWN"])
             writer.writerow([])
-
+
             for d, division in enumerate(divArr):
-                div_name = division.strip('/')
+                div_name = division.strip("/")
                 total_docs = np.sum(nsCounter[:, d, :])
-                writer.writerow([f'Division: {div_name}'])
-                writer.writerow(['The total number of new documents:', str(int(total_docs))])
-
+                writer.writerow([f"Division: {div_name}"])
+                writer.writerow(["The total number of new documents:", str(int(total_docs))])
+
                 for t, doc_type in enumerate(docArr):
                     doc_count = np.sum(nsCounter[:, d, t])
-                    writer.writerow([f'The total number of new {docArr[t].lower()} documents:', str(int(doc_count))])
+                    writer.writerow([f"The total number of new {docArr[t].lower()} documents:", str(int(doc_count))])
                 writer.writerow([])
-
+
             total_overall = np.sum(nsCounter[:, :, :])
-            writer.writerow(['All Divisions'])
-            writer.writerow(['The total number of new documents:', str(int(total_overall))])
-
+            writer.writerow(["All Divisions"])
+            writer.writerow(["The total number of new documents:", str(int(total_overall))])
+
             for t, doc_type in enumerate(docArr):
                 total_doc_type = np.sum(nsCounter[:, :, t])
-                writer.writerow([f'The total number of new {docArr[t].lower()} documents:', str(int(total_doc_type))])
-
+                writer.writerow([f"The total number of new {docArr[t].lower()} documents:", str(int(total_doc_type))])
+
         # Move the file to its final location when completed
         shutil.move(temp_file_path, final_file_path)
-
+
         print(f"Metrics generation complete. File saved to {final_file_path}")
         return True
-
+
     except Exception as e:
         print(f"Error in generate_metrics: {str(e)}")
         return False
diff --git a/sde_collections/templates/admin/sde_collections/collection/change_list.html b/sde_collections/templates/admin/sde_collections/collection/change_list.html
index 0913672f..616c0165 100644
--- a/sde_collections/templates/admin/sde_collections/collection/change_list.html
+++ b/sde_collections/templates/admin/sde_collections/collection/change_list.html
@@ -12,4 +12,4 @@
     {{ block.super }}
   {% endblock %}
 
-{% endblock %}
\ No newline at end of file
+{% endblock %}
+ writer.writerow([f"The total number of new {docArr[t].lower()} documents:", str(int(total_doc_type))]) + # Move the file to its final location when completed shutil.move(temp_file_path, final_file_path) - + print(f"Metrics generation complete. File saved to {final_file_path}") return True - + except Exception as e: print(f"Error in generate_metrics: {str(e)}") return False diff --git a/sde_collections/templates/admin/sde_collections/collection/change_list.html b/sde_collections/templates/admin/sde_collections/collection/change_list.html index 0913672f..616c0165 100644 --- a/sde_collections/templates/admin/sde_collections/collection/change_list.html +++ b/sde_collections/templates/admin/sde_collections/collection/change_list.html @@ -12,4 +12,4 @@ {{ block.super }} {% endblock %} -{% endblock %} +{% endblock %} From 922e90eda72418de2dc97b487aae2a318d681e6d Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 31 Mar 2025 15:58:46 -0500 Subject: [PATCH 4/5] Added chagelog + other minor fixes --- CHANGELOG.md | 9 +++++++++ requirements/base.txt | 1 + sde_collections/admin.py | 17 +++++++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ab909b0..a12582a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -183,3 +183,12 @@ For each PR made, an entry should be added to this changelog. It should contain - physics_of_the_cosmos - stsci_space_telescope_science_institute - Once the front end has been updated to allow for tag edits, all astrophysics collections will be marked to be run through the pipeline + +- 1295-asynchronous-metrics-download-in-admin-panel + - Description: Implemented asynchronous metrics download in Django admin + - Changes: + - Button Addition:Integrated a 'metrics' button + - Task Generation: download_metrics handles button clicks to initiate a Celery task, display a download link and manage redirection. + - Cleanup Mechanism: Cleans up old metrics files in the directory, keeping only current task related files. + - Background Processing: Runs generate_metrics task asynchronously to gather data and generate a CSV in MEDIA_ROOT/metrics/ + - File Retrieval: get_metrics_file checks file availability and size, providing a download if ready or status messages for in progress files. diff --git a/requirements/base.txt b/requirements/base.txt index a4ca9cf8..0b81d058 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -34,3 +34,4 @@ tenacity==8.2.2 tqdm==4.66.3 unidecode==1.3.8 xmltodict==0.13.0 +numpy==1.24.3 diff --git a/sde_collections/admin.py b/sde_collections/admin.py index 8f897eea..dfb00fd5 100644 --- a/sde_collections/admin.py +++ b/sde_collections/admin.py @@ -324,14 +324,17 @@ def get_urls(self): def download_metrics(self, request): """Custom view that starts metrics generation and returns to collection list""" task_id = str(uuid.uuid4()) - task = generate_metrics.delay(task_id) + generate_metrics.delay(task_id) download_url = request.path.rsplit('metrics/', 1)[0] + f'metrics/{task_id}/' messages.add_message( request, messages.INFO, - mark_safe(f"Metrics generation started. Please wait a moment and then click here to download when ready.") + mark_safe( + f"Metrics generation started. Please wait a moment and then " + f"click here to download when ready." 
+ ) ) return HttpResponse(status=303, headers={"Location": request.path.replace('/metrics/', '')}) @@ -355,13 +358,19 @@ def get_metrics_file(self, request, task_id): messages.add_message( request, messages.INFO, - mark_safe(f"The metrics file is still being generated. Click here to try again.") + mark_safe( + f"The metrics file is still being generated. " + f"Click here to try again." + ) ) else: messages.add_message( request, messages.WARNING, - mark_safe(f"The metrics file is not ready yet. Click here to try again.") + mark_safe( + f"The metrics file is not ready yet. " + f"Click here to try again." + ) ) return HttpResponse(status=303, headers={"Location": request.path.replace(f'/metrics/{task_id}/', '')}) From 45578877041b08810b2930af530961161f8d8e70 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 21:01:37 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sde_collections/admin.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sde_collections/admin.py b/sde_collections/admin.py index cadf9f2c..5a45bc60 100644 --- a/sde_collections/admin.py +++ b/sde_collections/admin.py @@ -331,16 +331,16 @@ def download_metrics(self, request): """Custom view that starts metrics generation and returns to collection list""" task_id = str(uuid.uuid4()) generate_metrics.delay(task_id) - - download_url = request.path.rsplit('metrics/', 1)[0] + f'metrics/{task_id}/' - + + download_url = request.path.rsplit("metrics/", 1)[0] + f"metrics/{task_id}/" + messages.add_message( request, messages.INFO, mark_safe( f"Metrics generation started. Please wait a moment and then " f"click here to download when ready." - ) + ), ) return HttpResponse(status=303, headers={"Location": request.path.replace("/metrics/", "")}) @@ -367,16 +367,15 @@ def get_metrics_file(self, request, task_id): mark_safe( f"The metrics file is still being generated. " f"Click here to try again." - ) + ), ) else: messages.add_message( request, messages.WARNING, mark_safe( - f"The metrics file is not ready yet. " - f"Click here to try again." - ) + f"The metrics file is not ready yet. " f"Click here to try again." + ), ) return HttpResponse(status=303, headers={"Location": request.path.replace(f"/metrics/{task_id}/", "")})
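For exercising the whole pipeline without clicking through the admin, the task can be run eagerly from a Django shell; a sketch assuming a configured MEDIA_ROOT and Celery's standard apply() eager-execution API:

    import os
    import uuid

    from django.conf import settings
    from sde_collections.tasks import generate_metrics

    task_id = str(uuid.uuid4())
    generate_metrics.apply(args=[task_id])  # runs synchronously in-process; no worker required
    print(os.path.join(settings.MEDIA_ROOT, "metrics", f"metrics_{task_id}.csv"))

If the file at the printed path exists and exceeds the 100-byte threshold, get_metrics_file will serve it as an attachment; otherwise it redirects back to the changelist with an INFO or WARNING message depending on whether the .tmp file is still present.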