Remove destination_server and add data_source

CarsonDavis · CarsonDavis · commit 6518c4745d09 · 2024-11-22T21:35:44.000-06:00
diff --git a/environmental_justice/migrations/0006_remove_environmentaljusticerow_destination_server_and_more.py b/environmental_justice/migrations/0006_remove_environmentaljusticerow_destination_server_and_more.py
@@ -0,0 +1,52 @@
+# Generated by Django 4.2.9 on 2024-11-23 03:18
+
+from django.db import migrations, models
+
+
+def migrate_destination_server_to_data_source(apps, schema_editor):
+    """Translate each legacy destination_server value into its data_source equivalent."""
+    row_model = apps.get_model("environmental_justice", "EnvironmentalJusticeRow")
+
+    # Legacy destination_server value -> replacement data_source value.
+    # Dict order keeps the updates running as prod, then dev, then test.
+    server_to_source = {
+        "prod": "spreadsheet",
+        "dev": "ml_production",
+        "test": "ml_testing",
+    }
+
+    for legacy_server, new_source in server_to_source.items():
+        # One UPDATE per legacy value; destination_server is blanked on the
+        # same rows in that statement.
+        rows_on_server = row_model.objects.filter(destination_server=legacy_server)
+        rows_on_server.update(data_source=new_source, destination_server="")
+
+
+class Migration(migrations.Migration):
+    # Replaces the destination_server column on EnvironmentalJusticeRow with data_source.
+    dependencies = [
+        ("environmental_justice", "0005_environmentaljusticerow_destination_server"),
+    ]
+    # Order matters: add data_source, fill it, then drop destination_server (the fill is a no-op on reverse).
+    operations = [
+        migrations.AddField(  # must exist before the RunPython step writes to it
+            model_name="environmentaljusticerow",
+            name="data_source",
+            field=models.CharField(
+                blank=True,
+                choices=[
+                    ("spreadsheet", "Spreadsheet"),
+                    ("ml_production", "ML Production"),
+                    ("ml_testing", "ML Testing"),
+                ],
+                default="",
+                max_length=20,
+                verbose_name="Data Source",
+            ),
+        ),
+        migrations.RunPython(migrate_destination_server_to_data_source, reverse_code=migrations.RunPython.noop),
+        migrations.RemoveField(  # safe only after the values above have been copied across
+            model_name="environmentaljusticerow",
+            name="destination_server",
+        ),
+    ]
diff --git a/environmental_justice/models.py b/environmental_justice/models.py
@@ -6,13 +6,13 @@ class EnvironmentalJusticeRow(models.Model):
     Environmental Justice data from the spreadsheet
     """
 
-    class DestinationServerChoices(models.TextChoices):
-        DEV = "dev", "Development"
-        TEST = "test", "Testing"
-        PROD = "prod", "Production"
+    class DataSourceChoices(models.TextChoices):  # allowed data_source values as (stored value, label)
+        SPREADSHEET = "spreadsheet", "Spreadsheet"  # migration 0006 maps legacy "prod" rows here
+        ML_PRODUCTION = "ml_production", "ML Production"  # migration 0006 maps legacy "dev" rows here
+        ML_TESTING = "ml_testing", "ML Testing"  # migration 0006 maps legacy "test" rows here
 
-    destination_server = models.CharField(
-        "Destination Server", max_length=10, choices=DestinationServerChoices.choices, default="", blank=True
+    data_source = models.CharField(  # optional: blank=True with a "" default, so a row may have no source set
+        "Data Source", max_length=20, choices=DataSourceChoices.choices, default="", blank=True
     )
 
     dataset = models.CharField("Dataset", blank=True, default="")
diff --git a/environmental_justice/views.py b/environmental_justice/views.py
@@ -1,3 +1,4 @@
+from django.db.models import Q
 from django_filters.rest_framework import DjangoFilterBackend
 from rest_framework import viewsets
 
@@ -8,19 +9,52 @@
 class EnvironmentalJusticeRowViewSet(viewsets.ModelViewSet):
     """
     API endpoint that allows environmental justice rows to be read.
+    Without a specific data_source, spreadsheet and ml_production rows are combined;
+    an ml_production row is dropped when a spreadsheet row has the same dataset.
     """
 
     queryset = EnvironmentalJusticeRow.objects.all()
     serializer_class = EnvironmentalJusticeRowSerializer
     http_method_names = ["get"]
     filter_backends = [DjangoFilterBackend]
-    filterset_fields = ["destination_server"]
+    filterset_fields = ["data_source"]  # NOTE(review): may reject "combined" as an invalid choice - confirm
+
+    def get_combined_queryset(self):
+        """
+        Return every spreadsheet row plus the ML production rows they do not override.
+
+        An ML production row is included only when no spreadsheet row has the same
+        ``dataset`` value. ML testing rows are never included. Results are ordered
+        by ``dataset``.
+        """
+        sources = EnvironmentalJusticeRow.DataSourceChoices
+
+        # Kept as a lazy queryset so it is used as a subquery rather than a Python list.
+        spreadsheet_datasets = (
+            EnvironmentalJusticeRow.objects.filter(data_source=sources.SPREADSHEET)
+            .values_list("dataset", flat=True)
+            .distinct()
+        )
+
+        # Django has no "not_in" lookup: dataset__not_in raises FieldError when the
+        # filter is built, so the exclusion is written as a negated "__in" instead.
+        from_spreadsheet = Q(data_source=sources.SPREADSHEET)
+        unshadowed_ml = Q(data_source=sources.ML_PRODUCTION) & ~Q(dataset__in=spreadsheet_datasets)
+
+        return EnvironmentalJusticeRow.objects.filter(from_spreadsheet | unshadowed_ml).order_by("dataset")
 
     def get_queryset(self):
         """
-        if no destination_server is provided, default to PROD
+        Pick the rows to serve from the ``data_source`` query parameter:
+        - one of the DataSourceChoices values: only rows from that source
+        - 'combined', absent, or any other value: the combined queryset,
+          in which spreadsheet rows take precedence over ML production rows
         """
-        queryset = super().get_queryset()
-        if not self.request.query_params.get("destination_server"):
-            queryset = queryset.filter(destination_server=EnvironmentalJusticeRow.DestinationServerChoices.PROD)
-        return queryset
+        requested_source = self.request.query_params.get("data_source", "combined")
+        known_sources = EnvironmentalJusticeRow.DataSourceChoices.values
+
+        # Anything that is not a concrete source ('combined', a missing
+        # parameter, or an unrecognised value) gets the merged view.
+        if requested_source not in known_sources:
+            return self.get_combined_queryset()
+        return super().get_queryset().filter(data_source=requested_source)