Skip to content

Commit 6518c47

Browse files
committed
remove destination_server and add datasource
1 parent 1ea5168 commit 6518c47

File tree

3 files changed

+98
-12
lines changed

3 files changed

+98
-12
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Generated by Django 4.2.9 on 2024-11-23 03:18
2+
3+
from django.db import migrations, models
4+
5+
6+
def migrate_destination_server_to_data_source(apps, schema_editor):
    """Translate each row's legacy ``destination_server`` into ``data_source``.

    Mapping applied:
        "prod" -> "spreadsheet"
        "dev"  -> "ml_production"
        "test" -> "ml_testing"

    Matching rows also get ``destination_server`` blanked. Rows whose
    ``destination_server`` holds any other value (including the empty
    string) are not touched here.

    Args:
        apps: Historical app registry passed in by ``RunPython``; used to
            look up the model as it exists at this point in the migration
            history.
        schema_editor: Schema editor passed in by ``RunPython``; its
            connection alias selects the database to write to.
    """
    EnvironmentalJusticeRow = apps.get_model("environmental_justice", "EnvironmentalJusticeRow")

    # Write through the connection that is actually being migrated rather
    # than the default one, so `migrate --database=<alias>` updates the
    # right database.
    rows = EnvironmentalJusticeRow.objects.using(schema_editor.connection.alias)

    data_source_by_server = {
        "prod": "spreadsheet",
        "dev": "ml_production",
        "test": "ml_testing",
    }
    for server, data_source in data_source_by_server.items():
        rows.filter(destination_server=server).update(data_source=data_source, destination_server="")
23+
24+
25+
class Migration(migrations.Migration):
    """Replace ``destination_server`` with ``data_source`` on EnvironmentalJusticeRow.

    The operations run in order: add the new column, copy the old values
    across, then drop the old column. The data step's reverse is a no-op, so
    un-applying this migration does not copy values back into
    ``destination_server``.
    """

    dependencies = [
        ("environmental_justice", "0005_environmentaljusticerow_destination_server"),
    ]

    operations = [
        # 1. Add the new column; existing rows start with the empty-string default.
        migrations.AddField(
            model_name="environmentaljusticerow",
            name="data_source",
            field=models.CharField(
                blank=True,
                choices=[
                    ("spreadsheet", "Spreadsheet"),
                    ("ml_production", "ML Production"),
                    ("ml_testing", "ML Testing"),
                ],
                default="",
                max_length=20,
                verbose_name="Data Source",
            ),
        ),
        # 2. Populate data_source from destination_server while both columns exist.
        migrations.RunPython(migrate_destination_server_to_data_source, reverse_code=migrations.RunPython.noop),
        # 3. Drop the old column only after its values have been copied.
        migrations.RemoveField(
            model_name="environmentaljusticerow",
            name="destination_server",
        ),
    ]

environmental_justice/models.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ class EnvironmentalJusticeRow(models.Model):
66
Environmental Justice data from the spreadsheet
77
"""
88

9-
class DestinationServerChoices(models.TextChoices):
10-
DEV = "dev", "Development"
11-
TEST = "test", "Testing"
12-
PROD = "prod", "Production"
9+
class DataSourceChoices(models.TextChoices):
10+
SPREADSHEET = "spreadsheet", "Spreadsheet"
11+
ML_PRODUCTION = "ml_production", "ML Production"
12+
ML_TESTING = "ml_testing", "ML Testing"
1313

14-
destination_server = models.CharField(
15-
"Destination Server", max_length=10, choices=DestinationServerChoices.choices, default="", blank=True
14+
data_source = models.CharField(
15+
"Data Source", max_length=20, choices=DataSourceChoices.choices, default="", blank=True
1616
)
1717

1818
dataset = models.CharField("Dataset", blank=True, default="")

environmental_justice/views.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from django.db.models import Q
12
from django_filters.rest_framework import DjangoFilterBackend
23
from rest_framework import viewsets
34

@@ -8,19 +9,52 @@
89
class EnvironmentalJusticeRowViewSet(viewsets.ModelViewSet):
910
"""
1011
API endpoint that allows environmental justice rows to be read.
12+
When combining spreadsheet and ml_production data, spreadsheet takes precedence
13+
for any matching dataset values.
1114
"""
1215

1316
queryset = EnvironmentalJusticeRow.objects.all()
1417
serializer_class = EnvironmentalJusticeRowSerializer
1518
http_method_names = ["get"]
1619
filter_backends = [DjangoFilterBackend]
17-
filterset_fields = ["destination_server"]
20+
filterset_fields = ["data_source"]
21+
22+
def get_combined_queryset(self):
    """
    Return spreadsheet rows plus the ML production rows they do not override.

    The result contains:
    1. Every row whose data_source is SPREADSHEET.
    2. Rows whose data_source is ML_PRODUCTION, but only when no spreadsheet
       row has the same ``dataset`` value.

    Rows from any other data source are excluded. Results are ordered by
    ``dataset``.
    """
    sources = EnvironmentalJusticeRow.DataSourceChoices

    # Datasets already covered by the spreadsheet. This stays a lazy
    # queryset and is passed to the ``__in`` lookup below.
    spreadsheet_datasets = (
        EnvironmentalJusticeRow.objects.filter(data_source=sources.SPREADSHEET)
        .values_list("dataset", flat=True)
        .distinct()
    )

    from_spreadsheet = Q(data_source=sources.SPREADSHEET)
    # Django has no ``__not_in`` lookup (using one raises FieldError), so
    # express "dataset not in spreadsheet" as a negated ``__in``.
    ml_not_in_spreadsheet = Q(data_source=sources.ML_PRODUCTION) & ~Q(dataset__in=spreadsheet_datasets)

    return EnvironmentalJusticeRow.objects.filter(from_spreadsheet | ml_not_in_spreadsheet).order_by("dataset")
1845

1946
def get_queryset(self):
2047
"""
21-
if no destination_server is provided, default to PROD
48+
Handle different data_source filter scenarios:
49+
- No filter: Return combined data (spreadsheet takes precedence)
50+
- 'combined': Same as no filter
51+
- specific source: Return data for that source only
2252
"""
23-
queryset = super().get_queryset()
24-
if not self.request.query_params.get("destination_server"):
25-
queryset = queryset.filter(destination_server=EnvironmentalJusticeRow.DestinationServerChoices.PROD)
26-
return queryset
53+
data_source = self.request.query_params.get("data_source", "combined")
54+
55+
# straightforward case: return data for specific source
56+
if data_source in EnvironmentalJusticeRow.DataSourceChoices.values:
57+
return super().get_queryset().filter(data_source=data_source)
58+
59+
# Handle 'combined' or no filter case
60+
return self.get_combined_queryset()

0 commit comments

Comments
 (0)