Skip to content

Commit d7620ad

Browse files
committed
Merge branch 'dev' into branch_#1097
2 parents cba90fa + 52b26a0 commit d7620ad

27 files changed

+3213
-199
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,11 @@ Eventually, job creation will be done seamlessly by the webapp. Until then, edit
214214
```shell
215215
tmux new -s docker_django
216216
```
217-
Once you are inside, you can run dmshell.
217+
Once you are inside, you can run dmshell or, for example, a management command:
218+
219+
```shell
220+
docker-compose -f production.yml run --rm django python manage.py deduplicate_urls
221+
```
218222

219223
Later, you can do this to get back in.
220224
```shell

environmental_justice/README.md

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Environmental Justice API
2+
3+
## Overview
4+
This API provides access to Environmental Justice data from multiple sources. It supports retrieving data from individual sources or as a combined dataset with defined precedence rules.
5+
6+
## Endpoints
7+
8+
### GET /api/environmental-justice/
9+
10+
Retrieves environmental justice data based on specified data source.
11+
12+
#### Query Parameters
13+
14+
| Parameter | Description | Default | Options |
15+
|-------------|-------------|------------|----------------------------------------------|
16+
| data_source | Data source filter | "combined" | "spreadsheet", "ml_production", "ml_testing", "combined" |
17+
18+
#### Data Source Behavior
19+
20+
1. **Single Source**
21+
- `?data_source=spreadsheet`: Returns only spreadsheet data
22+
- `?data_source=ml_production`: Returns only ML production data
23+
- `?data_source=ml_testing`: Returns only ML testing data
24+
25+
2. **Combined Data** (Default)
26+
- Access via `?data_source=combined` or no parameter
27+
- Merges data from 'spreadsheet' and 'ml_production' sources
28+
- Precedence rules:
29+
- If the same dataset exists in both sources, the spreadsheet version is used
30+
- Unique datasets from ml_production are included
31+
- ML testing data is not included in combined view
32+
33+
#### Example Requests
34+
35+
```bash
36+
# Get combined data (default)
37+
GET /api/environmental-justice/
38+
39+
# Get combined data (explicit)
40+
GET /api/environmental-justice/?data_source=combined
41+
42+
# Get only spreadsheet data
43+
GET /api/environmental-justice/?data_source=spreadsheet
44+
45+
# Get only ML production data
46+
GET /api/environmental-justice/?data_source=ml_production
47+
48+
# Get only ML testing data
49+
GET /api/environmental-justice/?data_source=ml_testing
50+
```
51+
52+
#### Response Fields
53+
54+
Each record includes the following fields:
55+
- dataset
56+
- description
57+
- description_simplified
58+
- indicators
59+
- intended_use
60+
- latency
61+
- limitations
62+
- project
63+
- source_link
64+
- strengths
65+
- format
66+
- geographic_coverage
67+
- data_visualization
68+
- spatial_resolution
69+
- temporal_extent
70+
- temporal_resolution
71+
- sde_link
72+
- data_source
73+
74+
## Data Source Definitions
75+
76+
- **spreadsheet**: Primary source data from environmental justice spreadsheets
77+
- **ml_production**: Production machine learning processed data
78+
- **ml_testing**: Testing/staging machine learning processed data
79+
80+
## Precedence Rules
81+
When retrieving combined data:
82+
1. If a dataset exists in both spreadsheet and ml_production:
83+
- The spreadsheet version takes precedence
84+
- The ml_production version is excluded
85+
2. Datasets unique to ml_production are included in the response
86+
3. ML testing data is never included in combined results
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Generated by Django 4.2.9 on 2024-11-23 03:18
2+
3+
from django.db import migrations, models
4+
5+
6+
def migrate_destination_server_to_data_source(apps, schema_editor):
    """Copy legacy ``destination_server`` values into the new ``data_source`` field.

    Mapping: prod -> spreadsheet, dev -> ml_production, test -> ml_testing.
    The legacy ``destination_server`` value is blanked out on every migrated
    row.  Rows with any other (or empty) destination_server are left untouched.
    """
    EnvironmentalJusticeRow = apps.get_model("environmental_justice", "EnvironmentalJusticeRow")

    # One bulk UPDATE per legacy value, in a fixed order (prod, dev, test).
    server_to_source = {
        "prod": "spreadsheet",
        "dev": "ml_production",
        "test": "ml_testing",
    }
    for server, source in server_to_source.items():
        EnvironmentalJusticeRow.objects.filter(destination_server=server).update(
            data_source=source, destination_server=""
        )
23+
24+
25+
class Migration(migrations.Migration):
    """Replace ``destination_server`` with ``data_source`` on EnvironmentalJusticeRow.

    Three ordered steps:
      1. add the new ``data_source`` field,
      2. copy legacy ``destination_server`` values into it (data migration),
      3. drop the old ``destination_server`` field.
    """

    dependencies = [
        ("environmental_justice", "0005_environmentaljusticerow_destination_server"),
    ]

    operations = [
        # 1. New field; choices mirror the model's DataSourceChoices enum.
        migrations.AddField(
            model_name="environmentaljusticerow",
            name="data_source",
            field=models.CharField(
                blank=True,
                choices=[
                    ("spreadsheet", "Spreadsheet"),
                    ("ml_production", "ML Production"),
                    ("ml_testing", "ML Testing"),
                ],
                default="",
                max_length=20,
                verbose_name="Data Source",
            ),
        ),
        # 2. Forward-only data migration; reversing it is deliberately a no-op.
        migrations.RunPython(migrate_destination_server_to_data_source, reverse_code=migrations.RunPython.noop),
        # 3. Remove the legacy field once its data has been copied over.
        migrations.RemoveField(
            model_name="environmentaljusticerow",
            name="destination_server",
        ),
    ]

environmental_justice/models.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ class EnvironmentalJusticeRow(models.Model):
66
Environmental Justice data from the spreadsheet
77
"""
88

9-
class DestinationServerChoices(models.TextChoices):
10-
DEV = "dev", "Development"
11-
TEST = "test", "Testing"
12-
PROD = "prod", "Production"
9+
class DataSourceChoices(models.TextChoices):
10+
SPREADSHEET = "spreadsheet", "Spreadsheet"
11+
ML_PRODUCTION = "ml_production", "ML Production"
12+
ML_TESTING = "ml_testing", "ML Testing"
1313

14-
destination_server = models.CharField(
15-
"Destination Server", max_length=10, choices=DestinationServerChoices.choices, default="", blank=True
14+
data_source = models.CharField(
15+
"Data Source", max_length=20, choices=DataSourceChoices.choices, default="", blank=True
1616
)
1717

1818
dataset = models.CharField("Dataset", blank=True, default="")

environmental_justice/tests.py

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import pytest
from django.urls import include, path
from rest_framework.routers import DefaultRouter
from rest_framework.test import APIClient

from environmental_justice.views import EnvironmentalJusticeRowViewSet

# Router wiring the viewset under the same prefix the project uses, so the
# API can be exercised without loading the full project URLconf.
router = DefaultRouter()
router.register(r"environmental-justice", EnvironmentalJusticeRowViewSet)

# Temporary urlpatterns used only while these tests run (see setup_urls).
urlpatterns = [
    path("api/", include(router.urls)),
]


@pytest.fixture
def client():
    """Return a Django REST framework API client."""
    return APIClient()


@pytest.fixture(autouse=True)
def setup_urls(settings):
    """Point ROOT_URLCONF at this module for every test in this package.

    Uses pytest-django's ``settings`` fixture instead of mutating
    ``django.conf.settings`` directly: the fixture restores the original
    ROOT_URLCONF after each test, so the override cannot leak into other
    test modules in the same session.
    """
    settings.ROOT_URLCONF = __name__
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import factory
from factory.django import DjangoModelFactory

from environmental_justice.models import EnvironmentalJusticeRow


class EnvironmentalJusticeRowFactory(DjangoModelFactory):
    """Build EnvironmentalJusticeRow instances populated with fake data.

    ``dataset`` is a sequence (``dataset_0``, ``dataset_1``, ...) so rows are
    unique by default; ``data_source`` defaults to SPREADSHEET.  Override any
    field per call, e.g. ``EnvironmentalJusticeRowFactory(dataset="x")``.
    """

    class Meta:
        model = EnvironmentalJusticeRow

    # Unique per instance so tests that deduplicate by dataset don't collide.
    dataset = factory.Sequence(lambda n: f"dataset_{n}")
    description = factory.Faker("sentence")
    description_simplified = factory.Faker("sentence")
    indicators = factory.Faker("sentence")
    intended_use = factory.Faker("sentence")
    latency = factory.Faker("word")
    limitations = factory.Faker("sentence")
    project = factory.Faker("word")
    source_link = factory.Faker("url")
    strengths = factory.Faker("sentence")
    format = factory.Faker("file_extension")
    geographic_coverage = factory.Faker("country")
    data_visualization = factory.Faker("sentence")
    spatial_resolution = factory.Faker("word")
    temporal_extent = factory.Faker("date")
    temporal_resolution = factory.Faker("word")
    sde_link = factory.Faker("url")
    data_source = EnvironmentalJusticeRow.DataSourceChoices.SPREADSHEET
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# docker-compose -f local.yml run --rm django pytest environmental_justice/tests/test_views.py
import pytest
from rest_framework import status

from environmental_justice.models import EnvironmentalJusticeRow
from environmental_justice.tests.factories import EnvironmentalJusticeRowFactory

# Shorthand for the data-source enum used throughout this module.
SOURCES = EnvironmentalJusticeRow.DataSourceChoices


@pytest.mark.django_db
class TestEnvironmentalJusticeRowViewSet:
    """Test suite for the EnvironmentalJusticeRow API endpoints."""

    # Endpoint under test; shared by every test method.
    url = "/api/environmental-justice/"

    def test_empty_database_returns_empty_list(self, client):
        """An empty table yields an empty, zero-count result page."""
        response = client.get(self.url)
        assert response.status_code == status.HTTP_200_OK
        payload = response.json()
        assert payload["count"] == 0
        assert payload["results"] == []

    def test_single_source_filtering(self, client):
        """Each data_source value returns only that source's records."""
        records = {
            "spreadsheet": EnvironmentalJusticeRowFactory(
                dataset="test_dataset", data_source=SOURCES.SPREADSHEET
            ),
            "ml_production": EnvironmentalJusticeRowFactory(
                dataset="another_dataset", data_source=SOURCES.ML_PRODUCTION
            ),
            "ml_testing": EnvironmentalJusticeRowFactory(
                dataset="test_dataset_3", data_source=SOURCES.ML_TESTING
            ),
        }

        # Query each source in turn; exactly its own record must come back.
        for source, record in records.items():
            response = client.get(self.url, {"data_source": source})
            assert response.status_code == status.HTTP_200_OK
            results = response.json()["results"]
            assert len(results) == 1
            assert results[0]["dataset"] == record.dataset

    def test_combined_data_precedence(self, client):
        """
        Combined view prefers the spreadsheet row over ml_production when the
        same dataset exists in both, and keeps ml_production-only datasets.
        """
        EnvironmentalJusticeRowFactory(
            dataset="common_dataset",
            description="spreadsheet version",
            data_source=SOURCES.SPREADSHEET,
        )
        EnvironmentalJusticeRowFactory(
            dataset="common_dataset",
            description="ml version",
            data_source=SOURCES.ML_PRODUCTION,
        )
        EnvironmentalJusticeRowFactory(
            dataset="unique_ml_dataset", data_source=SOURCES.ML_PRODUCTION
        )

        response = client.get(self.url)
        assert response.status_code == status.HTTP_200_OK
        results = response.json()["results"]

        # The duplicate collapses to a single row, so only 2 of 3 survive.
        assert len(results) == 2
        by_dataset = {row["dataset"]: row for row in results}
        assert set(by_dataset) == {"common_dataset", "unique_ml_dataset"}

        # Precedence: the surviving common row is the spreadsheet one.
        assert by_dataset["common_dataset"]["description"] == "spreadsheet version"

    def test_combined_explicit_parameter(self, client):
        """Explicit ?data_source=combined behaves exactly like the default."""
        EnvironmentalJusticeRowFactory(data_source=SOURCES.SPREADSHEET)
        EnvironmentalJusticeRowFactory(
            dataset="unique_ml_dataset",  # ensure a distinct dataset
            data_source=SOURCES.ML_PRODUCTION,
        )

        implicit = client.get(self.url)
        explicit = client.get(self.url, {"data_source": "combined"})

        assert implicit.status_code == status.HTTP_200_OK
        assert explicit.status_code == status.HTTP_200_OK
        assert implicit.json()["results"] == explicit.json()["results"]

    def test_invalid_data_source(self, client):
        """Unknown data_source values are rejected with HTTP 400."""
        response = client.get(self.url, {"data_source": "invalid"})
        assert response.status_code == status.HTTP_400_BAD_REQUEST
        assert "Invalid data_source" in str(response.json())

    def test_sorting_in_combined_view(self, client):
        """Combined results come back ordered by dataset name."""
        # Created deliberately out of alphabetical order.
        EnvironmentalJusticeRowFactory(
            dataset="zebra_dataset", data_source=SOURCES.SPREADSHEET
        )
        EnvironmentalJusticeRowFactory(
            dataset="alpha_dataset", data_source=SOURCES.ML_PRODUCTION
        )

        response = client.get(self.url)
        assert response.status_code == status.HTTP_200_OK
        names = [row["dataset"] for row in response.json()["results"]]
        assert names == sorted(names)

    def test_http_methods_allowed(self, client):
        """The endpoint is read-only: GET succeeds, write verbs are rejected."""
        assert client.get(self.url).status_code == status.HTTP_200_OK

        for attempt in (
            client.post(self.url, {}),
            client.put(self.url, {}),
            client.delete(self.url),
        ):
            assert attempt.status_code == status.HTTP_405_METHOD_NOT_ALLOWED

0 commit comments

Comments
 (0)