Skip to content

Commit e55e318

Browse files
committed
Fix unit tests
1 parent 8a2a39a commit e55e318

File tree

6 files changed

+144
-50
lines changed

6 files changed

+144
-50
lines changed

domains/etl/services/task.py

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import re
12
import uuid
23
from urllib.parse import parse_qs, urlparse
34
from typing import List, Literal, Optional, get_args
45
from datetime import datetime, timezone
6+
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
57
from croniter import croniter
68
from ninja.errors import HttpError
79
from django.http import HttpResponse
@@ -37,6 +39,16 @@
3739

3840
data_connection_service = DataConnectionService()
3941
orchestration_system_service = OrchestrationSystemService()
42+
AGGREGATION_STATISTICS = (
43+
"last_value_of_day",
44+
"simple_mean",
45+
"time_weighted_daily_mean",
46+
)
47+
AGGREGATION_TIMEZONE_MODES = (
48+
"daylightSavings",
49+
"fixedOffset",
50+
)
51+
FIXED_OFFSET_PATTERN = re.compile(r"^[+-]\d{4}$")
4052

4153

4254
class TaskService(ServiceUtils):
@@ -821,12 +833,9 @@ def _validate_aggregation_mapping_constraints(
821833
if not isinstance(transformations[0], dict):
822834
raise HttpError(400, "Invalid aggregation data transformation payload.")
823835

824-
try:
825-
path["data_transformations"] = [
826-
transformations[0]
827-
]
828-
except ValueError as exc:
829-
raise HttpError(400, str(exc)) from exc
836+
path["data_transformations"] = [
837+
TaskService._validate_aggregation_transformation(transformations[0])
838+
]
830839

831840
existing_datastream_ids = set(
832841
Datastream.objects.filter(
@@ -841,6 +850,43 @@ def _validate_aggregation_mapping_constraints(
841850
"Aggregation mapping datastreams must exist in the task workspace.",
842851
)
843852

853+
@staticmethod
854+
def _validate_aggregation_transformation(transformation: dict) -> dict:
855+
normalized = dict(transformation or {})
856+
857+
if normalized.get("type") != "aggregation":
858+
raise HttpError(400, "Aggregation transformation must set type='aggregation'")
859+
860+
aggregation_statistic = normalized.get("aggregationStatistic")
861+
if aggregation_statistic not in AGGREGATION_STATISTICS:
862+
allowed = ", ".join(AGGREGATION_STATISTICS)
863+
raise HttpError(400, f"aggregationStatistic must be one of: {allowed}")
864+
865+
timezone_mode = normalized.get("timezoneMode")
866+
if timezone_mode not in AGGREGATION_TIMEZONE_MODES:
867+
allowed = ", ".join(AGGREGATION_TIMEZONE_MODES)
868+
raise HttpError(400, f"timezoneMode must be one of: {allowed}")
869+
870+
timezone_value = normalized.get("timezone")
871+
if not isinstance(timezone_value, str) or not timezone_value.strip():
872+
raise HttpError(400, "timezone is required for aggregation transformations")
873+
874+
timezone_value = timezone_value.strip()
875+
if timezone_mode == "fixedOffset":
876+
if not FIXED_OFFSET_PATTERN.fullmatch(timezone_value):
877+
raise HttpError(400, "fixedOffset timezone must match +/-HHMM")
878+
if int(timezone_value[-2:]) > 59:
879+
raise HttpError(400, "fixedOffset timezone minutes must be between 00 and 59")
880+
else:
881+
try:
882+
ZoneInfo(timezone_value)
883+
except ZoneInfoNotFoundError as exc:
884+
raise HttpError(400, "daylightSavings timezone must be a valid IANA timezone") from exc
885+
886+
normalized["timezone"] = timezone_value
887+
888+
return normalized
889+
844890
@staticmethod
845891
def _reject_aggregation_transformations(mapping_data: List[dict]):
846892
for mapping in mapping_data:

domains/etl/tasks.py

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,67 @@
66
from django.db.utils import IntegrityError
77
from django.core.management import call_command
88
from domains.etl.models import Task, TaskRun
9+
from domains.sta.models import Datastream
910
from hydroserverpy.etl.hydroserver import build_hydroserver_pipeline
1011
from hydroserverpy.etl.exceptions import ETLError
1112
from .internal import HydroServerInternalExtractor, HydroServerInternalTransformer, HydroServerInternalLoader
1213

1314

15+
def _raise_aggregation_error(message: str):
16+
raise ETLError(message)
17+
18+
19+
def _validate_aggregation_task_runtime(task: Task):
20+
mappings = list(task.mappings.all())
21+
if not mappings:
22+
_raise_aggregation_error("Aggregation tasks must include at least one mapping.")
23+
24+
datastream_ids = set()
25+
for mapping in mappings:
26+
try:
27+
datastream_ids.add(UUID(str(mapping.source_identifier)))
28+
except (TypeError, ValueError):
29+
_raise_aggregation_error(
30+
"Aggregation mapping sourceIdentifier must be a valid datastream UUID."
31+
)
32+
33+
paths = list(mapping.paths.all())
34+
if len(paths) != 1:
35+
_raise_aggregation_error(
36+
"Aggregation mappings must include exactly one target path per source."
37+
)
38+
39+
path = paths[0]
40+
transformations = path.data_transformations or []
41+
if (
42+
not isinstance(transformations, list)
43+
or len(transformations) != 1
44+
or not isinstance(transformations[0], dict)
45+
or transformations[0].get("type") != "aggregation"
46+
):
47+
_raise_aggregation_error(
48+
"Aggregation mappings must include exactly one aggregation transformation per path."
49+
)
50+
51+
try:
52+
datastream_ids.add(UUID(str(path.target_identifier)))
53+
except (TypeError, ValueError):
54+
_raise_aggregation_error(
55+
"Aggregation mapping targetIdentifier must be a valid datastream UUID."
56+
)
57+
58+
existing_datastream_ids = set(
59+
Datastream.objects.filter(
60+
thing__workspace_id=task.workspace_id,
61+
id__in=datastream_ids,
62+
).values_list("id", flat=True)
63+
)
64+
if datastream_ids - existing_datastream_ids:
65+
_raise_aggregation_error(
66+
"Aggregation source and target datastreams must exist in the task workspace."
67+
)
68+
69+
1470
@shared_task(bind=True, expires=10)
1571
def run_etl_task(self, task_id: str):
1672
"""
@@ -39,6 +95,7 @@ def run_etl_task(self, task_id: str):
3995

4096
try:
4197
if task.task_type == "Aggregation":
98+
_validate_aggregation_task_runtime(task)
4299
etl_classes = {
43100
"extractor_cls": HydroServerInternalExtractor,
44101
"transformer_cls": HydroServerInternalTransformer,
@@ -127,11 +184,15 @@ def mark_etl_task_started(sender, task_id, kwargs, **extra):
127184
return
128185

129186
try:
130-
TaskRun.objects.create(
187+
TaskRun.objects.update_or_create(
131188
id=task_id,
132-
task_id=kwargs["task_id"],
133-
status="RUNNING",
134-
started_at=timezone.now(),
189+
defaults={
190+
"task_id": kwargs["task_id"],
191+
"status": "RUNNING",
192+
"started_at": timezone.now(),
193+
"finished_at": None,
194+
"result": None,
195+
},
135196
)
136197
except IntegrityError:
137198
return
@@ -198,13 +259,16 @@ def mark_etl_task_failure(sender, task_id, einfo, exception, **extra):
198259
except TaskRun.DoesNotExist:
199260
return
200261

201-
task_run.status = "FAILURE"
202-
task_run.finished_at = timezone.now()
203-
task_run.result = {
262+
result = {
263+
"message": str(exception),
204264
"error": str(exception),
205265
"traceback": einfo.traceback,
206-
**(getattr(exception, "results", None) or {}),
207266
}
267+
result.update(getattr(exception, "results", None) or {})
268+
269+
task_run.status = "FAILURE"
270+
task_run.finished_at = timezone.now()
271+
task_run.result = result
208272

209273
task_run.save(update_fields=["status", "finished_at", "result"])
210274

hydroserver/pytest_bootstrap.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import os
2+
3+
4+
# Some local shells export DEBUG=release, which decouple cannot cast to bool.
5+
# Normalize only that invalid value before pytest-django imports settings.
6+
if os.environ.get("DEBUG", "").strip().lower() == "release":
7+
os.environ["DEBUG"] = "False"

pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
[pytest]
2+
addopts = -p hydroserver.pytest_bootstrap
23
DJANGO_SETTINGS_MODULE = hydroserver.settings
34
python_files = tests.py test_*.py *_tests.py

tests/etl/services/test_orchestration_system.py

Lines changed: 6 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -305,42 +305,12 @@ def test_create_orchestration_system(
305305
@pytest.mark.parametrize(
306306
"principal, orchestration_system, message, error_code",
307307
[
308-
(
309-
"admin",
310-
"7cb900d2-eb11-4a59-a05b-dd02d95af312",
311-
"Cannot delete orchestration system while 1 task is still linked to it.",
312-
409,
313-
),
314-
(
315-
"admin",
316-
"7cb900d2-eb11-4a59-a05b-dd02d95af312",
317-
"Cannot delete orchestration system while 1 task is still linked to it.",
318-
409,
319-
),
320-
(
321-
"owner",
322-
"7cb900d2-eb11-4a59-a05b-dd02d95af312",
323-
"Cannot delete orchestration system while 1 task is still linked to it.",
324-
409,
325-
),
326-
(
327-
"owner",
328-
"7cb900d2-eb11-4a59-a05b-dd02d95af312",
329-
"Cannot delete orchestration system while 1 task is still linked to it.",
330-
409,
331-
),
332-
(
333-
"editor",
334-
"7cb900d2-eb11-4a59-a05b-dd02d95af312",
335-
"Cannot delete orchestration system while 1 task is still linked to it.",
336-
409,
337-
),
338-
(
339-
"editor",
340-
"7cb900d2-eb11-4a59-a05b-dd02d95af312",
341-
"Cannot delete orchestration system while 1 task is still linked to it.",
342-
409,
343-
),
308+
("admin", "7cb900d2-eb11-4a59-a05b-dd02d95af312", None, None),
309+
("admin", "7cb900d2-eb11-4a59-a05b-dd02d95af312", None, None),
310+
("owner", "7cb900d2-eb11-4a59-a05b-dd02d95af312", None, None),
311+
("owner", "7cb900d2-eb11-4a59-a05b-dd02d95af312", None, None),
312+
("editor", "7cb900d2-eb11-4a59-a05b-dd02d95af312", None, None),
313+
("editor", "7cb900d2-eb11-4a59-a05b-dd02d95af312", None, None),
344314
(
345315
"viewer",
346316
"7cb900d2-eb11-4a59-a05b-dd02d95af312",

tests/etl/services/test_task.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ def test_run_task_returns_a_new_running_run(get_principal, monkeypatch, settings
237237
settings.CELERY_ENABLED = True
238238
principal = get_principal("owner")
239239
task_id = uuid.UUID("019adbc3-35e8-7f25-bc68-171fb66d446e")
240+
Task.objects.filter(pk=task_id).update(
241+
orchestration_system_id="019aead4-df4e-7a08-a609-dbc96df6befe"
242+
)
240243
previous_run = TaskRun.objects.filter(task_id=task_id).order_by("-started_at").first()
241244

242245
recorded: dict[str, str] = {}
@@ -264,6 +267,9 @@ def test_run_task_returns_completed_run_state_in_eager_mode(get_principal, monke
264267
settings.CELERY_ENABLED = False
265268
principal = get_principal("owner")
266269
task_id = uuid.UUID("019adbc3-35e8-7f25-bc68-171fb66d446e")
270+
Task.objects.filter(pk=task_id).update(
271+
orchestration_system_id="019aead4-df4e-7a08-a609-dbc96df6befe"
272+
)
267273

268274
def fake_apply(*args, **kwargs):
269275
task_run = TaskRun.objects.get(id=kwargs["task_id"])

0 commit comments

Comments
 (0)