Skip to content

Commit 1e09ab6

Browse files
committed
add uk messages
1 parent 62ba7b8 commit 1e09ab6

File tree

11 files changed

+154
-47
lines changed

11 files changed

+154
-47
lines changed

terraform/modules/services/airflow/dags/uk/cloudcasting-dag.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from airflow.providers.amazon.aws.operators.ecs import EcsRunTaskOperator
55

66
from airflow.operators.latest_only import LatestOnlyOperator
7-
from utils.slack import on_failure_callback
7+
from utils.slack import slack_message_callback
88

99
default_args = {
1010
"owner": "airflow",
@@ -23,6 +23,12 @@
2323
security_group = os.getenv("ECS_SECURITY_GROUP")
2424
cluster = f"Nowcasting-{env}"
2525

26+
cloudcasting_error_message = (
27+
"⚠️ The task {{ ti.task_id }} failed,"
28+
" but its ok. The cloudcasting is currently not critical. "
29+
"No out of hours support is required."
30+
)
31+
2632
# Tasks can still be defined in terraform, or defined here
2733

2834
region = "uk"
@@ -52,7 +58,7 @@
5258
},
5359
},
5460
task_concurrency=10,
55-
on_failure_callback=on_failure_callback,
61+
on_failure_callback=slack_message_callback(cloudcasting_error_message),
5662
awslogs_group="/aws/ecs/forecast/cloudcasting",
5763
awslogs_stream_prefix="streaming/cloudcasting-forecast",
5864
awslogs_region="eu-west-1",

terraform/modules/services/airflow/dags/uk/dayafter-dag.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from airflow import DAG
33
from airflow.providers.amazon.aws.operators.ecs import EcsRunTaskOperator
44
import os
5-
from utils.slack import on_failure_callback
5+
from utils.slack import slack_message_callback
66

77
from airflow.operators.latest_only import LatestOnlyOperator
88

@@ -25,6 +25,12 @@
2525

2626
# Tasks can still be defined in terraform, or defined here
2727

28+
day_after_error_message = (
29+
"⚠️ The task {{ ti.task_id }} failed,"
30+
" but its ok. This task is not critical for live services. "
31+
"No out of hours support is required."
32+
)
33+
2834
region = "uk"
2935

3036
with DAG(
@@ -49,7 +55,7 @@
4955
"assignPublicIp": "ENABLED",
5056
},
5157
},
52-
on_failure_callback=on_failure_callback,
58+
on_failure_callback=slack_message_callback(day_after_error_message),
5359
task_concurrency=10,
5460
awslogs_group="/aws/ecs/consumer/pvlive-national-day-after",
5561
awslogs_stream_prefix="streaming/pvlive-national-day-after-consumer",
@@ -79,7 +85,7 @@
7985
"assignPublicIp": "ENABLED",
8086
},
8187
},
82-
on_failure_callback=on_failure_callback,
88+
on_failure_callback=slack_message_callback(day_after_error_message),
8389
task_concurrency=10,
8490
awslogs_group="/aws/ecs/consumer/pvlive-gsp-day-after",
8591
awslogs_stream_prefix="streaming/pvlive-gsp-day-after-consumer",
@@ -110,7 +116,7 @@
110116
"assignPublicIp": "ENABLED",
111117
},
112118
},
113-
on_failure_callback=on_failure_callback,
119+
on_failure_callback=slack_message_callback(day_after_error_message),
114120
task_concurrency=10,
115121
awslogs_group="/aws/ecs/analysis/metrics",
116122
awslogs_stream_prefix="streaming/metrics-analysis",

terraform/modules/services/airflow/dags/uk/elasticbeanstalk.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from airflow.operators.latest_only import LatestOnlyOperator
77
from airflow.operators.python import PythonOperator
88
from utils.elastic_beanstalk import scale_elastic_beanstalk_instance
9-
from utils.slack import on_failure_callback
9+
from utils.slack import slack_message_callback
1010

1111
default_args = {
1212
"owner": "airflow",
@@ -19,6 +19,12 @@
1919
"max_active_tasks": 10,
2020
}
2121

22+
elb_error_message = (
23+
"⚠️ The task {{ ti.task_id }} failed,"
24+
" but its ok. This task tried to reset the Elastic Beanstalk instances. "
25+
"No out of hours support is required."
26+
)
27+
2228
region = "uk"
2329
env = os.getenv("ENVIRONMENT", "development")
2430
names = [
@@ -46,15 +52,15 @@
4652
python_callable=scale_elastic_beanstalk_instance,
4753
op_kwargs={"name": name, "number_of_instances": 2, "sleep_seconds": 60 * 5},
4854
task_concurrency=2,
49-
on_failure_callback=on_failure_callback,
55+
on_failure_callback=slack_message_callback(elb_error_message),
5056
)
5157

5258
elb_1 = PythonOperator(
5359
task_id=f"scale_elb_1_{name}",
5460
python_callable=scale_elastic_beanstalk_instance,
5561
op_kwargs={"name": name, "number_of_instances": 1},
5662
task_concurrency=2,
57-
on_failure_callback=on_failure_callback,
63+
on_failure_callback=slack_message_callback(elb_error_message),
5864
)
5965

6066
latest_only >> elb_2 >> elb_1

terraform/modules/services/airflow/dags/uk/forecast-gsp-dag.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
from datetime import datetime, timedelta, timezone
33
from airflow import DAG
44
from airflow.providers.amazon.aws.operators.ecs import EcsRunTaskOperator
5-
from airflow.providers.slack.notifications.slack import send_slack_notification
6-
from utils.slack import on_failure_callback
5+
from utils.slack import slack_message_callback
76

87
from airflow.operators.latest_only import LatestOnlyOperator
98

@@ -26,6 +25,30 @@
2625

2726
# Tasks can still be defined in terraform, or defined here
2827

28+
forecast_pvnet_error_message = (
29+
"⚠️ The task {{ ti.task_id }} failed,"
30+
" but its ok. PVNET-ECMWF only will run next. "
31+
"No out of hours support is required."
32+
)
33+
34+
forecast_pvnet_da_error_message = (
35+
"❌ The task {{ ti.task_id }} failed. "
36+
"This would ideally be fixed before DA actions at 09.00. "
37+
"Please see run book for appropriate actions."
38+
)
39+
40+
forecast_ecmwf_error_message = (
41+
"❌ The task {{ ti.task_id }} failed. This is only run after the main PVnet has failed. "
42+
"We have about 6 hours before this is needed. "
43+
"Please see run book for appropriate actions. "
44+
)
45+
46+
forecast_blend_error_message = (
47+
"❌ The task {{ ti.task_id }} failed. "
48+
"The blending of forecast has failed. "
49+
"Please see run book for appropriate actions. "
50+
)
51+
2952
region = "uk"
3053

3154
with DAG(
@@ -53,13 +76,7 @@
5376
},
5477
},
5578
task_concurrency=10,
56-
on_failure_callback=[send_slack_notification(
57-
text="⚠️ The task {{ ti.task_id }} failed,"
58-
" but its ok. PVNET-ECMWF only will run next. "
59-
"No out of hours support is required. ⚠️",
60-
channel=f"tech-ops-airflow-{env}",
61-
username="Airflow",
62-
)],
79+
on_failure_callback=slack_message_callback(forecast_pvnet_error_message),
6380
awslogs_group="/aws/ecs/forecast/forecast_pvnet",
6481
awslogs_stream_prefix="streaming/forecast_pvnet-forecast",
6582
awslogs_region="eu-west-1",
@@ -79,7 +96,7 @@
7996
},
8097
},
8198
task_concurrency=10,
82-
on_failure_callback=on_failure_callback,
99+
on_failure_callback=slack_message_callback(forecast_ecmwf_error_message),
83100
trigger_rule="all_failed",
84101
awslogs_group="/aws/ecs/forecast/forecast_pvnet_ecmwf",
85102
awslogs_stream_prefix="streaming/forecast_pvnet_ecmwf-forecast",
@@ -100,7 +117,7 @@
100117
},
101118
},
102119
task_concurrency=10,
103-
on_failure_callback=on_failure_callback,
120+
on_failure_callback=slack_message_callback(forecast_blend_error_message),
104121
trigger_rule="one_success",
105122
awslogs_group="/aws/ecs/blend/forecast_blend",
106123
awslogs_stream_prefix="streaming/forecast_blend-blend",
@@ -136,7 +153,7 @@
136153
},
137154
},
138155
task_concurrency=10,
139-
on_failure_callback=on_failure_callback,
156+
on_failure_callback=slack_message_callback(forecast_pvnet_da_error_message),
140157
awslogs_group="/aws/ecs/forecast/forecast_pvnet_day_ahead",
141158
awslogs_stream_prefix="streaming/forecast_pvnet_day_ahead-forecast",
142159
awslogs_region="eu-west-1",
@@ -156,7 +173,7 @@
156173
},
157174
},
158175
task_concurrency=10,
159-
on_failure_callback=on_failure_callback,
176+
on_failure_callback=slack_message_callback(forecast_blend_error_message),
160177
awslogs_group="/aws/ecs/blend/forecast_blend",
161178
awslogs_stream_prefix="streaming/forecast_blend-blend",
162179
awslogs_region="eu-west-1",

terraform/modules/services/airflow/dags/uk/forecast-national-dag.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from airflow.providers.amazon.aws.operators.ecs import EcsRunTaskOperator
55

66
from airflow.operators.latest_only import LatestOnlyOperator
7-
from utils.slack import on_failure_callback
7+
from utils.slack import slack_message_callback
88

99
default_args = {
1010
"owner": "airflow",
@@ -25,6 +25,25 @@
2525

2626
# Tasks can still be defined in terraform, or defined here
2727

28+
national_xg_forecast_error_message = (
29+
"⚠️ The task {{ ti.task_id }} failed. "
30+
"But its ok, this forecast is only a backup. "
31+
"No out of office hours support is required, unless other forecasts are failing"
32+
)
33+
34+
neso_forecast_consumer_error_message = (
35+
"⚠️ The task {{ ti.task_id }} failed. "
36+
"But its ok, this only used for comparison. "
37+
"No out of office hours support is required."
38+
)
39+
40+
forecast_blend_error_message = (
41+
"❌ The task {{ ti.task_id }} failed. "
42+
"The blending of forecast has failed. "
43+
"Please see run book for appropriate actions. "
44+
)
45+
46+
2847
region = "uk"
2948

3049
with DAG(
@@ -52,7 +71,7 @@
5271
},
5372
},
5473
task_concurrency=10,
55-
on_failure_callback=on_failure_callback,
74+
on_failure_callback=slack_message_callback(national_xg_forecast_error_message),
5675
awslogs_group="/aws/ecs/forecast/forecast_national",
5776
awslogs_stream_prefix="streaming/forecast_national-forecast",
5877
awslogs_region="eu-west-1",
@@ -72,7 +91,7 @@
7291
},
7392
},
7493
task_concurrency=10,
75-
on_failure_callback=on_failure_callback,
94+
on_failure_callback=slack_message_callback(forecast_blend_error_message),
7695
awslogs_group="/aws/ecs/blend/forecast_blend",
7796
awslogs_stream_prefix="streaming/forecast_blend-blend",
7897
awslogs_region="eu-west-1",
@@ -107,7 +126,7 @@
107126
},
108127
},
109128
task_concurrency=10,
110-
on_failure_callback=on_failure_callback,
129+
on_failure_callback=slack_message_callback(neso_forecast_consumer_error_message),
111130
awslogs_group="/aws/ecs/consume/neso-forecast",
112131
awslogs_stream_prefix="streaming/neso-forecast-consume",
113132
awslogs_region="eu-west-1",

terraform/modules/services/airflow/dags/uk/forecast-site-dag.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from datetime import datetime, timedelta, timezone
33
from airflow import DAG
44
from airflow.providers.amazon.aws.operators.ecs import EcsRunTaskOperator
5-
from utils.slack import on_failure_callback
5+
from utils.slack import slack_message_callback
66

77
from airflow.operators.latest_only import LatestOnlyOperator
88

@@ -24,6 +24,11 @@
2424

2525
# Tasks can still be defined in terraform, or defined here
2626

27+
site_forecast_error_message = (
28+
"❌ The task {{ ti.task_id }} failed. "
29+
"Please see run book for appropriate actions. "
30+
)
31+
2732
region = "uk"
2833

2934
with DAG(
@@ -50,7 +55,7 @@
5055
"assignPublicIp": "ENABLED",
5156
},
5257
},
53-
on_failure_callback=on_failure_callback,
58+
on_failure_callback=slack_message_callback(site_forecast_error_message),
5459
task_concurrency=10,
5560
awslogs_group="/aws/ecs/forecast/pvsite_forecast",
5661
awslogs_stream_prefix="streaming/pvsite_forecast-forecast",

terraform/modules/services/airflow/dags/uk/nwp-dag.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from airflow.operators.bash import BashOperator
66

77
from airflow.operators.latest_only import LatestOnlyOperator
8-
from utils.slack import on_failure_callback
8+
from utils.slack import slack_message_callback
99
from utils.s3 import determine_latest_zarr
1010

1111
default_args = {
@@ -26,6 +26,22 @@
2626

2727
# Tasks can still be defined in terraform, or defined here
2828

29+
nwp_metoffice_error_message = (
30+
"⚠️ The task {{ ti.task_id }} failed. "
31+
"But its ok, the forecast will automatically move over to a PVNET-ECMWF, "
32+
"which doesnt need Metoffice data. "
33+
"Metoffice status link is <https://datahub.metoffice.gov.uk/support/service-status|here> "
34+
"No out of office hours support is required, but please log in an incident log. "
35+
)
36+
37+
nwp_ecmwf_error_message = (
38+
"❌ The task {{ ti.task_id }} failed. "
39+
"The forecast will continue running until it runs out of data. "
40+
"ECMWF status link is <https://status.ecmwf.int/|here> "
41+
"Please see run book for appropriate actions. "
42+
)
43+
44+
2945
region = "uk"
3046

3147
if env == "development":
@@ -58,7 +74,7 @@
5874
},
5975
},
6076
task_concurrency=10,
61-
on_failure_callback=on_failure_callback,
77+
on_failure_callback=slack_message_callback(nwp_metoffice_error_message),
6278
awslogs_group="/aws/ecs/consumer/nwp-metoffice",
6379
awslogs_stream_prefix="streaming/nwp-metoffice-consumer",
6480
awslogs_region="eu-west-1",
@@ -78,6 +94,7 @@
7894
},
7995
},
8096
task_concurrency=10,
97+
on_failure_callback=slack_message_callback(nwp_ecmwf_error_message),
8198
awslogs_group="/aws/ecs/consumer/nwp-consumer-ecmwf-uk",
8299
awslogs_stream_prefix="streaming/nwp-consumer-ecmwf-uk-consumer",
83100
awslogs_region="eu-west-1",

terraform/modules/services/airflow/dags/uk/pv-dag.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from airflow.decorators import dag
66

77
from airflow.operators.latest_only import LatestOnlyOperator
8-
from utils.slack import on_failure_callback
8+
from utils.slack import slack_message_callback
99

1010
default_args = {
1111
"owner": "airflow",
@@ -25,6 +25,12 @@
2525

2626
# Tasks can still be defined in terraform, or defined here
2727

28+
pv_consumer_error_message = (
29+
"⚠️ The task {{ ti.task_id }} failed. "
30+
"But its ok, this isnt needed for any production services. "
31+
"No out of office hours support is required."
32+
)
33+
2834
region = "uk"
2935

3036
with DAG(
@@ -52,7 +58,7 @@
5258
},
5359
},
5460
task_concurrency=10,
55-
on_failure_callback=on_failure_callback,
61+
on_failure_callback=slack_message_callback(pv_consumer_error_message),
5662
awslogs_group="/aws/ecs/consumer/pv",
5763
awslogs_stream_prefix="streaming/pv-consumer",
5864
awslogs_region="eu-west-1",

0 commit comments

Comments
 (0)