Skip to content

Commit dc3718f

Browse files
committed
chore: fix next net alerts
1 parent 6b4861a commit dc3718f

File tree

6 files changed

+73
-19
lines changed

6 files changed

+73
-19
lines changed

spartan/metrics/grafana/alerts/contactpoints.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,3 +412,46 @@ contactPoints:
412412
with (index .CommonLabels "network")
413413
{{ "}}" }}{{ "{{" }} urlquery . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown{{ "{{" }} end {{ "}}" }}{{ "{{" }} end {{ "}}" }}
414414
disableResolveMessage: false
415+
416+
- orgId: 1
417+
name: "Slack #alerts-devnet by namespace"
418+
receivers:
419+
- uid: alertsdevnetbynamespace
420+
type: slack
421+
settings:
422+
url: $SLACK_WEBHOOK_DEVNET_URL
423+
text: |-
424+
{{ "{{" }} if gt (len .Alerts) 0 {{ "}}" }}
425+
*Alerts:*
426+
{{ "{{" }} range .Alerts {{ "}}" }}
427+
- {{ "{{" }} with (index .Labels "k8s_namespace_name") {{ "}}" }}{{ "{{" }} . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown-namespace{{ "{{" }} end {{ "}}" }}: {{ "{{" }} with (index .Annotations "summary") {{ "}}" }}{{ "{{" }} . {{ "}}" }}{{ "{{" }} else {{ "}}" }}(no summary){{ "{{" }} end {{ "}}" }}
428+
{{ "{{" }} end {{ "}}" }}
429+
{{ "{{" }} else {{ "}}" }}
430+
*Alerts:*
431+
- {{ "{{" }} with (index .CommonAnnotations "summary") {{ "}}" }}{{ "{{" }} . {{ "}}" }}{{ "{{" }} else {{ "}}" }}(no summary){{ "{{" }} end {{ "}}" }}
432+
{{ "{{" }} end {{ "}}" }}
433+
434+
*Grafana overview:*
435+
{{ "{{" }} .ExternalURL {{ "}}" }}d/aztec-network/network-overview?orgId=1&refresh=30s&var-data_source=default&var-namespace={{ "{{" }}
436+
if gt (len .Alerts) 0
437+
{{ "}}" }}{{ "{{" }}
438+
with (index (index .Alerts 0).Labels "k8s_namespace_name")
439+
{{ "}}" }}{{ "{{" }} urlquery . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown{{ "{{" }} end {{ "}}" }}{{ "{{" }} else {{ "}}" }}{{ "{{" }}
440+
with (index .CommonLabels "k8s_namespace_name")
441+
{{ "}}" }}{{ "{{" }} urlquery . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown{{ "{{" }} end {{ "}}" }}{{ "{{" }} end {{ "}}" }}{{ "{{" }} printf "\n" {{ "}}" }}
442+
443+
*GKE workloads:*
444+
https://console.cloud.google.com/kubernetes/workload/overview?project={{ "{{" }}
445+
if gt (len .Alerts) 0
446+
{{ "}}" }}{{ "{{" }}
447+
with (index (index .Alerts 0).Labels "gcp_project")
448+
{{ "}}" }}{{ "{{" }} . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown{{ "{{" }} end {{ "}}" }}{{ "{{" }} else {{ "}}" }}{{ "{{" }}
449+
with (index .CommonLabels "gcp_project")
450+
{{ "}}" }}{{ "{{" }} . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown{{ "{{" }} end {{ "}}" }}{{ "{{" }} end {{ "}}" }}&supportedpurview=project&pageState=%28%22savedViews%22%3A%28%22n%22%3A%5B%22{{ "{{" }}
451+
if gt (len .Alerts) 0
452+
{{ "}}" }}{{ "{{" }}
453+
with (index (index .Alerts 0).Labels "k8s_namespace_name")
454+
{{ "}}" }}{{ "{{" }} urlquery . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown{{ "{{" }} end {{ "}}" }}{{ "{{" }} else {{ "}}" }}{{ "{{" }}
455+
with (index .CommonLabels "k8s_namespace_name")
456+
{{ "}}" }}{{ "{{" }} urlquery . {{ "}}" }}{{ "{{" }} else {{ "}}" }}unknown{{ "{{" }} end {{ "}}" }}{{ "{{" }} end {{ "}}" }}%22%5D%29%29
457+
disableResolveMessage: false

spartan/metrics/grafana/alerts/mutetimes.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ muteTimes:
44
name: nightly_deployment
55
time_intervals:
66
- times:
7-
- start_time: "05:00"
8-
end_time: "05:30"
7+
- start_time: "04:00"
8+
end_time: "05:00"
99
weekdays: [monday, tuesday, wednesday, thursday, friday, saturday, sunday]
10-
location: Europe/London

spartan/metrics/grafana/alerts/policies.yaml

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ policies:
4545
- - k8s_namespace_name
4646
- =~
4747
- $NEXT_NET_REGEX
48+
mute_time_intervals:
49+
- nightly_deployment
50+
51+
# Devnet - route alerts by Kubernetes namespace only
52+
- receiver: "Slack #alerts-devnet by namespace"
53+
object_matchers:
54+
- - k8s_namespace_name
55+
- =~
56+
- $DEVNET_NAMESPACES_REGEX
4857

4958
# Testnet - by namespace
5059
- receiver: "Slack #alerts-testnet by namespace"
@@ -74,17 +83,3 @@ policies:
7483
- =~
7584
- $MAINNET_NAMESPACES_REGEX
7685

77-
# Production - by namespace
78-
- receiver: "Slack #network-alerts channel by namespace"
79-
object_matchers:
80-
- - k8s_namespace_name
81-
- =~
82-
- $PRODUCTION_NAMESPACES_REGEX
83-
# Nightly - by namespace
84-
- receiver: "Slack #network-alerts channel by namespace"
85-
object_matchers:
86-
- - k8s_namespace_name
87-
- =~
88-
- $NIGHTLY_NAMESPACES_REGEX
89-
mute_time_intervals:
90-
- nightly_deployment

spartan/metrics/values.tmp.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,20 +117,21 @@ grafana:
117117
domain:
118118
env:
119119
PRODUCTION_NAMESPACES_REGEX: "v2-testnet|staging-public|staging-ignition"
120-
NIGHTLY_NAMESPACES_REGEX: "next-rc-1"
121120
STAGING_PUBLIC_REGEX: "staging-public"
122121
STAGING_IGNITION_REGEX: "staging-ignition|ignition-fisherman-sepolia"
123122
NEXT_SCENARIO_REGEX: "v[0-9]+-scenario|next-scenario"
124-
NEXT_NEXT_REGEX: "next-net"
123+
NEXT_NET_REGEX: "next-net"
125124
TESTNET_NAMESPACES_REGEX: "testnet|v[0-9]+-testnet"
126125
MAINNET_NAMESPACES_REGEX: "mainnet|v[0-9]+-mainnet|ignition"
126+
DEVNET_NAMESPACES_REGEX: "devnet"
127127
SLACK_WEBHOOK_URL: "http://127.0.0.1" # dummy value
128128
SLACK_WEBHOOK_STAGING_PUBLIC_URL: "http://127.0.0.1" # dummy value
129129
SLACK_WEBHOOK_STAGING_IGNITION_URL: "http://127.0.0.1" # dummy value
130130
SLACK_WEBHOOK_NEXT_SCENARIO_URL: "http://127.0.0.1" # dummy value
131131
SLACK_WEBHOOK_NEXT_NET_URL: "http://127.0.0.1" # dummy value
132132
SLACK_WEBHOOK_TESTNET_URL: "http://127.0.0.1" # dummy value
133133
SLACK_WEBHOOK_MAINNET_URL: "http://127.0.0.1" # dummy value
134+
SLACK_WEBHOOK_DEVNET_URL: "http://127.0.0.1" # dummy value
134135
datasources:
135136
datasources.yaml:
136137
apiVersion: 1

spartan/terraform/deploy-metrics/main.tf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ data "google_secret_manager_secret_version" "slack_webhook_next_net" {
9595
project = var.project
9696
}
9797

98+
data "google_secret_manager_secret_version" "slack_webhook_devnet" {
99+
secret = var.SLACK_WEBHOOK_DEVNET_SECRET_NAME
100+
project = var.project
101+
}
102+
98103
data "google_secret_manager_secret_version" "slack_webhook_testnet" {
99104
secret = var.SLACK_WEBHOOK_TESTNET_SECRET_NAME
100105
project = var.project
@@ -183,6 +188,11 @@ resource "helm_release" "aztec-gke-cluster" {
183188
value = data.google_secret_manager_secret_version.slack_webhook_next_net.secret_data
184189
}
185190

191+
set {
192+
name = "grafana.env.SLACK_WEBHOOK_DEVNET_URL"
193+
value = data.google_secret_manager_secret_version.slack_webhook_devnet.secret_data
194+
}
195+
186196
set {
187197
name = "grafana.env.SLACK_WEBHOOK_TESTNET_URL"
188198
value = data.google_secret_manager_secret_version.slack_webhook_testnet.secret_data

spartan/terraform/deploy-metrics/variables.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ variable "SLACK_WEBHOOK_NEXT_NET_SECRET_NAME" {
5252
default = "slack-webhook-next-net-url"
5353
}
5454

55+
variable "SLACK_WEBHOOK_DEVNET_SECRET_NAME" {
56+
description = "Webhook for devnet alerts"
57+
type = string
58+
default = "slack-webhook-devnet-url"
59+
}
60+
5561
variable "SLACK_WEBHOOK_TESTNET_SECRET_NAME" {
5662
description = "Webhook for testnet alerts"
5763
type = string

0 commit comments

Comments
 (0)