Skip to content

Commit 430733c

Browse files
committed
Ignore ContainerKilled prometheus alert - in the context of tempest
1 parent 8c89c35 commit 430733c

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

stackhpc_cloud_tests/monitoring/test_prometheus.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,19 +57,33 @@ def test_prometheus_alerts_inactive(prom):
5757
assert response["status"] == "success"
5858
assert "data" in response
5959
alerts = response["data"]["alerts"] or []
60+
6061
# (MaxN) Allow for, and filter out, alerts we'd expect to see in an AIO environment.
61-
# TODO - find a way of configuring this for SCT runs in other environments.
62-
alerts_to_ignore = [
62+
# TODO - find a way of configuring this for SCT running in other environments.
63+
aio_alerts_to_ignore = [
6364
# We know our volumes are small.
64-
"StorageFillingUp",
65+
{ "alertname": "StorageFillingUp", "instance": "controller0" },
6566
# This is probably due to storage space..
66-
"ElasticsearchClusterYellow",
67+
{ "alertname": "ElasticsearchClusterYellow", "instance": "controller0" },
6768
# ..or because we're running in a single instance and it wants to be clustered across multiple nodes.
68-
"ElasticsearchUnassignedShards",
69+
{ "alertname": "ElasticsearchUnassignedShards", "instance": "controller0" },
6970
# It's a small AIO!
70-
"LowMemory",
71+
{ "alertname": "LowMemory", "instance": "controller0" },
7172
# It's only one node and expects three, see https://github.com/stackhpc/stackhpc-kayobe-config/pull/1579
72-
"RabbitMQNodeDown"
73+
{ "alertname": "RabbitMQNodeDown" },
74+
# This is probably because Tempest runs before pytest so the container has been recently stopped.
75+
{ "alertname": "ContainerKilled", "name": "tempest" }
7376
]
74-
alerts = [ alert for alert in alerts if alert["labels"]["alertname"] not in alerts_to_ignore ]
77+
78+
def alert_is_ignored(alert, alerts_to_ignore):
    """Return True if ``alert`` matches at least one ignore rule.

    Args:
        alert: Mapping of label name to label value for a single alert
            (the caller passes ``alert["labels"]``).
        alerts_to_ignore: Iterable of mappings; each one is an ignore rule
            listing the label/value pairs that must ALL be present in
            ``alert`` for the rule to match.

    Returns:
        True when every key/value pair of some rule is also present in
        ``alert``; False otherwise, including when there are no rules.
        An empty rule matches every alert.
    """
    # Dict item views are set-like, so ``<=`` is a subset test: the rule
    # matches when all of its label/value pairs also appear on the alert.
    # Extra labels on the alert do not prevent a match.
    alert_items = alert.items()
    return any(
        alert_to_ignore.items() <= alert_items
        for alert_to_ignore in alerts_to_ignore
    )
87+
88+
alerts = [ alert for alert in alerts if not alert_is_ignored(alert["labels"], aio_alerts_to_ignore) ]
7589
assert len(alerts) == 0

0 commit comments

Comments
 (0)