|
17 | 17 | ansible.builtin.shell:  # Merge-patches the "default" stf object with an inline alertmanagerConfigManifest. |
18 | 18 | cmd: |  # NOTE(review): the inner single quotes in the payload close the shell's -p '...' quoting, so the shell strips them (e.g. receiver: 'null' is sent as receiver: null) - confirm this is intended. |
19 | 19 | oc patch stf default --type merge -p '{"spec": {"alertmanagerConfigManifest": "apiVersion: v1\nkind: Secret\nmetadata:\n name: 'alertmanager-default'\n namespace: 'service-telemetry'\ntype: Opaque\nstringData:\n alertmanager.yaml: |-\n global:\n resolve_timeout: 10m\n route:\n group_by: ['job']\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 12h\n receiver: 'null'\n receivers:\n - name: 'null'\n"}}' |
20 | | - changed_when: false |
| 20 | + changed_when: 'cmd_output.stdout == "servicetelemetry.infra.watch/default patched"'  # the registered result is a dict, so compare its stdout; a "(no change)" suffix from oc does not count as changed |
21 | 21 | register: cmd_output |
22 | 22 | failed_when: cmd_output.rc != 0 |
23 | 23 |
|
|
27 | 27 |
|
28 | 28 | # oc get secret alertmanager-default -o go-template='{{index .data "alertmanager.yaml" | base64decode }}'
29 | 29 | # The go-template form above can't be used here: Ansible would treat its "{{" and "}}" as Jinja2 templating syntax. The jsonpath query used instead escapes the period in the "alertmanager.yaml" key as "\." so that it is read as part of the key name.
30 | | - # The alertmanager.yaml key needed to be surrounded by [".."] because of the period in the key name.
31 | 30 | - name: "Get the updated secret"  # reads the alertmanager.yaml entry of the alertmanager-default secret
32 | 31 | ansible.builtin.shell:
33 | 32 | cmd: | |
34 | | - oc get secret alertmanager-default -ojson | jq '.data | .["alertmanager.yaml"]'
| 33 | + oc get secret alertmanager-default -ojsonpath="{ .data.alertmanager\.yaml }"
35 | 34 | register: cmd_output  # value is taken from .data as-is; presumably still base64-encoded and decoded by a later task - verify
36 | 35 | changed_when: false  # read-only query, never reported as a change
37 | 36 |
|
|
78 | 77 | cmd: >- |
79 | 78 | oc exec -it prometheus-default-0 -c prometheus -- /bin/sh -c 'curl -k -H \ |
80 | 79 | "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ |
81 | | - https://default-alertmanager-proxy:9095/api/v1/alerts' | grep 'active' | grep 'FVT_TESTING Collectd metrics receive rate is zero' |
| 80 | + https://default-alertmanager-proxy:9095/api/v1/alerts' | grep 'active' |
82 | 81 | register: cmd_output |
| 82 | + retries: 30 |
| 83 | + delay: 10 |
83 | 84 | changed_when: false |
84 | | - failed_when: cmd_output.stdout_lines | length == 0 |
| 85 | + until: '"FVT_TESTING Collectd metrics receive rate is zero" in cmd_output.stdout' |
85 | 86 |
|
86 | 87 | - name: "RHELOSP-148699 Verify that the alert is firing in Prometheus"  # passes only when the FVT_TESTING alert itself is in state "firing"
87 | 88 | ansible.builtin.shell:
88 | 89 | cmd: >-  # jq iterates the alerts one by one and prints the alertname of only those whose own state is "firing"
89 | | - /usr/bin/curl -k {{ prom_auth_string }} -g https://{{ prom_url }}/api/v1/alerts | grep 'firing' | grep 'FVT_TESTING Collectd metrics receive rate is zero'
| 90 | + curl -k {{ prom_auth_string }} -g https://{{ prom_url }}/api/v1/alerts | jq '.data.alerts[] | select(.state == "firing") | .labels.alertname'
| 91 | + register: cmd_output
| 92 | + changed_when: false
| 93 | + failed_when: '"FVT_TESTING Collectd metrics receive rate is zero" not in cmd_output.stdout'  # stdout lists firing alert names only, so a pending FVT_TESTING alert fails the check
| 94 | + |
| 95 | + - name: "Check what alerts are firing in prometheus"  # fetches the raw /api/v1/alerts response; no state filtering is applied here
| 96 | + ansible.builtin.command:
| 97 | + cmd: >-
| 98 | + curl -k {{ prom_auth_string }} -g https://{{ prom_url }}/api/v1/alerts
90 | 99 | register: cmd_output  # reuses the cmd_output name registered by the preceding task, replacing that result
91 | 100 | changed_when: false  # read-only request, never reported as a change
92 | | - failed_when: cmd_output.stdout_lines | length == 0 |
93 | 101 |
|
94 | 102 | always: |
95 | 103 | - name: "Delete the PrometheusRule" |
|
117 | 125 | register: output |
118 | 126 | until: output.stdout_lines | length == expected_pods.stdout_lines | length |
119 | 127 | changed_when: false |
120 | | - |
121 | 128 |
|
122 | 129 | - name: "RHELOSP-176039 Remove alertmanagerConfigManifest from the ServiceTelemetry object" |
123 | 130 | ansible.builtin.shell: |
|
0 commit comments