Skip to content

Commit f6cd436

Browse files
Incorporate separate StackHPC cloud tests for monitoring and for hosts (#1501)
CI: Add Grafana and OpenSearch Dashboards variables for SOT
Depends-On: stackhpc/stackhpc-cloud-tests#3
Depends-On: stackhpc/stackhpc-cloud-tests#4
Co-authored-by: Mark Goddard <[email protected]>
1 parent cd0d44e commit f6cd436

File tree

3 files changed

+124
-20
lines changed

3 files changed

+124
-20
lines changed

.github/workflows/stackhpc-all-in-one.yml

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ jobs:
468468
-v $(pwd)/sct-results:/stack/sct-results \
469469
-e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \
470470
$KAYOBE_IMAGE \
471-
/stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/stackhpc-cloud-tests.yml' -e sot_version=${{ inputs.stackhpc_cloud_tests_version }}
471+
/stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/stackhpc-cloud-tests.yml' -e sct_version=${{ inputs.stackhpc_cloud_tests_version }}
472472
env:
473473
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}
474474

@@ -496,16 +496,20 @@ jobs:
496496
sct-results/
497497
if: ${{ !cancelled() && (steps.tempest.outcome == 'success' || steps.stackhpc-cloud-tests.outcome == 'success' || steps.diagnostics.outcome == 'success') }}
498498

499-
- name: Fail if any Tempest tests failed
499+
- name: Fail if any tests failed
500500
run: |
501-
test $(wc -l < tempest-artifacts/failed-tests) -lt 1
502-
503-
- name: Fail if any StackHPC Cloud tests failed
504-
run: |
505-
echo "Some StackHPC Cloud tests failed."
506-
echo "See HTML results artifact (sct-results) for details."
507-
exit 1
508-
if: steps.stackhpc-cloud-tests.outcome == 'failure'
501+
rc=0
502+
if [[ $(wc -l < tempest-artifacts/failed-tests) -ne 0 ]]; then
503+
echo "Some Tempest tests failed."
504+
echo "See HTML results artifact (tempest-artifacts) for details."
505+
rc=1
506+
fi
507+
if [[ $(wc -l < sct-results/failed-tests) -ne 0 ]]; then
508+
echo "Some StackHPC Cloud tests failed."
509+
echo "See HTML results artifact (sct-results) for details."
510+
rc=1
511+
fi
512+
exit $rc
509513
510514
- name: Destroy
511515
run: terraform destroy -auto-approve

etc/kayobe/ansible/stackhpc-cloud-tests.yml

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
---
22
- name: Run StackHPC Cloud tests
3-
hosts: tempest_runner
3+
hosts: tempest_runner:overcloud
44
tags:
55
- stackhpc-cloud-tests
66
vars:
77
sct_venv: "{{ virtualenv_path }}/sct-venv"
88
sct_repo: https://github.com/stackhpc/stackhpc-cloud-tests
9+
# Version of SCT used for testing. The GitHub workflow overrides this with its
10+
# stackhpc_cloud_tests_version input, so this value is only used when running "locally".
911
sct_version: main
1012
sct_timeout: 30
1113
results_path_local: "{{ lookup('env', 'HOME') }}/sct-results"
1214
tasks:
1315
- name: Stackhpc Cloud tests
1416
block:
17+
- name: Assert that there is only one host in the tempest_runner group
18+
ansible.builtin.assert:
19+
that: groups.get('tempest_runner', []) | length == 1
20+
fail_msg: The tempest_runner group should contain exactly one host
21+
1522
- name: Create a temporary directory for tests repo
1623
ansible.builtin.tempfile:
1724
state: directory
@@ -45,7 +52,6 @@
4552
- name: Ensure required individual Python packages are installed
4653
ansible.builtin.pip:
4754
name:
48-
- "{{ repo_tmpdir.path }}"
4955
- pytest-html
5056
- pytest-timeout
5157
virtualenv: "{{ sct_venv }}"
@@ -60,38 +66,125 @@
6066
file: "{{ kayobe_env_config_path }}/kolla/passwords.yml"
6167
name: kolla_passwords
6268

63-
- name: Run StackHPC Cloud tests
69+
# Monitoring tests should run once, executed on the host in the
70+
# tempest_runner group.
71+
- name: Check for StackHPC Cloud monitoring tests
72+
ansible.builtin.stat:
73+
path: "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/monitoring"
74+
register: stackhpc_cloud_monitoring_tests
75+
76+
- name: Run StackHPC Cloud monitoring tests
6477
ansible.builtin.command:
6578
cmd: >
6679
{{ sct_venv }}/bin/py.test
67-
--html={{ results_tmpdir.path }}/stackhpc-cloud-tests.html
80+
--html={{ results_tmpdir.path }}/monitoring.html
6881
--self-contained-html
69-
--pyargs stackhpc_cloud_tests
7082
--timeout {{ sct_timeout }}
7183
-rfEx
7284
-vv
85+
"{{ repo_tmpdir.path }}/stackhpc_cloud_tests/monitoring"
7386
environment:
87+
GRAFANA_URL: "{{ sct_grafana_url }}"
88+
GRAFANA_USERNAME: "{{ sct_grafana_username }}"
89+
GRAFANA_PASSWORD: "{{ sct_grafana_password }}"
7490
OPENSEARCH_HOSTS: "{{ sct_opensearch_hosts }}"
7591
OPENSEARCH_PORT: "{{ sct_opensearch_port }}"
7692
OPENSEARCH_TLS: "{{ sct_opensearch_tls }}"
93+
OPENSEARCH_DASHBOARDS_URL: "{{ sct_opensearch_dashboards_url }}"
94+
OPENSEARCH_DASHBOARDS_USERNAME: "{{ sct_opensearch_dashboards_username }}"
95+
OPENSEARCH_DASHBOARDS_PASSWORD: "{{ sct_opensearch_dashboards_password }}"
7796
PROMETHEUS_URL: "{{ sct_prometheus_url }}"
7897
PROMETHEUS_USERNAME: "{{ sct_prometheus_username }}"
7998
PROMETHEUS_PASSWORD: "{{ sct_prometheus_password }}"
8099
vars:
81100
kolla_external_scheme: "{{ 'https' if kolla_enable_tls_external | bool else 'http' }}"
82101
kolla_internal_scheme: "{{ 'https' if kolla_enable_tls_internal | bool else 'http' }}"
102+
sct_grafana_url: "{{ kolla_external_scheme }}://{{ kolla_external_fqdn }}:3000"
103+
sct_grafana_username: "grafana_local_admin"
104+
sct_grafana_password: "{{ kolla_passwords.grafana_admin_password }}"
83105
sct_opensearch_hosts: "{{ kolla_internal_fqdn }}"
84106
sct_opensearch_port: 9200
85-
sct_opensearch_tls: false
107+
sct_opensearch_tls: "{{ kolla_enable_tls_internal | bool }}"
108+
sct_opensearch_dashboards_url: "{{ kolla_external_scheme }}://{{ kolla_external_fqdn }}:5601"
109+
sct_opensearch_dashboards_username: "opensearch"
110+
sct_opensearch_dashboards_password: "{{ kolla_passwords.opensearch_dashboards_password }}"
86111
sct_prometheus_url: "{{ kolla_internal_scheme }}://{{ kolla_internal_fqdn }}:9091"
87112
sct_prometheus_username: admin
88113
sct_prometheus_password: "{{ kolla_passwords.prometheus_password }}"
114+
failed_when: monitoring_results.rc not in [0, 1]
115+
register: monitoring_results
116+
when: "'tempest_runner' in group_names and stackhpc_cloud_monitoring_tests.stat.exists"
117+
118+
# Host tests should run on every host in the overcloud group.
119+
# TODO: Use TestInfra's native Ansible or SSH connection plugins for
120+
# remote test execution? That would place all results in a single file
121+
# and allow us to execute all tests from a single host.
122+
# https://testinfra.readthedocs.io/en/latest/backends.html#connection-backends
123+
- name: Check for StackHPC Cloud host tests
124+
ansible.builtin.stat:
125+
path: "{{ repo_tmpdir.path }}/stackhpc_cloud_tests/host"
126+
register: stackhpc_cloud_host_tests
127+
128+
- name: Run StackHPC Cloud host tests
129+
ansible.builtin.command:
130+
cmd: >
131+
{{ sct_venv }}/bin/py.test
132+
--html={{ results_tmpdir.path }}/host-{{ inventory_hostname }}.html
133+
--self-contained-html
134+
--timeout {{ sct_timeout }}
135+
-vv
136+
"{{ repo_tmpdir.path }}/stackhpc_cloud_tests/host"
137+
environment:
138+
DOCKER_VERSION_MIN: "{{ sct_docker_version_min }}"
139+
DOCKER_VERSION_MAX: "{{ sct_docker_version_max }}"
140+
SELINUX_STATE: "{{ sct_selinux_state }}"
141+
vars:
142+
# Inclusive min
143+
sct_docker_version_min: "24.0.0"
144+
# Exclusive max
145+
sct_docker_version_max: "28.0.0"
146+
sct_selinux_state: "{{ selinux_state }}"
147+
failed_when: host_results.rc not in [0, 1]
148+
register: host_results
149+
# Some host checks may need to run as root
150+
become: true
151+
when: "'overcloud' in group_names and stackhpc_cloud_host_tests.stat.exists"
152+
153+
# Host test results will be owned by root - we need to read and delete them
154+
- name: Change permissions on SCT host test results
155+
ansible.builtin.command:
156+
cmd: chmod 666 {{ results_tmpdir.path }}/host-{{ inventory_hostname }}.html
157+
become: true
158+
when: "'overcloud' in group_names and stackhpc_cloud_host_tests.stat.exists"
159+
89160
always:
90-
- name: Fetch results
91-
ansible.builtin.fetch:
92-
src: "{{ results_tmpdir.path }}/stackhpc-cloud-tests.html"
161+
- name: Synchronize results
162+
ansible.posix.synchronize:
163+
src: "{{ results_tmpdir.path }}/"
93164
dest: "{{ results_path_local }}/"
94-
flat: true
165+
mode: pull
166+
archive: no
167+
recursive: true
168+
# For jump host
169+
use_ssh_args: true
170+
171+
- name: Write a file containing failed test runs
172+
ansible.builtin.copy:
173+
content: |-
174+
{% for host in ansible_play_hosts_all %}
175+
{% if host not in ansible_play_hosts %}
176+
{{ host }}: Host failure
177+
{% endif %}
178+
{% if hostvars[host].monitoring_results.rc | default(0) != 0 %}
179+
monitoring.html
180+
{% endif %}
181+
{% if hostvars[host].host_results.rc | default(0) != 0 %}
182+
host-{{ host }}.html
183+
{% endif %}
184+
{% endfor %}
185+
dest: "{{ results_path_local }}/failed-tests"
186+
delegate_to: localhost
187+
run_once: true
95188

96189
- name: Clean up temporary directory
97190
ansible.builtin.file:
@@ -100,3 +193,5 @@
100193
loop:
101194
- "{{ repo_tmpdir.path }}"
102195
- "{{ results_tmpdir.path }}"
196+
# Some files used by host tests may now be owned by root
197+
become: true
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
# Target SELinux state
3+
# NOTE(MaxN): The StackHPC cloud tests check that the host's SELinux state matches the targeted state,
4+
# but we can't access the value defined upstream, so we redefine it here. This must follow any upstream change.
5+
selinux_state: permissive

0 commit comments

Comments
 (0)