Skip to content

Commit 82208d1

Browse files
authored
Merge pull request #205 from stackhpc/fix/grafana859
Update grafana and fix control image build grafana issues
2 parents a4ab33f + 153fc30 commit 82208d1

File tree

14 files changed

+222
-93
lines changed

14 files changed

+222
-93
lines changed

.github/workflows/stackhpc.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,15 @@ jobs:
163163
ANSIBLE_FORCE_COLOR: True
164164
OS_CLOUD: openstack
165165

166+
- name: Check MPI-based tests are shown in Grafana
167+
run: |
168+
. venv/bin/activate
169+
. environments/${{ matrix.cloud }}/activate
170+
ansible-playbook -vv ansible/ci/check_grafana.yml
171+
env:
172+
ANSIBLE_FORCE_COLOR: True
173+
OS_CLOUD: openstack
174+
166175
- name: Delete infrastructure
167176
run: |
168177
. venv/bin/activate

ansible/.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ roles/*
1010
!roles/podman/**
1111
!roles/grafana-dashboards/
1212
!roles/grafana-dashboards/**
13-
!roles/grafana-datasources/
14-
!roles/grafana-datasources/**
1513
!roles/passwords/
1614
!roles/passwords/**
1715
!roles/fail2ban/

ansible/ci/check_grafana.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Checks Slurm jobs from hpctests are shown in Grafana.
# Can't actually check the dashboard programmatically so this queries the
# datasource used by the dashboard instead.

- hosts: control  # so proxying etc is irrelevant
  gather_facts: false
  become: false
  tasks:
    - name: Wait for slurm-stats file to exist (run by cron)
      ansible.builtin.wait_for:
        path: /var/log/slurm-stats/finished_jobs.json
        timeout: 315  # slurm stats cron job runs every 5 mins

    # Polls the datasource until every expected job name has been seen.
    # 60 retries x 5 second delay gives the data about 5 minutes to arrive.
    - name: Query grafana for expected hpctests jobs
      grafana_elasticsearch_query:
        grafana_url: "http://{{ grafana_api_address }}:{{ grafana_port }}"
        grafana_username: grafana
        grafana_password: "{{ vault_grafana_admin_password }}"
        datasource: slurmstats
        index_pattern: 'filebeat-*'
      register: _slurm_stats_jobs
      until: _expected_jobs | difference(_found_jobs) == []
      retries: 60
      delay: 5
      vars:
        # default([]) keeps the `until` expression evaluable when an attempt
        # failed and therefore registered no `docs` key.
        # Documents without a JobName map to a placeholder string, which never
        # matches an expected job.
        _found_jobs: "{{ _slurm_stats_jobs.docs | default([]) | map(attribute='JobName', default='(json error in slurmstats data)') }}"
        _expected_jobs: ['hpl-solo.sh', 'pingpong.sh', 'pingmatrix.sh']
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/python
2+
3+
# Copyright: (c) 2022 Steve Brasier [email protected]
4+
from __future__ import (absolute_import, division, print_function)
5+
__metaclass__ = type
6+
7+
DOCUMENTATION = r'''
8+
---
9+
module: grafana_elasticsearch_query
10+
11+
short_description: Get elasticsearch hits via grafana
12+
13+
version_added: "1.0.0"
14+
15+
description: Returns hits from selected datasource and indices.
16+
17+
author:
18+
- Steve Brasier
19+
'''
20+
21+
EXAMPLES = r'''
22+
- name: Get elasticsearch hits
23+
grafana_elasticsearch_query:
24+
grafana_url: http://{{ grafana_api_address }}:{{ grafana_port }}
25+
grafana_username: grafana
26+
grafana_password: "{{ vault_grafana_admin_password }}"
27+
datasource: slurmstats
28+
index_pattern: 'filebeat-*'
29+
'''
30+
31+
RETURN = r'''
32+
# Values returned by this module.
33+
docs:
34+
description: List of dicts with the original json in each document.
35+
returned: always
36+
type: list
37+
'''
38+
39+
from ansible.module_utils.basic import AnsibleModule
40+
import requests
41+
import json
42+
43+
def _get_json(module, url, auth):
    """GET ``url`` with HTTP basic auth and return the decoded JSON body.

    Any connection error, non-2xx status or undecodable body ends the module
    via ``module.fail_json`` rather than surfacing as a raw traceback.
    """
    try:
        response = requests.get(url, auth=auth)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        module.fail_json(msg="Request to %s failed: %s" % (url, exc))


def run_module():
    """Return the documents Grafana's datasource proxy finds in Elasticsearch.

    Module parameters (all required strings):
        grafana_url: Base URL of the Grafana server.
        grafana_username / grafana_password: Credentials for HTTP basic auth.
        datasource: Name of the Grafana datasource to query.
        index_pattern: Elasticsearch index pattern to search.

    Exits with ``docs``: a list holding the ``_source.json`` value of each
    search hit. Fails the module if a request fails, the datasource name is
    not found, or the search response does not have the expected structure.
    """
    module_args = dict(
        grafana_url=dict(type="str", required=True),
        grafana_username=dict(type="str", required=True),
        # no_log stops Ansible echoing the password in logs and task output
        grafana_password=dict(type="str", required=True, no_log=True),
        datasource=dict(type="str", required=True),
        index_pattern=dict(type="str", required=True),
    )

    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)

    auth = (module.params['grafana_username'], module.params['grafana_password'])
    grafana_url = module.params["grafana_url"]
    wanted = module.params["datasource"]

    # list datasources:
    datasources = _get_json(module, grafana_url + '/api/datasources', auth)

    # select required datasource:
    try:
        matches = [s for s in datasources if s['name'] == wanted]
    except (KeyError, TypeError) as exc:
        module.fail_json(msg="Unexpected datasource list from Grafana: %r" % (exc,))
    if not matches:
        module.fail_json(msg="No Grafana datasource named '%s' found" % wanted)
    ds = matches[0]

    # get documents (the search goes through Grafana's datasource proxy):
    datasource_proxy_url = (
        grafana_url + '/api/datasources/proxy/' + str(ds['id'])
        + '/' + module.params['index_pattern'] + '/_search'
    )
    search = _get_json(module, datasource_proxy_url, auth)

    # see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-api-response-body:
    try:
        docs = [h['_source']['json'] for h in search['hits']['hits']]
    except (KeyError, TypeError) as exc:
        module.fail_json(msg="Unexpected search response structure: %r" % (exc,))

    # This module only reads data, so it never reports a change.
    module.exit_json(changed=False, docs=docs)


def main():
    """Entry point used when Ansible executes the module."""
    run_module()


if __name__ == '__main__':
    main()

ansible/monitoring.yml

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -106,23 +106,10 @@
106106
- assert:
107107
that: vault_grafana_admin_password is defined
108108
fail_msg: "Must define vault_grafana_admin_password - use `ansible-playbook generate-passwords.yml` to generate a set of passwords"
109-
- import_role:
109+
- include_role:
110110
name: cloudalchemy.grafana
111111
vars:
112-
# We use internal roles to register the datasources and dashboards as the roles
113-
# does not support all options that we require.
112+
# We use internal roles to register the dashboards as the role does not support all options that we require.
114113
grafana_dashboards: []
115-
grafana_datasources: []
116-
117-
- name: Initialise grafana
118-
hosts: grafana
119-
tags:
120-
- grafana-init
121-
tasks:
122-
- assert:
123-
that: vault_grafana_admin_password is defined
124-
fail_msg: "Must define vault_grafana_admin_password - use `ansible-playbook generate-passwords.yml` to generate a set of passwords"
125-
- import_role:
126-
name: grafana-datasources
127-
- import_role:
114+
- import_role: # done in same play so it can use handlers from cloudalchemy.grafana
128115
name: grafana-dashboards

ansible/roles/grafana-dashboards/defaults/main.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,7 @@ grafana_api_url: "{{ grafana_url }}"
99

1010
grafana_security:
1111
admin_user: admin
12-
admin_password: ""
12+
admin_password: ""
13+
14+
grafana_data_dir: "/var/lib/grafana"
15+
grafana_dashboards_dir: "dashboards"

ansible/roles/grafana-dashboards/tasks/main.yml

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# The MIT License (MIT)
33

44
# Copyright (c) 2017-2018 Pawel Krupa, Roman Demachkovych
5+
# Copyright (c) 2022 Steve Brasier
56

67
# Permission is hereby granted, free of charge, to any person obtaining a copy
78
# of this software and associated documentation files (the "Software"), to deal
@@ -22,7 +23,6 @@
2223
# SOFTWARE.
2324

2425
- become: false
25-
run_once: true
2626
block:
2727
- name: Create local grafana dashboard directory
2828
tempfile:
@@ -108,17 +108,51 @@
108108
when:
109109
- grafana_dashboards | length > 0
110110

111-
- name: import grafana dashboards
112-
community.grafana.grafana_dashboard:
113-
grafana_url: "{{ grafana_api_url }}"
114-
grafana_user: "{{ grafana_security.admin_user }}"
115-
grafana_password: "{{ grafana_security.admin_password }}"
116-
path: "{{ _tmp_dashboards.path }}/{{ ((item.dashboard_id | string) + '.json') if 'dashboard_id' in item else item.dashboard_file }}"
117-
# message renamed to commit_message (Be aware if using old ansible)
118-
# https://github.com/ansible/ansible/pull/60051
119-
#commit_message: Updated by ansible
120-
state: present
121-
overwrite: true
122-
#no_log: true
123-
with_items: "{{ grafana_dashboards }}"
124-
when: grafana_state | default('started') != 'stopped'
111+
# Tell Grafana to load dashboards from files on disk (file provisioning).
- name: Create/Update dashboards file (provisioning)
  become: true
  copy:
    dest: "/etc/grafana/provisioning/dashboards/ansible.yml"
    content: |
      apiVersion: 1
      providers:
        - name: 'default'
          orgId: 1
          folder: ''
          type: file
          options:
            path: "{{ grafana_data_dir }}/dashboards"
    backup: false
    owner: root
    group: grafana
    # Quoted so the value is always read as an octal permission string.
    mode: "0640"
  notify: restart grafana

# Record what is in the dashboards directory before copying, so that
# dashboards which are no longer defined can be removed afterwards.
- name: Register previously copied dashboards
  become: true
  find:
    paths: "{{ grafana_data_dir }}/dashboards"
    hidden: true
    patterns:
      - "*.json"
  register: _dashboards_present

- name: Import grafana dashboards
  become: true
  copy:
    remote_src: true
    src: "{{ _tmp_dashboards.path }}/"  # Note trailing / to only copy contents, not directory itself
    dest: "{{ grafana_data_dir }}/dashboards/"
  register: _dashboards_copied
  notify: "provisioned dashboards changed"

# NOTE(review): the copy task above has no loop, so its registered result
# presumably has no `results` key and `_dashboards_copied_list` may come out
# empty. If so, the removal task below deletes every dashboard that was
# present before this run - TODO confirm on a second run against a host that
# already has dashboards.
- name: Get dashboard lists
  set_fact:
    _dashboards_present_list: "{{ _dashboards_present | json_query('files[*].path') | default([]) }}"
    _dashboards_copied_list: "{{ _dashboards_copied | json_query('results[*].dest') | default([]) }}"

- name: Remove installed dashboards not defined through this role
  become: true
  file:
    path: "{{ item }}"
    state: absent
  with_items: "{{ _dashboards_present_list | difference( _dashboards_copied_list ) }}"

ansible/roles/grafana-datasources/defaults/main.yml

Lines changed: 0 additions & 12 deletions
This file was deleted.

ansible/roles/grafana-datasources/tasks/main.yml

Lines changed: 0 additions & 31 deletions
This file was deleted.

environments/arcus/builder.pkrvars.hcl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
flavor = "vm.alaska.cpu.general.small"
22
networks = ["a262aabd-e6bf-4440-a155-13dbc1b5db0e"] # WCDC-iLab-60
3-
source_image_name = "openhpc-220808-1510.qcow2"
3+
source_image_name = "openhpc-220810-0839.qcow2"
44
ssh_keypair_name = "slurm-app-ci"
55
security_groups = ["default", "SSH"]
66
ssh_bastion_host = "128.232.222.183"

0 commit comments

Comments
 (0)