Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,982 changes: 1,318 additions & 1,664 deletions etc/kayobe/kolla/config/grafana/dashboards/openstack/redfish.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion etc/kayobe/kolla/config/prometheus/prometheus.rules
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ groups:
rules:

- alert: PrometheusTargetMissing
expr: up{job!="redfish-exporter-seed"} == 0
expr: up{job!="redfish-exporter-seed", job!="redfish-exporter-collectlog"} == 0
for: 5m
labels:
severity: critical
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,34 @@
---
{% if seed_redfish_exporter_container_enabled | bool %}
scrape_configs:
- job_name: redfish-exporter-seed
- job_name: redfish-exporter
params:
collectlogs: ['false']
metrics_path: /redfish
scrape_timeout: 120s
scrape_interval: {{ [8 * groups['redfish_exporter_targets'] | length, 120] | max }}s
scrape_timeout: 300s
scrape_interval: {{ redfish_exporter_scrape_interval }}s
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: "{{ lookup('vars', admin_oc_net_name ~ '_ips')[groups.seed.0] }}:9610"
static_configs:
{% for host in groups.get('redfish_exporter_targets', []) %}
- targets:
- '{{ hostvars[host]["redfish_exporter_target_address"] }}'
labels:
server: '{{ host }}'
env: "{{ kayobe_environment | default('openstack') }}"
group: "{{ hostvars[host]['redfish_exporter_scrape_group'] | default('overcloud') }}"
{% endfor %}
- job_name: redfish-exporter-collectlog
params:
collectlogs: ['true']
metrics_path: /redfish
scrape_timeout: 1200s
scrape_interval: 3600s
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ scrape_configs:
static_configs:
- targets:
- '{{ kolla_internal_fqdn | put_address_in_context('url') }}:9090'
scrape_interval: 15m
{% endraw %}
scrape_interval: "{{ stackhpc_os_capacity_scrape_interval }}s"
scrape_timeout: 10m
{% raw %}
{% if kolla_enable_tls_internal | bool %}
scheme: https
{% endif %}
Expand Down
4 changes: 3 additions & 1 deletion etc/kayobe/kolla/globals.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ kolla_image_tags:
# Monitoring and alerting related settings

opensearch_heap_size: 8g
prometheus_cmdline_extras: "--storage.tsdb.retention.time=30d"
prometheus_cmdline_extras: "--storage.tsdb.retention.time=30d --query.lookback-delta={{ stackhpc_prometheus_query_lookback_delta }}"

# Additional command line flags for node exporter to enable textfile collector for disk metrics and create textfile docker volume
prometheus_node_exporter_extra_volumes:
Expand Down Expand Up @@ -56,3 +56,5 @@ prometheus_blackbox_exporter_endpoints_kayobe:
- endpoints:
- "pulp:http_2xx:{{ pulp_url }}/pulp/api/v3/status/"
enabled: "{{ seed_pulp_container_enabled | bool }}"

prometheus_openstack_exporter_interval: "{{ stackhpc_prometheus_openstack_exporter_interval }}s"
4 changes: 2 additions & 2 deletions etc/kayobe/seed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ seed_redfish_exporter_container:
image: ghcr.io/stackhpc/redfish-exporter
pre: "{{ kayobe_config_path }}/containers/redfish_exporter/pre.yml"
post: "{{ kayobe_config_path }}/containers/redfish_exporter/post.yml"
tag: "v1.0.2"
tag: "v2.0.0-stackhpc"
network_mode: host
command: ./main --config.file /redfish_exporter.yml
command: redfish_exporter --config.file /redfish_exporter.yml
volumes: "/opt/kayobe/containers/redfish_exporter/redfish_exporter.yml:/redfish_exporter.yml:ro"
restart_policy: unless-stopped

Expand Down
18 changes: 18 additions & 0 deletions etc/kayobe/stackhpc-monitoring.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
---
# StackHPC monitoring configuration
###############################################################################
# Prometheus server configuration

# How far prometheus will look back in time to find a metric.
stackhpc_prometheus_query_lookback_delta: >-
{{ [stackhpc_redfish_exporter_scrape_interval | int,
stackhpc_os_capacity_scrape_interval | int,
stackhpc_prometheus_openstack_exporter_interval | int,
300] | max + 30 }}s

###############################################################################
# Alert configuration
Expand Down Expand Up @@ -45,16 +54,25 @@ stackhpc_os_capacity_openstack_cacert: ""
stackhpc_os_capacity_openstack_verify: true
# Redfish exporter

# How often to scrape the os capacity exporter in seconds.
stackhpc_os_capacity_scrape_interval: 900

# Whether the redfish exporter is enabled.
stackhpc_enable_redfish_exporter: false

# How often to scrape the BMCs in seconds.
stackhpc_redfish_exporter_scrape_interval: "{{ [8 * groups['redfish_exporter_targets'] | length, 300] | max }}"

# Credentials
redfish_exporter_default_username: "{{ ipmi_username }}"
redfish_exporter_default_password: "{{ ipmi_password }}"

# The address of the BMC that is used to query redfish metrics.
redfish_exporter_target_address: "{{ ipmi_address }}"

# How often to scrape OpenStack Exporter in seconds.
stackhpc_prometheus_openstack_exporter_interval: 300

###############################################################################

# Whether the RADOS gateway usage exporter is enabled.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
features:
- Upgrades the redfish exporter container image to the v2.x series.
- Adds support for Lenovo hardware to the redfish exporter dashboard.
- |
Adds the ``stackhpc_redfish_exporter_scrape_interval``,
``stackhpc_os_capacity_scrape_interval``, and
``stackhpc_prometheus_openstack_exporter_interval``
configuration variables.
fixes:
- |
Sets the prometheus server side option ``query.lookback-delta`` to
the largest scrape interval plus 30 seconds so that metrics from exporters
with large scrape intervals are not marked stale before the next scrape.
- Fixes various issues with the redfish exporter dashboard.
upgrade:
- |
Increases default ``os_capacity_scrape_interval`` to ``5m``. If you already customise
this please move to the new ``stackhpc_os_capacity_scrape_interval`` variable.
Loading