Skip to content

Commit 44cd3f3

Browse files
committed
Create kepler scrape config and dashboard only when power monitoring is enabled
1 parent 0ed9296 commit 44cd3f3

File tree

11 files changed

+334
-56
lines changed

11 files changed

+334
-56
lines changed

api/v1beta1/telemetry_consts.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ const (
3232
PauseBetweenWatchAttempts = time.Duration(60) * time.Second
3333
// DefaultKeplerPort -
3434
DefaultKeplerPort = 8888
35+
// TelemetryPowerMonitoring - Dataplane power monitoring service name
36+
TelemetryPowerMonitoring = "telemetry-power-monitoring"
3537
)
3638

3739
// PrometheusReplicas -

controllers/metricstorage_controller.go

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ func (r *MetricStorageReconciler) reconcileNormal(
438438
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardDefinitionReadyCondition, telemetryv1.DashboardsNotEnabledMessage)
439439
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardPluginReadyCondition, telemetryv1.DashboardsNotEnabledMessage)
440440
} else {
441-
if res, err := r.createDashboardObjects(ctx, instance, eventHandler); err != nil {
441+
if res, err := r.createDashboardObjects(ctx, instance, helper, eventHandler); err != nil {
442442
return res, err
443443
}
444444
}
@@ -600,7 +600,7 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
600600
return ctrl.Result{}, err
601601
}
602602

603-
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper)
603+
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper, telemetry.ServiceName)
604604
if err != nil {
605605
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err))
606606
}
@@ -623,18 +623,26 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
623623
return ctrl.Result{}, err
624624
}
625625

626+
connectionInfo, err = getComputeNodesConnectionInfo(instance, helper, telemetryv1.TelemetryPowerMonitoring)
627+
if err != nil {
628+
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err))
629+
}
630+
626631
// kepler scrape endpoints
627632
keplerEndpoints, _ := getKeplerTargets(connectionInfo)
628633
if err != nil {
629634
Log.Info(fmt.Sprintf("Cannot get Kepler targets. Scrape configs not created. Error: %s", err))
630635
}
631636

632-
// Kepler ScrapeConfig for non-tls nodes
633-
keplerServiceName := fmt.Sprintf("%s-kepler", telemetry.ServiceName)
634-
err = r.createServiceScrapeConfig(ctx, instance, Log, "Kepler",
635-
keplerServiceName, keplerEndpoints, false) // Currently Kepler doesn't support TLS so tlsEnabled is set to false
636-
if err != nil {
637-
return ctrl.Result{}, err
637+
// keplerEndpoints is an empty slice when the telemetry-power-monitoring service is not enabled
638+
if len(keplerEndpoints) > 0 {
639+
// Kepler ScrapeConfig for non-tls nodes
640+
keplerServiceName := fmt.Sprintf("%s-kepler", telemetry.ServiceName)
641+
err = r.createServiceScrapeConfig(ctx, instance, Log, "Kepler",
642+
keplerServiceName, keplerEndpoints, false) // Currently Kepler doesn't support TLS so tlsEnabled is set to false
643+
if err != nil {
644+
return ctrl.Result{}, err
645+
}
638646
}
639647

640648
instance.Status.Conditions.MarkTrue(telemetryv1.ScrapeConfigReadyCondition, condition.ReadyMessage)
@@ -672,7 +680,7 @@ func getKeplerTargets(nodes []ConnectionInfo) ([]string, []string) {
672680
return tls, nonTLS
673681
}
674682

675-
func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, eventHandler handler.EventHandler) (ctrl.Result, error) {
683+
func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, helper *helper.Helper, eventHandler handler.EventHandler) (ctrl.Result, error) {
676684
Log := r.GetLogger(ctx)
677685
uiPluginObj := &obsui.UIPlugin{
678686
ObjectMeta: metav1.ObjectMeta{
@@ -765,7 +773,14 @@ func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, in
765773
"grafana-dashboard-openstack-node": dashboards.OpenstackNode(datasourceName),
766774
"grafana-dashboard-openstack-vm": dashboards.OpenstackVM(datasourceName),
767775
"grafana-dashboard-openstack-rabbitmq": dashboards.OpenstackRabbitmq(datasourceName),
768-
"grafana-dashboard-openstack-kepler": dashboards.OpenstackKepler(datasourceName),
776+
}
777+
778+
// At least one nodeset must have the "telemetry-power-monitoring" service enabled for the kepler dashboard to be created.
779+
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper, telemetryv1.TelemetryPowerMonitoring)
780+
if err != nil {
781+
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Power monitoring dashboard not created. Error: %s", err))
782+
} else if len(connectionInfo) > 0 {
783+
dashboardCMs["grafana-dashboard-openstack-kepler"] = dashboards.OpenstackKepler(datasourceName)
769784
}
770785

771786
for dashboardName, desiredCM := range dashboardCMs {
@@ -837,6 +852,7 @@ func (r *MetricStorageReconciler) ensureWatches(
837852
func getComputeNodesConnectionInfo(
838853
instance *telemetryv1.MetricStorage,
839854
helper *helper.Helper,
855+
telemetryServiceName string,
840856
) ([]ConnectionInfo, error) {
841857
ipSetList, err := getIPSetList(instance, helper)
842858
if err != nil {
@@ -854,24 +870,23 @@ func getComputeNodesConnectionInfo(
854870
return []ConnectionInfo{}, err
855871
}
856872
nodeSetGroup := inventory.Groups[secret.Labels["openstackdataplanenodeset"]]
857-
containsTelemetry := false
873+
containsTargetService := false
858874
for _, svc := range nodeSetGroup.Vars["edpm_services"].([]interface{}) {
859-
if svc.(string) == "telemetry" {
860-
containsTelemetry = true
875+
if svc.(string) == telemetryServiceName {
876+
containsTargetService = true
861877
}
862878
}
863-
if !containsTelemetry {
864-
// Telemetry isn't deployed on this nodeset
865-
// there is no reason to include these nodes
866-
// for scraping by prometheus
879+
if !containsTargetService {
880+
// If Telemetry|TelemetryPowerMonitoring isn't
881+
// deployed on this nodeset there is no reason
882+
// to include these nodes for scraping by prometheus
867883
continue
868884
}
869885
for name, item := range nodeSetGroup.Hosts {
870886
namespacedName := &types.NamespacedName{
871887
Name: name,
872888
Namespace: instance.GetNamespace(),
873889
}
874-
875890
if len(ipSetList.Items) > 0 {
876891
// if we have IPSets, lets go to search for the IPs there
877892
address, _ = getAddressFromIPSet(instance, &item, namespacedName, helper)

kuttl-test.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,17 @@ testDirs:
2828
- tests/kuttl/suites/tls/
2929
suppress:
3030
- events # Remove spammy event logs
31+
commands:
32+
- script: |
33+
if [ ! -f ansibleee-ssh-key-id_rsa ]; then
34+
ssh-keygen -f ansibleee-ssh-key-id_rsa -N "" -t rsa -b 4096
35+
fi
36+
oc create secret generic dataplane-ansible-ssh-private-key-secret \
37+
--save-config \
38+
--dry-run=client \
39+
--from-file=authorized_keys=ansibleee-ssh-key-id_rsa.pub \
40+
--from-file=ssh-privatekey=ansibleee-ssh-key-id_rsa \
41+
--from-file=ssh-publickey=ansibleee-ssh-key-id_rsa.pub \
42+
-n telemetry-kuttl-tests \
43+
-o yaml | \
44+
oc apply -f -
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
apiVersion: network.openstack.org/v1beta1
2+
kind: DNSMasq
3+
metadata:
4+
name: dnsmasq
5+
namespace: telemetry-kuttl-tests
6+
spec:
7+
replicas: 1
8+
options:
9+
- key: server
10+
values:
11+
- 192.168.122.1
12+
- key: local
13+
values:
14+
- '/example.com/'
15+
debug:
16+
service: false
17+
override:
18+
service:
19+
metadata:
20+
annotations:
21+
metallb.universe.tf/address-pool: ctlplane
22+
metallb.universe.tf/allow-shared-ip: ctlplane
23+
metallb.universe.tf/loadBalancerIPs: 192.168.122.80
24+
spec:
25+
type: ClusterIP
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
openstack-edpm-ipam:
2+
vars:
3+
edpm_network_config_template: |
4+
---
5+
{% set mtu_list = [ctlplane_mtu] %}
6+
{% for network in nodeset_networks %}
7+
{{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }}
8+
{%- endfor %}
9+
{% set min_viable_mtu = mtu_list | max %}
10+
network_config:
11+
- type: interface
12+
name: nic1
13+
use_dhcp: false
14+
- type: interface
15+
name: nic2
16+
use_dhcp: false
17+
addresses:
18+
- ip_netmask: {{ ctlplane_ip }}/{{ ctlplane_cidr }}
19+
routes:
20+
- default: true
21+
next_hop: {{ ctlplane_gateway_ip }}
22+
23+
- type: linux_bond
24+
name: bond_api
25+
use_dhcp: false
26+
bonding_options: "mode=active-backup"
27+
dns_servers: {{ ctlplane_dns_nameservers }}
28+
members:
29+
- type: interface
30+
name: nic3
31+
32+
- type: vlan
33+
vlan_id: {{ lookup('vars', networks_lower['internalapi'] ~ '_vlan_id') }}
34+
device: bond_api
35+
addresses:
36+
- ip_netmask: {{ lookup('vars', networks_lower['internalapi'] ~ '_ip') }}/{{ lookup('vars', networks_lower['internalapi'] ~ '_cidr') }}
37+
38+
- type: vlan
39+
vlan_id: {{ lookup('vars', networks_lower['storage'] ~ '_vlan_id') }}
40+
device: bond_api
41+
addresses:
42+
- ip_netmask: {{ lookup('vars', networks_lower['storage'] ~ '_ip') }}/{{ lookup('vars', networks_lower['storage'] ~ '_cidr') }}
43+
edpm_service_types:
44+
- bootstrap
45+
- download-cache
46+
- reboot-os
47+
- configure-ovs-dpdk
48+
- configure-network
49+
- validate-network
50+
- install-os
51+
- configure-os
52+
- ssh-known-hosts
53+
- run-os
54+
- install-certs
55+
- ovn
56+
- neutron-ovn
57+
- neutron-metadata
58+
- neutron-sriov
59+
- libvirt
60+
- nova
61+
- telemetry
62+
- telemetry-power-monitoring
63+
edpm_services:
64+
- bootstrap
65+
- download-cache
66+
- reboot-os
67+
- configure-ovs-dpdk
68+
- configure-network
69+
- validate-network
70+
- install-os
71+
- configure-os
72+
- ssh-known-hosts
73+
- run-os
74+
- install-certs
75+
- ovn
76+
- neutron-ovn-igmp
77+
- neutron-metadata
78+
- neutron-sriov
79+
- libvirt
80+
- nova
81+
- telemetry
82+
- telemetry-power-monitoring
83+
edpm_tls_certs_enabled: true
84+
hosts:
85+
edpm-compute-0:
86+
ansible_host: edpm-compute-0
87+
canonical_hostname: edpm-compute-0.ctlplane.example.com
88+
ctlplane_cidr: 24
89+
ctlplane_dns_nameservers:
90+
- 192.168.122.80
91+
ctlplane_gateway_ip: 192.168.122.1
92+
ctlplane_host_routes:
93+
- destination: 0.0.0.0/0
94+
nexthop: 192.168.122.1
95+
ctlplane_ip: 192.168.122.100
96+
ctlplane_mtu: 1500
97+
dns_search_domains:
98+
- ctlplane.example.com
99+
- internalapi.example.com
100+
- storage.example.com
101+
- tenant.example.com
102+
internalapi_cidr: 24
103+
internalapi_gateway_ip: null
104+
internalapi_host_routes: []
105+
internalapi_ip: 172.17.0.100
106+
internalapi_mtu: 1496
107+
internalapi_vlan_id: 52
108+
storage_cidr: 24
109+
storage_gateway_ip: null
110+
storage_host_routes: []
111+
storage_ip: 172.18.0.100
112+
storage_mtu: 1496
113+
storage_vlan_id: 53
114+
tenant_cidr: 24
115+
tenant_gateway_ip: null
116+
tenant_host_routes: []
117+
tenant_ip: 172.19.0.100
118+
tenant_mtu: 1496
119+
tenant_vlan_id: 54

tests/kuttl/suites/metricstorage/tests/01-assert.yaml

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -95,19 +95,6 @@ metadata:
9595
spec:
9696
scrapeInterval: 30s
9797
---
98-
apiVersion: monitoring.rhobs/v1alpha1
99-
kind: ScrapeConfig
100-
metadata:
101-
labels:
102-
service: metricStorage
103-
name: telemetry-kepler
104-
ownerReferences:
105-
- kind: MetricStorage
106-
name: telemetry-kuttl
107-
spec:
108-
staticConfigs:
109-
- {}
110-
---
11198
apiVersion: observability.openshift.io/v1alpha1
11299
kind: UIPlugin
113100
metadata:
@@ -159,9 +146,3 @@ kind: ConfigMap
159146
metadata:
160147
name: grafana-dashboard-openstack-rabbitmq
161148
namespace: openshift-config-managed
162-
---
163-
apiVersion: v1
164-
kind: ConfigMap
165-
metadata:
166-
name: grafana-dashboard-openstack-kepler
167-
namespace: openshift-config-managed

tests/kuttl/suites/metricstorage/tests/02-errors.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,3 @@ kind: ConfigMap
3636
metadata:
3737
name: grafana-dashboard-openstack-rabbitmq
3838
namespace: openshift-config-managed
39-
---
40-
apiVersion: v1
41-
kind: ConfigMap
42-
metadata:
43-
name: grafana-dashboard-openstack-kepler
44-
namespace: openshift-config-managed

tests/kuttl/suites/metricstorage/tests/04-assert.yaml

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -106,16 +106,3 @@ metadata:
106106
name: telemetry-kuttl
107107
spec:
108108
scrapeInterval: 40s
109-
---
110-
apiVersion: monitoring.rhobs/v1alpha1
111-
kind: ScrapeConfig
112-
metadata:
113-
labels:
114-
service: metricStorage
115-
name: telemetry-kepler
116-
ownerReferences:
117-
- kind: MetricStorage
118-
name: telemetry-kuttl
119-
spec:
120-
staticConfigs:
121-
- {}

0 commit comments

Comments
 (0)