Skip to content

Commit 25eb470

Browse files
Merge pull request #518 from yadneshk/kepler_scrapeconfig
Create kepler scrape config only when power monitoring is enabled
2 parents ef2b09e + 44cd3f3 commit 25eb470

File tree

11 files changed

+334
-56
lines changed

11 files changed

+334
-56
lines changed

api/v1beta1/telemetry_consts.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ const (
3232
PauseBetweenWatchAttempts = time.Duration(60) * time.Second
3333
// DefaultKeplerPort -
3434
DefaultKeplerPort = 8888
35+
// TelemetryPowerMonitoring - Dataplane power monitoring service name
36+
TelemetryPowerMonitoring = "telemetry-power-monitoring"
3537
)
3638

3739
// PrometheusReplicas -

controllers/metricstorage_controller.go

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ func (r *MetricStorageReconciler) reconcileNormal(
439439
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardDefinitionReadyCondition, telemetryv1.DashboardsNotEnabledMessage)
440440
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardPluginReadyCondition, telemetryv1.DashboardsNotEnabledMessage)
441441
} else {
442-
if res, err := r.createDashboardObjects(ctx, instance, eventHandler); err != nil {
442+
if res, err := r.createDashboardObjects(ctx, instance, helper, eventHandler); err != nil {
443443
return res, err
444444
}
445445
}
@@ -601,7 +601,7 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
601601
return ctrl.Result{}, err
602602
}
603603

604-
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper)
604+
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper, telemetry.ServiceName)
605605
if err != nil {
606606
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err))
607607
}
@@ -624,18 +624,26 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
624624
return ctrl.Result{}, err
625625
}
626626

627+
connectionInfo, err = getComputeNodesConnectionInfo(instance, helper, telemetryv1.TelemetryPowerMonitoring)
628+
if err != nil {
629+
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err))
630+
}
631+
627632
// kepler scrape endpoints
628633
keplerEndpoints, _ := getKeplerTargets(connectionInfo)
629634
if err != nil {
630635
Log.Info(fmt.Sprintf("Cannot get Kepler targets. Scrape configs not created. Error: %s", err))
631636
}
632637

633-
// Kepler ScrapeConfig for non-tls nodes
634-
keplerServiceName := fmt.Sprintf("%s-kepler", telemetry.ServiceName)
635-
err = r.createServiceScrapeConfig(ctx, instance, Log, "Kepler",
636-
keplerServiceName, keplerEndpoints, false) // Currently Kepler doesn't support TLS so tlsEnabled is set to false
637-
if err != nil {
638-
return ctrl.Result{}, err
638+
// keplerEndpoints is an empty slice when the telemetry-power-monitoring service is not enabled on any nodeset
639+
if len(keplerEndpoints) > 0 {
640+
// Kepler ScrapeConfig for non-tls nodes
641+
keplerServiceName := fmt.Sprintf("%s-kepler", telemetry.ServiceName)
642+
err = r.createServiceScrapeConfig(ctx, instance, Log, "Kepler",
643+
keplerServiceName, keplerEndpoints, false) // Currently Kepler doesn't support TLS so tlsEnabled is set to false
644+
if err != nil {
645+
return ctrl.Result{}, err
646+
}
639647
}
640648

641649
instance.Status.Conditions.MarkTrue(telemetryv1.ScrapeConfigReadyCondition, condition.ReadyMessage)
@@ -673,7 +681,7 @@ func getKeplerTargets(nodes []ConnectionInfo) ([]string, []string) {
673681
return tls, nonTLS
674682
}
675683

676-
func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, eventHandler handler.EventHandler) (ctrl.Result, error) {
684+
func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, helper *helper.Helper, eventHandler handler.EventHandler) (ctrl.Result, error) {
677685
Log := r.GetLogger(ctx)
678686
uiPluginObj := &obsui.UIPlugin{
679687
ObjectMeta: metav1.ObjectMeta{
@@ -766,7 +774,14 @@ func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, in
766774
"grafana-dashboard-openstack-node": dashboards.OpenstackNode(datasourceName),
767775
"grafana-dashboard-openstack-vm": dashboards.OpenstackVM(datasourceName),
768776
"grafana-dashboard-openstack-rabbitmq": dashboards.OpenstackRabbitmq(datasourceName),
769-
"grafana-dashboard-openstack-kepler": dashboards.OpenstackKepler(datasourceName),
777+
}
778+
779+
// At least one nodeset must have the "telemetry-power-monitoring" service enabled for the kepler dashboard to be created
780+
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper, telemetryv1.TelemetryPowerMonitoring)
781+
if err != nil {
782+
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Power monitoring dashboard not created. Error: %s", err))
783+
} else if len(connectionInfo) > 0 {
784+
dashboardCMs["grafana-dashboard-openstack-kepler"] = dashboards.OpenstackKepler(datasourceName)
770785
}
771786

772787
for dashboardName, desiredCM := range dashboardCMs {
@@ -838,6 +853,7 @@ func (r *MetricStorageReconciler) ensureWatches(
838853
func getComputeNodesConnectionInfo(
839854
instance *telemetryv1.MetricStorage,
840855
helper *helper.Helper,
856+
telemetryServiceName string,
841857
) ([]ConnectionInfo, error) {
842858
ipSetList, err := getIPSetList(instance, helper)
843859
if err != nil {
@@ -855,24 +871,23 @@ func getComputeNodesConnectionInfo(
855871
return []ConnectionInfo{}, err
856872
}
857873
nodeSetGroup := inventory.Groups[secret.Labels["openstackdataplanenodeset"]]
858-
containsTelemetry := false
874+
containsTargetService := false
859875
for _, svc := range nodeSetGroup.Vars["edpm_services"].([]interface{}) {
860-
if svc.(string) == "telemetry" {
861-
containsTelemetry = true
876+
if svc.(string) == telemetryServiceName {
877+
containsTargetService = true
862878
}
863879
}
864-
if !containsTelemetry {
865-
// Telemetry isn't deployed on this nodeset
866-
// there is no reason to include these nodes
867-
// for scraping by prometheus
880+
if !containsTargetService {
881+
// If Telemetry|TelemetryPowerMonitoring isn't
882+
// deployed on this nodeset there is no reason
883+
// to include these nodes for scraping by prometheus
868884
continue
869885
}
870886
for name, item := range nodeSetGroup.Hosts {
871887
namespacedName := &types.NamespacedName{
872888
Name: name,
873889
Namespace: instance.GetNamespace(),
874890
}
875-
876891
if len(ipSetList.Items) > 0 {
877892
// if we have IPSets, lets go to search for the IPs there
878893
address, _ = getAddressFromIPSet(instance, &item, namespacedName, helper)

kuttl-test.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,17 @@ testDirs:
2828
- tests/kuttl/suites/tls/
2929
suppress:
3030
- events # Remove spammy event logs
31+
commands:
32+
- script: |
33+
if [ ! -f ansibleee-ssh-key-id_rsa ]; then
34+
ssh-keygen -f ansibleee-ssh-key-id_rsa -N "" -t rsa -b 4096
35+
fi
36+
oc create secret generic dataplane-ansible-ssh-private-key-secret \
37+
--save-config \
38+
--dry-run=client \
39+
--from-file=authorized_keys=ansibleee-ssh-key-id_rsa.pub \
40+
--from-file=ssh-privatekey=ansibleee-ssh-key-id_rsa \
41+
--from-file=ssh-publickey=ansibleee-ssh-key-id_rsa.pub \
42+
-n telemetry-kuttl-tests \
43+
-o yaml | \
44+
oc apply -f -
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
apiVersion: network.openstack.org/v1beta1
2+
kind: DNSMasq
3+
metadata:
4+
name: dnsmasq
5+
namespace: telemetry-kuttl-tests
6+
spec:
7+
replicas: 1
8+
options:
9+
- key: server
10+
values:
11+
- 192.168.122.1
12+
- key: local
13+
values:
14+
- '/example.com/'
15+
debug:
16+
service: false
17+
override:
18+
service:
19+
metadata:
20+
annotations:
21+
metallb.universe.tf/address-pool: ctlplane
22+
metallb.universe.tf/allow-shared-ip: ctlplane
23+
metallb.universe.tf/loadBalancerIPs: 192.168.122.80
24+
spec:
25+
type: ClusterIP
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
openstack-edpm-ipam:
2+
vars:
3+
edpm_network_config_template: |
4+
---
5+
{% set mtu_list = [ctlplane_mtu] %}
6+
{% for network in nodeset_networks %}
7+
{{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }}
8+
{%- endfor %}
9+
{% set min_viable_mtu = mtu_list | max %}
10+
network_config:
11+
- type: interface
12+
name: nic1
13+
use_dhcp: false
14+
- type: interface
15+
name: nic2
16+
use_dhcp: false
17+
addresses:
18+
- ip_netmask: {{ ctlplane_ip }}/{{ ctlplane_cidr }}
19+
routes:
20+
- default: true
21+
next_hop: {{ ctlplane_gateway_ip }}
22+
23+
- type: linux_bond
24+
name: bond_api
25+
use_dhcp: false
26+
bonding_options: "mode=active-backup"
27+
dns_servers: {{ ctlplane_dns_nameservers }}
28+
members:
29+
- type: interface
30+
name: nic3
31+
32+
- type: vlan
33+
vlan_id: {{ lookup('vars', networks_lower['internalapi'] ~ '_vlan_id') }}
34+
device: bond_api
35+
addresses:
36+
- ip_netmask: {{ lookup('vars', networks_lower['internalapi'] ~ '_ip') }}/{{ lookup('vars', networks_lower['internalapi'] ~ '_cidr') }}
37+
38+
- type: vlan
39+
vlan_id: {{ lookup('vars', networks_lower['storage'] ~ '_vlan_id') }}
40+
device: bond_api
41+
addresses:
42+
- ip_netmask: {{ lookup('vars', networks_lower['storage'] ~ '_ip') }}/{{ lookup('vars', networks_lower['storage'] ~ '_cidr') }}
43+
edpm_service_types:
44+
- bootstrap
45+
- download-cache
46+
- reboot-os
47+
- configure-ovs-dpdk
48+
- configure-network
49+
- validate-network
50+
- install-os
51+
- configure-os
52+
- ssh-known-hosts
53+
- run-os
54+
- install-certs
55+
- ovn
56+
- neutron-ovn
57+
- neutron-metadata
58+
- neutron-sriov
59+
- libvirt
60+
- nova
61+
- telemetry
62+
- telemetry-power-monitoring
63+
edpm_services:
64+
- bootstrap
65+
- download-cache
66+
- reboot-os
67+
- configure-ovs-dpdk
68+
- configure-network
69+
- validate-network
70+
- install-os
71+
- configure-os
72+
- ssh-known-hosts
73+
- run-os
74+
- install-certs
75+
- ovn
76+
- neutron-ovn-igmp
77+
- neutron-metadata
78+
- neutron-sriov
79+
- libvirt
80+
- nova
81+
- telemetry
82+
- telemetry-power-monitoring
83+
edpm_tls_certs_enabled: true
84+
hosts:
85+
edpm-compute-0:
86+
ansible_host: edpm-compute-0
87+
canonical_hostname: edpm-compute-0.ctlplane.example.com
88+
ctlplane_cidr: 24
89+
ctlplane_dns_nameservers:
90+
- 192.168.122.80
91+
ctlplane_gateway_ip: 192.168.122.1
92+
ctlplane_host_routes:
93+
- destination: 0.0.0.0/0
94+
nexthop: 192.168.122.1
95+
ctlplane_ip: 192.168.122.100
96+
ctlplane_mtu: 1500
97+
dns_search_domains:
98+
- ctlplane.example.com
99+
- internalapi.example.com
100+
- storage.example.com
101+
- tenant.example.com
102+
internalapi_cidr: 24
103+
internalapi_gateway_ip: null
104+
internalapi_host_routes: []
105+
internalapi_ip: 172.17.0.100
106+
internalapi_mtu: 1496
107+
internalapi_vlan_id: 52
108+
storage_cidr: 24
109+
storage_gateway_ip: null
110+
storage_host_routes: []
111+
storage_ip: 172.18.0.100
112+
storage_mtu: 1496
113+
storage_vlan_id: 53
114+
tenant_cidr: 24
115+
tenant_gateway_ip: null
116+
tenant_host_routes: []
117+
tenant_ip: 172.19.0.100
118+
tenant_mtu: 1496
119+
tenant_vlan_id: 54

tests/kuttl/suites/metricstorage/tests/01-assert.yaml

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -95,19 +95,6 @@ metadata:
9595
spec:
9696
scrapeInterval: 30s
9797
---
98-
apiVersion: monitoring.rhobs/v1alpha1
99-
kind: ScrapeConfig
100-
metadata:
101-
labels:
102-
service: metricStorage
103-
name: telemetry-kepler
104-
ownerReferences:
105-
- kind: MetricStorage
106-
name: telemetry-kuttl
107-
spec:
108-
staticConfigs:
109-
- {}
110-
---
11198
apiVersion: observability.openshift.io/v1alpha1
11299
kind: UIPlugin
113100
metadata:
@@ -159,9 +146,3 @@ kind: ConfigMap
159146
metadata:
160147
name: grafana-dashboard-openstack-rabbitmq
161148
namespace: openshift-config-managed
162-
---
163-
apiVersion: v1
164-
kind: ConfigMap
165-
metadata:
166-
name: grafana-dashboard-openstack-kepler
167-
namespace: openshift-config-managed

tests/kuttl/suites/metricstorage/tests/02-errors.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,3 @@ kind: ConfigMap
3636
metadata:
3737
name: grafana-dashboard-openstack-rabbitmq
3838
namespace: openshift-config-managed
39-
---
40-
apiVersion: v1
41-
kind: ConfigMap
42-
metadata:
43-
name: grafana-dashboard-openstack-kepler
44-
namespace: openshift-config-managed

tests/kuttl/suites/metricstorage/tests/04-assert.yaml

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -106,16 +106,3 @@ metadata:
106106
name: telemetry-kuttl
107107
spec:
108108
scrapeInterval: 40s
109-
---
110-
apiVersion: monitoring.rhobs/v1alpha1
111-
kind: ScrapeConfig
112-
metadata:
113-
labels:
114-
service: metricStorage
115-
name: telemetry-kepler
116-
ownerReferences:
117-
- kind: MetricStorage
118-
name: telemetry-kuttl
119-
spec:
120-
staticConfigs:
121-
- {}

0 commit comments

Comments
 (0)