diff --git a/cli/dpservice-exporter/dashboard/grafana.json b/cli/dpservice-exporter/dashboard/grafana.json index 7b878fbb8..6470ca356 100644 --- a/cli/dpservice-exporter/dashboard/grafana.json +++ b/cli/dpservice-exporter/dashboard/grafana.json @@ -24,10 +24,23 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 40, + "id": 43, "links": [], "liveNow": false, "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 18, + "panels": [], + "title": "Heap Info", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -92,7 +105,7 @@ "h": 9, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { @@ -187,7 +200,7 @@ "h": 9, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 5, "options": { @@ -282,7 +295,7 @@ "h": 9, "w": 12, "x": 0, - "y": 9 + "y": 10 }, "id": 6, "options": { @@ -377,7 +390,7 @@ "h": 9, "w": 12, "x": 12, - "y": 9 + "y": 10 }, "id": 7, "options": { @@ -472,7 +485,7 @@ "h": 9, "w": 12, "x": 0, - "y": 18 + "y": 19 }, "id": 8, "options": { @@ -567,7 +580,7 @@ "h": 9, "w": 12, "x": 12, - "y": 18 + "y": 19 }, "id": 9, "options": { @@ -598,6 +611,135 @@ "title": "DP-Service Free Count", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 17, + "panels": [], + "title": "DPDK Ethdev statistics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "DOWN" + }, + "1": { + "index": 1, + "text": "UP" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bool_on_off" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "exemplar": false, + "expr": "dpdk_ethdev_link_status{cluster=~\"$cluster\", pod=~\"$pod\", name=~\"$ethdev\"}", + "legendFormat": "{{cluster}} {{pod}} {{name}}", + "range": true, + "refId": "A" + } + ], + "title": "DPDK Ethdev PF Link Status", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -641,6 +783,7 @@ } }, "mappings": [], + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ @@ -658,12 +801,212 @@ "overrides": [] }, "gridPos": { - "h": 10, + "h": 8, "w": 24, "x": 0, - "y": 27 + "y": 37 }, - "id": 4, + "id": 13, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(dpdk_ethdev_errors_total{cluster=~\"$cluster\", pod=~\"$pod\", name=~\"$ethdev\"}[2m]) > 0", + "legendFormat": 
"{{cluster}} {{pod}} {{name}} {{stat}}", + "range": true, + "refId": "A" + } + ], + "title": "DPDK Ethdev Errors per Second", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(dpdk_ethdev_bytes_total{cluster=~\"$cluster\", pod=~\"$pod\", name=~\"$ethdev\"}[2m])", + "legendFormat": "{{cluster}} {{pod}} {{name}} {{stat}}", + "range": true, + "refId": "A" + } + ], + "title": "DPDK Ethdev Bytes per Second", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 14, "options": { "legend": { "calcs": [ @@ -687,15 +1030,29 @@ "uid": "" }, "editorMode": "code", - "expr": "sort(dpdk_interface_stat{stat_name=\"nat_used_port_count\", cluster=~\"$cluster\", pod=~\"$pod\", interface=~\"$interface\"})", - "legendFormat": "{{cluster}} {{pod}} {{interface}}", + "exemplar": false, + "expr": "rate(dpdk_ethdev_packets_total{cluster=~\"$cluster\", pod=~\"$pod\", name=~\"$ethdev\"}[2m])", + "legendFormat": "{{cluster}} {{pod}} {{name}} {{stat}}", "range": true, "refId": "A" } ], - "title": "DP-Service NAT Used Ports", + "title": "DPDK Ethdev Packets per Second", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 16, + "panels": [], + "title": "DP-Service statistics", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -759,9 +1116,9 @@ "h": 10, "w": 24, "x": 0, - "y": 37 + "y": 62 }, - "id": 3, + "id": 4, "options": { "legend": { "calcs": [ @@ -785,13 +1142,13 @@ "uid": "" }, "editorMode": "code", - "expr": "dpdk_interface_stat{stat_name=\"virtsvc_used_port_count\", cluster=~\"$cluster\", pod=~\"$pod\"}", - "legendFormat": "{{cluster}}
{{pod}} {{interface}}", + "expr": "sort(dps_nat_used_ports_count{cluster=~\"$cluster\", pod=~\"$pod\", interface_id=~\"$interface\"})", + "legendFormat": "{{cluster}} {{pod}} {{interface_id}}", "range": true, "refId": "A" } ], - "title": "DP-Service VirtualService Used NAT Ports", + "title": "DP-Service NAT Used Ports per Interface", "type": "timeseries" }, { @@ -857,7 +1214,7 @@ "h": 10, "w": 24, "x": 0, - "y": 47 + "y": 72 }, "id": 11, "options": { @@ -883,8 +1240,8 @@ "uid": "" }, "editorMode": "code", - "expr": "dpdk_interface_stat{stat_name=\"firewall_rule_count\", cluster=~\"$cluster\", pod=~\"$pod\", interface=~\"$interface\"}", - "legendFormat": "{{cluster}} {{pod}} iface: {{interface}}", + "expr": "dps_firewall_rules_count{cluster=~\"$cluster\", pod=~\"$pod\", interface_id=~\"$interface\"}", + "legendFormat": "{{cluster}} {{pod}} {{interface_id}}", "range": true, "refId": "A" } @@ -949,38 +1306,113 @@ ] } }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "prometheus/lab1-vm3-prometheus dp-service-j2ht9 conntrack_table_entries" - ], - "prefix": "All except:", - "readOnly": true - } + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 82 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "expr": "dps_virtsvc_used_ports_count{cluster=~\"$cluster\", pod=~\"$pod\"}", + "legendFormat": "{{cluster}} {{pod}} {{address}}", + "range": true, + "refId": "A" + } + ], + "title": "DP-Service VirtualService Used NAT Ports", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "properties": [ + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } ] - } - ] + }, + "unit": "percent" + }, + "overrides": [] }, "gridPos": { "h": 10, "w": 24, "x": 0, - "y": 57 + "y": 92 }, "id": 10, "options": { @@ -1006,8 +1438,8 @@ "uid": "" }, "editorMode": "code", - "expr": "hash_table_saturation{table_name=~\"$hash_table\", cluster=~\"$cluster\", pod=~\"$pod\"}", - "legendFormat": "{{cluster}} {{pod}} {{table_name}}_{{stat_name}}", + "expr": "100 * dps_hash_table_saturation{table=~\"$hash_table\", cluster=~\"$cluster\", pod=~\"$pod\", stat=\"entries\"} / ignoring(stat) dps_hash_table_saturation{table=~\"$hash_table\", cluster=~\"$cluster\", pod=~\"$pod\", stat=\"capacity\"}", + "legendFormat": "{{cluster}} {{pod}} {{table}}", "range": true, "refId": "A" } @@ -1097,16 +1529,16 @@ "type": "prometheus", "uid": "" }, - "definition": "label_values(hash_table_saturation,table_name)", + "definition": "label_values(dpdk_ethdev_packets_total,name)", "hide": 0, "includeAll": true, "label": "", "multi": true, - "name": "hash_table", + "name": "ethdev", "options": [], "query": { "qryType": 1, - "query": 
"label_values(hash_table_saturation,table_name)", + "query": "label_values(dpdk_ethdev_packets_total,name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, @@ -1129,7 +1561,7 @@ "type": "prometheus", "uid": "" }, - "definition": "label_values(dpdk_interface_stat,interface)", + "definition": "label_values(dps_firewall_rules_count,interface_id)", "hide": 0, "includeAll": true, "label": "", @@ -1138,7 +1570,39 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(dpdk_interface_stat,interface)", + "query": "label_values(dps_firewall_rules_count,interface_id)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "" + }, + "definition": "label_values(dps_hash_table_saturation{table!~\"virtsvc_table_0|virtsvc_table_1|virtsvc_table_2|virtsvc_table_3|virtsvc_table_4|virtsvc_table_5|virtsvc_table_6|virtsvc_table_7\"},table)", + "hide": 0, + "includeAll": true, + "label": "", + "multi": true, + "name": "hash_table", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(dps_hash_table_saturation{table!~\"virtsvc_table_0|virtsvc_table_1|virtsvc_table_2|virtsvc_table_3|virtsvc_table_4|virtsvc_table_5|virtsvc_table_6|virtsvc_table_7\"},table)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, @@ -1157,6 +1621,6 @@ "timezone": "", "title": "DP-Service", "uid": "", - "version": 1, + "version": 2, "weekStart": "" - } +} diff --git a/cli/dpservice-exporter/main.go b/cli/dpservice-exporter/main.go index e7475b3d3..d16fd0ca1 100644 --- a/cli/dpservice-exporter/main.go +++ b/cli/dpservice-exporter/main.go @@ -12,6 +12,7 @@ import ( "net/netip" "os" "os/user" + "reflect" "strconv" "time" @@ -72,10 +73,7 @@ func main() { } r := prometheus.NewRegistry() - 
r.MustRegister(metrics.InterfaceStat) - r.MustRegister(metrics.CallCount) - r.MustRegister(metrics.HeapInfo) - r.MustRegister(metrics.HashTableSaturation) + registerAllMetrics(r, log) http.Handle("/metrics", promhttp.HandlerFor(r, promhttp.HandlerOpts{})) @@ -202,3 +200,28 @@ func periodicMetricsUpdate(log *logrus.Logger, exitChan chan struct{}) { time.Sleep(time.Duration(pollIntervalFlag) * time.Second) } } + +func registerAllMetrics(r *prometheus.Registry, log *logrus.Logger) { + metricsList := []prometheus.Collector{ + metrics.DpdkEthdevErrors, + metrics.DpdkEthdevPackets, + metrics.DpdkEthdevBytes, + metrics.DpdkEthdevMisc, + metrics.DpdkEthdevLinkStatus, + metrics.DpdkHeapInfo, + metrics.DpserviceUsedNatPortsCount, + metrics.DpserviceFwRulesCount, + metrics.DpserviceVirtsvcUsedPortsCount, + metrics.DpserviceCallCount, + metrics.DpserviceHashTableSaturation, + } + + for _, metric := range metricsList { + v := reflect.ValueOf(metric) + if v.Kind() == reflect.Ptr && v.Elem().CanInterface() { + r.MustRegister(v.Elem().Interface().(prometheus.Collector)) + } else { + log.Errorf("Invalid metric type: %T", metric) + } + } +} diff --git a/cli/dpservice-exporter/metrics/metrics.go b/cli/dpservice-exporter/metrics/metrics.go index f70106210..099b6a4af 100644 --- a/cli/dpservice-exporter/metrics/metrics.go +++ b/cli/dpservice-exporter/metrics/metrics.go @@ -50,19 +50,19 @@ func Update(conn net.Conn, hostname string, log *logrus.Logger) error { var ealHeapList EalHeapList err := queryTelemetry(conn, log, "/eal/heap_list", &ealHeapList) if err != nil { - return err + return fmt.Errorf("failed to query eal heap list: %v", err) } for _, id := range ealHeapList.Value { var ealHeapInfo EalHeapInfo err = queryTelemetry(conn, log, fmt.Sprintf("/eal/heap_info,%d", id), &ealHeapInfo) if err != nil { - return err + return fmt.Errorf("failed to query eal heap info: %v", err) } for key, value := range ealHeapInfo.Value { // Only export metrics of type float64 (/eal/heap_info contains 
also some string values) if v, ok := value.(float64); ok { - HeapInfo.With(prometheus.Labels{"node_name": hostname, "info": key}).Set(v) + DpdkHeapInfo.With(prometheus.Labels{"node": hostname, "info": key}).Set(v) } } } @@ -76,64 +76,91 @@ func Update(conn net.Conn, hostname string, log *logrus.Logger) error { var ethdevInfo EthdevInfo err = queryTelemetry(conn, log, fmt.Sprintf("/ethdev/info,%d", id), &ethdevInfo) if err != nil { - return err + return fmt.Errorf("failed to query ethdev info: %v", err) + } + // set link status only for PF interfaces + // if interface name doesn't contain "representor" it is PF interface + if !strings.Contains(ethdevInfo.Value.Name, "representor") { + var ethdevLinkStatus EthdevLinkStatus + err = queryTelemetry(conn, log, fmt.Sprintf("/ethdev/link_status,%d", id), &ethdevLinkStatus) + if err != nil { + return fmt.Errorf("failed to query ethdev link status: %v", err) + } + var linkStatus float64 + if strings.ToLower(ethdevLinkStatus.Value.Status) == "up" { + linkStatus = float64(1) + } else if strings.ToLower(ethdevLinkStatus.Value.Status) == "down" { + linkStatus = float64(0) + } else { + // if there is problem getting the link status skip this update + continue + } + DpdkEthdevLinkStatus.With(prometheus.Labels{"name": ethdevInfo.Value.Name}).Set(linkStatus) } var ethdevXstats EthdevXstats err = queryTelemetry(conn, log, fmt.Sprintf("/ethdev/xstats,%d", id), &ethdevXstats) if err != nil { - return err + return fmt.Errorf("failed to query ethdev xstats: %v", err) } for statName, statValueFloat := range ethdevXstats.Value { - InterfaceStat.With(prometheus.Labels{"interface": ethdevInfo.Value.Name, "stat_name": statName}).Set(statValueFloat) + if strings.Contains(statName, "bytes") { + DpdkEthdevBytes.With(prometheus.Labels{"name": ethdevInfo.Value.Name, "stat": statName}).Set(statValueFloat) + } else if strings.Contains(statName, "packets") { + DpdkEthdevPackets.With(prometheus.Labels{"name": ethdevInfo.Value.Name, "stat":
statName}).Set(statValueFloat) + } else if strings.Contains(statName, "errors") { + DpdkEthdevErrors.With(prometheus.Labels{"name": ethdevInfo.Value.Name, "stat": statName}).Set(statValueFloat) + } else { + DpdkEthdevMisc.With(prometheus.Labels{"name": ethdevInfo.Value.Name, "stat": statName}).Set(statValueFloat) + } } } - var dpserviceNatPort DpServiceNatPort - err = queryTelemetry(conn, log, "/dp_service/nat/used_port_count", &dpserviceNatPort) + var dpserviceNatPortCount DpServiceNatPortCount + err = queryTelemetry(conn, log, "/dp_service/nat/used_port_count", &dpserviceNatPortCount) if err != nil { - return err + return fmt.Errorf("failed to query used nat port count: %v", err) } - for ifName, portCount := range dpserviceNatPort.Value { - InterfaceStat.With(prometheus.Labels{"interface": ifName, "stat_name": "nat_used_port_count"}).Set(float64(portCount)) + for ifaceName, portCount := range dpserviceNatPortCount.Value { + DpserviceUsedNatPortsCount.With(prometheus.Labels{"interface_id": ifaceName}).Set(float64(portCount)) } - var dpserviceVirtsvcPort DpServiceVirtsvcPort - err = queryTelemetry(conn, log, "/dp_service/virtsvc/used_port_count", &dpserviceVirtsvcPort) + var dpserviceVirtsvcPortCount DpServiceVirtsvcPortCount + err = queryTelemetry(conn, log, "/dp_service/virtsvc/used_port_count", &dpserviceVirtsvcPortCount) if err != nil { - return err + return fmt.Errorf("failed to query used virtsvc port count: %v", err) } - for ifName, portCount := range dpserviceVirtsvcPort.Value { - InterfaceStat.With(prometheus.Labels{"interface": ifName, "stat_name": "virtsvc_used_port_count"}).Set(float64(portCount)) + for virtsvc, portCount := range dpserviceVirtsvcPortCount.Value { + DpserviceVirtsvcUsedPortsCount.With(prometheus.Labels{"address": virtsvc}).Set(float64(portCount)) } var dpserviceFirewallRuleCount DpServiceFirewallRuleCount err = queryTelemetry(conn, log, "/dp_service/firewall/rule_count", &dpserviceFirewallRuleCount) if err != nil { - return err + return 
fmt.Errorf("failed to query firewall rule count: %v", err) } - for ifName, fwRuleCount := range dpserviceFirewallRuleCount.Value { - InterfaceStat.With(prometheus.Labels{"interface": ifName, "stat_name": "firewall_rule_count"}).Set(float64(fwRuleCount)) + for ifaceName, fwRuleCount := range dpserviceFirewallRuleCount.Value { + DpserviceFwRulesCount.With(prometheus.Labels{"interface_id": ifaceName}).Set(float64(fwRuleCount)) } var dpserviceCallCount DpServiceGraphCallCount err = queryTelemetry(conn, log, "/dp_service/graph/call_count", &dpserviceCallCount) if err != nil { - return err + return fmt.Errorf("failed to query call count: %v", err) } for graphNodeName, callCount := range dpserviceCallCount.GraphCallCnt.Node_0_to_255 { - CallCount.With(prometheus.Labels{"node_name": hostname, "graph_node": graphNodeName}).Set(callCount) + DpserviceCallCount.With(prometheus.Labels{"node": hostname, "graph_node": graphNodeName}).Set(callCount) } var dpServiceHashTableSaturation DpServiceHashTableSaturation err = queryTelemetry(conn, log, "/dp_service/table/saturation", &dpServiceHashTableSaturation) if err != nil { - return err + return fmt.Errorf("failed to query table saturation: %v", err) } for table, saturation := range dpServiceHashTableSaturation.Value { - HashTableSaturation.With(prometheus.Labels{"table_name": table, "stat_name": "capacity"}).Set(saturation.Capacity) - HashTableSaturation.With(prometheus.Labels{"table_name": table, "stat_name": "entries"}).Set(saturation.Entries) + DpserviceHashTableSaturation.With(prometheus.Labels{"table": table, "stat": "capacity"}).Set(saturation.Capacity) + DpserviceHashTableSaturation.With(prometheus.Labels{"table": table, "stat": "entries"}).Set(saturation.Entries) } return nil } diff --git a/cli/dpservice-exporter/metrics/types.go b/cli/dpservice-exporter/metrics/types.go index 4b8333800..a8e93814f 100644 --- a/cli/dpservice-exporter/metrics/types.go +++ b/cli/dpservice-exporter/metrics/types.go @@ -6,36 +6,92 @@ package 
metrics import "github.com/prometheus/client_golang/prometheus" var ( - InterfaceStat = prometheus.NewGaugeVec( + DpdkEthdevErrors = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "dpdk_interface_stat", - Help: "Dp-Service interface statistic", + Name: "dpdk_ethdev_errors_total", + Help: "DPDK total ethdev errors", }, - []string{"interface", "stat_name"}, + []string{"name", "stat"}, ) - CallCount = prometheus.NewGaugeVec( + DpdkEthdevPackets = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "dpdk_graph_stat", - Help: "Dp-Service graph statistics", + Name: "dpdk_ethdev_packets_total", + Help: "DPDK total ethdev packets", }, - []string{"node_name", "graph_node"}, + []string{"name", "stat"}, ) - HeapInfo = prometheus.NewGaugeVec( + DpdkEthdevBytes = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "dpdk_ethdev_bytes_total", + Help: "DPDK total ethdev bytes", + }, + []string{"name", "stat"}, + ) + + DpdkEthdevMisc = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "dpdk_ethdev_misc", + Help: "Other DPDK ethdev statistics", + }, + []string{"name", "stat"}, + ) + + DpdkEthdevLinkStatus = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "dpdk_ethdev_link_status", + Help: "Link status of DPDK ethdev", + }, + []string{"name"}, + ) + + DpdkHeapInfo = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "dpdk_heap_info", - Help: "Dp-Service heap info", + Help: "Dpservice heap info", + }, + []string{"node", "info"}, + ) + + DpserviceUsedNatPortsCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "dps_nat_used_ports_count", + Help: "Count of used NAT ports on interface", }, - []string{"node_name", "info"}, + []string{"interface_id"}, ) - HashTableSaturation = prometheus.NewGaugeVec( + DpserviceFwRulesCount = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "hash_table_saturation", - Help: "Dp-Service hash table saturation", + Name: "dps_firewall_rules_count", + Help: "Count of firewall rules on interface", }, - 
[]string{"table_name", "stat_name"}, + []string{"interface_id"}, + ) + + DpserviceVirtsvcUsedPortsCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "dps_virtsvc_used_ports_count", + Help: "Count of used virtual service ports", + }, + []string{"address"}, + ) + + DpserviceCallCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "dps_graph_call_count", + Help: "Dpservice graph statistics", + }, + []string{"node", "graph_node"}, + ) + + DpserviceHashTableSaturation = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "dps_hash_table_saturation", + Help: "Dpservice hash table saturation", + }, + []string{"table", "stat"}, ) ) @@ -49,15 +105,23 @@ type EthdevInfo struct { } `json:"/ethdev/info"` } +type EthdevLinkStatus struct { + Value struct { + Duplex string `json:"duplex,omitempty"` + Speed int `json:"speed,omitempty"` + Status string `json:"status,omitempty"` + } `json:"/ethdev/link_status"` +} + type EthdevXstats struct { Value map[string]float64 `json:"/ethdev/xstats"` } -type DpServiceNatPort struct { +type DpServiceNatPortCount struct { Value map[string]int `json:"/dp_service/nat/used_port_count"` } -type DpServiceVirtsvcPort struct { +type DpServiceVirtsvcPortCount struct { Value map[string]int `json:"/dp_service/virtsvc/used_port_count"` } diff --git a/test/local/test_telemetry.py b/test/local/test_telemetry.py index 0c88e5185..cc615452b 100644 --- a/test/local/test_telemetry.py +++ b/test/local/test_telemetry.py @@ -19,11 +19,10 @@ 'tx-0', 'tx-1', 'tx-2', 'tx-3', 'tx-4', 'tx-5', ) HEAP_INFO = ( 'Heap_id', 'Heap_size', 'Alloc_count', 'Free_count', 'Alloc_size', 'Free_size', 'Greatest_free_size' ) -IFACE_STATS = ( - 'rx_q0_errors', 'rx_q0_bytes', 'tx_q0_bytes', 'rx_q0_packets', 'tx_q0_packets', - 'rx_good_bytes', 'tx_good_bytes', 'rx_good_packets', 'tx_good_packets', - 'rx_errors', 'tx_errors', 'rx_missed_errors', 'rx_mbuf_allocation_errors', - 'nat_used_port_count', 'firewall_rule_count', +ETHDEV_STATS = ( + 'rx_good_bytes', 
'rx_q0_bytes', 'tx_good_bytes', 'tx_q0_bytes', + 'rx_errors', 'rx_mbuf_allocation_errors', 'rx_missed_errors', 'rx_q0_errors', 'tx_errors', + 'rx_good_packets', 'rx_q0_packets', 'tx_good_packets', 'tx_q0_packets', ) HW_IFACE_STATS = ( 'rx_broadcast_bytes', 'rx_broadcast_packets', 'tx_broadcast_bytes', 'tx_broadcast_packets', @@ -125,35 +124,43 @@ def test_telemetry_fwall(prepare_ifaces, grpc_client): def test_telemetry_exporter(request, prepare_ifaces, start_exporter): metrics = urlopen(f"http://localhost:{exporter_port}/metrics").read().decode('utf-8') - graph_stats, heap_info, interface_stats, htable_saturation = set(), set(), set(), set() + graph_stats, heap_info, ethdev_stats, htable_saturation = set(), set(), set(), set() for metric in metrics.splitlines(): - if metric.startswith('dpdk_graph_stat'): + if metric.startswith('dps_graph_call_count'): graph_stats.add(metric.split('"')[1]) elif metric.startswith('dpdk_heap_info'): heap_info.add(metric.split('"')[1]) - elif metric.startswith('dpdk_interface_stat'): - interface_stats.add(metric.split('"')[3]) - elif metric.startswith('hash_table_saturation'): + elif metric.startswith(('dpdk_ethdev_errors_total', 'dpdk_ethdev_bytes_total', 'dpdk_ethdev_packets_total')): + ethdev_stats.add(metric.split('"')[3]) + elif metric.startswith('dps_hash_table_saturation'): htable_saturation.add(metric.split('"')[3]) + elif metric.startswith('dpdk_ethdev_link_status'): + linkStatus = metric.split(' ')[1] + assert linkStatus == '0' or linkStatus == '1', \ + "Link status must be 0 or 1" + # these metrics don't have any stat label, only checking if they are not empty + elif metric.startswith(('dpdk_ethdev_misc', 'dps_firewall_rules_count', 'dps_virtsvc_used_ports_count', 'dps_nat_used_ports_count')): + assert len(metric.split(' ')) > 1, \ + f"Empty exported metric '{metric.split('{')[0]}' found" else: assert metric.startswith("#"), \ f"Unknown exported metric '{metric.split('{')[0]}' found" # meson options (e.g. 
enable_virtual_services) are hard to do in these scripts, so just check manually graph_nodes = GRAPH_NODES - iface_stats = IFACE_STATS + iface_stats = ETHDEV_STATS if 'virtsvc' in graph_stats: graph_nodes += ('virtsvc',) if request.config.getoption("--hw"): iface_stats += HW_IFACE_STATS if PF1.tap == "pf1-tap": graph_nodes += ('tx-6',) - if 'rx_q1_bytes' in interface_stats: + if 'rx_q1_bytes' in ethdev_stats: iface_stats += HW_PF1_IFACE_STATS assert graph_stats == set(graph_nodes), \ "Unexpected graph telemetry in exporter output" assert heap_info == set(HEAP_INFO), \ "Unexpected heap info in exporter output" - assert interface_stats == set(iface_stats) or interface_stats == set(iface_stats + ('virtsvc_used_port_count',)), \ - "Unexpected interface statistics in exporter output" + assert ethdev_stats == set(iface_stats), \ + "Unexpected ethdev statistics in exporter output" assert htable_saturation == set(HASH_TABLES) or htable_saturation == set(HASH_TABLES + ('virtsvc_table_0', 'virtsvc_table_1')), \ "Unexpected hash table info in exporter output"