Skip to content

Commit 51f4a89

Browse files
authored
[receiver/vcenter] Adds vSAN metrics for Virtual Machines (#34082)
**Description:** Adds a set of vSAN metrics for Virtual Machines. ``` vcenter.vm.vsan.throughput (direction={read/write}) vcenter.vm.vsan.iops (direction={read/write}) vcenter.vm.vsan.latency.avg (direction={read/write}) ``` **Link to tracking Issue:** #33556 **Testing:** Unit tests were added for the scraper. Client tests could not be added because the `govmomi` vSAN simulator is not currently implemented. Tested against a live environment. **Documentation:** New documentation was generated.
1 parent 1311344 commit 51f4a89

File tree

21 files changed

+1142
-19
lines changed

21 files changed

+1142
-19
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: 'enhancement'
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: 'vcenterreceiver'
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Adds a number of vSAN metrics for Virtual Machines, all disabled by default.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [33556]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ dist/
2626
# Miscellaneous files
2727
*.sw[op]
2828
*.DS_Store
29+
__debug_bin*
2930

3031
# Coverage
3132
coverage/*

cmd/otelcontribcol/go.sum

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

receiver/vcenterreceiver/client.go

Lines changed: 263 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,45 @@ import (
77
"context"
88
"errors"
99
"fmt"
10+
"maps"
1011
"net/url"
12+
"reflect"
13+
"strconv"
14+
"strings"
15+
"time"
1116

1217
"github.com/vmware/govmomi"
1318
"github.com/vmware/govmomi/find"
1419
"github.com/vmware/govmomi/object"
1520
"github.com/vmware/govmomi/performance"
16-
"github.com/vmware/govmomi/property"
1721
"github.com/vmware/govmomi/view"
1822
"github.com/vmware/govmomi/vim25"
1923
"github.com/vmware/govmomi/vim25/mo"
24+
"github.com/vmware/govmomi/vim25/soap"
2025
vt "github.com/vmware/govmomi/vim25/types"
26+
"github.com/vmware/govmomi/vsan"
27+
"github.com/vmware/govmomi/vsan/types"
28+
"go.uber.org/zap"
2129
)
2230

2331
// vcenterClient is a client that collects data from a vCenter endpoint.
2432
type vcenterClient struct {
25-
moClient *govmomi.Client
26-
vimDriver *vim25.Client
27-
finder *find.Finder
28-
pc *property.Collector
29-
pm *performance.Manager
30-
vm *view.Manager
31-
cfg *Config
33+
logger *zap.Logger
34+
moClient *govmomi.Client
35+
vimDriver *vim25.Client
36+
vsanDriver *vsan.Client
37+
finder *find.Finder
38+
pm *performance.Manager
39+
vm *view.Manager
40+
cfg *Config
3241
}
3342

3443
var newVcenterClient = defaultNewVcenterClient
3544

36-
func defaultNewVcenterClient(c *Config) *vcenterClient {
45+
func defaultNewVcenterClient(l *zap.Logger, c *Config) *vcenterClient {
3746
return &vcenterClient{
38-
cfg: c,
47+
logger: l,
48+
cfg: c,
3949
}
4050
}
4151

@@ -70,10 +80,15 @@ func (vc *vcenterClient) EnsureConnection(ctx context.Context) error {
7080
}
7181
vc.moClient = client
7282
vc.vimDriver = client.Client
73-
vc.pc = property.DefaultCollector(vc.vimDriver)
7483
vc.finder = find.NewFinder(vc.vimDriver)
7584
vc.pm = performance.NewManager(vc.vimDriver)
7685
vc.vm = view.NewManager(vc.vimDriver)
86+
vsanDriver, err := vsan.NewClient(ctx, vc.vimDriver)
87+
if err != nil {
88+
vc.logger.Info(fmt.Errorf("could not create VSAN client: %w", err).Error())
89+
} else {
90+
vc.vsanDriver = vsanDriver
91+
}
7792
return nil
7893
}
7994

@@ -320,3 +335,240 @@ func (vc *vcenterClient) PerfMetricsQuery(
320335
resultsByRef: resultsByRef,
321336
}, nil
322337
}
338+
339+
// VSANQueryResults contains all returned vSAN metric related data
340+
type VSANQueryResults struct {
341+
// Contains vSAN metric data keyed by UUID string
342+
MetricResultsByUUID map[string]*VSANMetricResults
343+
}
344+
345+
// VSANMetricResults contains vSAN metric related data for a single resource
346+
type VSANMetricResults struct {
347+
// Contains UUID info for related resource
348+
UUID string
349+
// Contains returned metric value info for all metrics
350+
MetricDetails []*VSANMetricDetails
351+
}
352+
353+
// VSANMetricDetails holds the data returned for one vSAN metric.
type VSANMetricDetails struct {
	// MetricLabel is the label of the metric.
	MetricLabel string
	// Interval is the metric interval in seconds.
	Interval int32
	// Timestamps holds the timestamp of every metric value.
	Timestamps []*time.Time
	// Values holds every value reported for the metric label.
	Values []int64
}
364+
365+
// vSANQueryType names the kind of vSAN query to perform.
type vSANQueryType string

// VSANQueryTypeVirtualMachines is the query type used for virtual machine
// vSAN metrics.
const VSANQueryTypeVirtualMachines vSANQueryType = "virtual-machine:*"
371+
372+
// getLabelsForQueryType returns the appropriate labels for each query type
373+
func (vc *vcenterClient) getLabelsForQueryType(queryType vSANQueryType) []string {
374+
switch queryType {
375+
case VSANQueryTypeVirtualMachines:
376+
return []string{
377+
"iopsRead", "iopsWrite", "throughputRead", "throughputWrite",
378+
"latencyRead", "latencyWrite",
379+
}
380+
default:
381+
return []string{}
382+
}
383+
}
384+
385+
// VSANVirtualMachines returns back virtual machine vSAN performance metrics
386+
func (vc *vcenterClient) VSANVirtualMachines(
387+
ctx context.Context,
388+
clusterRefs []*vt.ManagedObjectReference,
389+
) (*VSANQueryResults, error) {
390+
results, err := vc.vSANQuery(ctx, VSANQueryTypeVirtualMachines, clusterRefs)
391+
err = vc.handleVSANError(err, VSANQueryTypeVirtualMachines)
392+
return results, err
393+
}
394+
395+
// vSANQuery performs a vSAN query for the specified type across all clusters
396+
func (vc *vcenterClient) vSANQuery(
397+
ctx context.Context,
398+
queryType vSANQueryType,
399+
clusterRefs []*vt.ManagedObjectReference,
400+
) (*VSANQueryResults, error) {
401+
allResults := VSANQueryResults{
402+
MetricResultsByUUID: map[string]*VSANMetricResults{},
403+
}
404+
405+
for _, clusterRef := range clusterRefs {
406+
results, err := vc.vSANQueryByCluster(ctx, queryType, clusterRef)
407+
if err != nil {
408+
return &allResults, err
409+
}
410+
411+
maps.Copy(allResults.MetricResultsByUUID, results.MetricResultsByUUID)
412+
}
413+
414+
return &allResults, nil
415+
}
416+
417+
// vSANQueryByCluster performs a vSAN query for the specified type for one cluster
418+
func (vc *vcenterClient) vSANQueryByCluster(
419+
ctx context.Context,
420+
queryType vSANQueryType,
421+
clusterRef *vt.ManagedObjectReference,
422+
) (*VSANQueryResults, error) {
423+
queryResults := VSANQueryResults{
424+
MetricResultsByUUID: map[string]*VSANMetricResults{},
425+
}
426+
// Not all vCenters support vSAN so just return an empty result
427+
if vc.vsanDriver == nil {
428+
return &queryResults, nil
429+
}
430+
431+
now := time.Now()
432+
querySpec := []types.VsanPerfQuerySpec{
433+
{
434+
EntityRefId: string(queryType),
435+
StartTime: &now,
436+
EndTime: &now,
437+
Labels: vc.getLabelsForQueryType(queryType),
438+
},
439+
}
440+
rawResults, err := vc.vsanDriver.VsanPerfQueryPerf(ctx, clusterRef, querySpec)
441+
if err != nil {
442+
return nil, fmt.Errorf("problem retrieving %s vSAN metrics for cluster %s: %w", queryType, clusterRef.Value, err)
443+
}
444+
445+
queryResults.MetricResultsByUUID = map[string]*VSANMetricResults{}
446+
for _, rawResult := range rawResults {
447+
metricResults, err := vc.convertVSANResultToMetricResults(rawResult)
448+
if err != nil && metricResults != nil {
449+
return &queryResults, fmt.Errorf("problem processing %s [%s] vSAN metrics for cluster %s: %w", queryType, metricResults.UUID, clusterRef.Value, err)
450+
}
451+
if err != nil {
452+
return &queryResults, fmt.Errorf("problem processing %s vSAN metrics for cluster %s: %w", queryType, clusterRef.Value, err)
453+
}
454+
455+
queryResults.MetricResultsByUUID[metricResults.UUID] = metricResults
456+
}
457+
return &queryResults, nil
458+
}
459+
460+
func (vc *vcenterClient) handleVSANError(
461+
err error,
462+
queryType vSANQueryType,
463+
) error {
464+
faultErr := errors.Unwrap(err)
465+
if faultErr == nil {
466+
return err
467+
}
468+
if !soap.IsSoapFault(faultErr) {
469+
return err
470+
}
471+
472+
fault := soap.ToSoapFault(faultErr)
473+
msg := fault.String
474+
475+
if fault.Detail.Fault != nil {
476+
msg = reflect.TypeOf(fault.Detail.Fault).Name()
477+
}
478+
switch msg {
479+
case "NotSupported":
480+
vc.logger.Debug(fmt.Sprintf("%s vSAN metrics not supported: %s", queryType, err.Error()))
481+
return nil
482+
case "NotFound":
483+
vc.logger.Debug(fmt.Sprintf("no %s vSAN metrics found: %s", queryType, err.Error()))
484+
return nil
485+
default:
486+
return err
487+
}
488+
}
489+
490+
func (vc *vcenterClient) convertVSANResultToMetricResults(vSANResult types.VsanPerfEntityMetricCSV) (*VSANMetricResults, error) {
491+
uuid, err := vc.uuidFromEntityRefID(vSANResult.EntityRefId)
492+
if err != nil {
493+
return nil, err
494+
}
495+
496+
metricResults := VSANMetricResults{
497+
UUID: uuid,
498+
MetricDetails: []*VSANMetricDetails{},
499+
}
500+
501+
// Parse all timestamps
502+
localZone, _ := time.Now().Local().Zone()
503+
timeStrings := strings.Split(vSANResult.SampleInfo, ",")
504+
timestamps := []time.Time{}
505+
for _, timeString := range timeStrings {
506+
// Assuming the collector is making the request in the same time zone as the localized response
507+
// from the vSAN API. Not a great assumption, but otherwise it will almost definitely be wrong
508+
// if we assume that it is UTC. There is precedent for this method at least.
509+
timestamp, err := time.Parse("2006-01-02 15:04:05 MST", fmt.Sprintf("%s %s", timeString, localZone))
510+
if err != nil {
511+
return &metricResults, fmt.Errorf("problem parsing timestamp from %s: %w", timeString, err)
512+
}
513+
514+
timestamps = append(timestamps, timestamp)
515+
}
516+
517+
// Parse all metrics
518+
for _, vSANValue := range vSANResult.Value {
519+
metricDetails, err := vc.convertVSANValueToMetricDetails(vSANValue, timestamps)
520+
if err != nil {
521+
return &metricResults, err
522+
}
523+
524+
metricResults.MetricDetails = append(metricResults.MetricDetails, metricDetails)
525+
}
526+
return &metricResults, nil
527+
}
528+
529+
func (vc *vcenterClient) convertVSANValueToMetricDetails(
530+
vSANValue types.VsanPerfMetricSeriesCSV,
531+
timestamps []time.Time,
532+
) (*VSANMetricDetails, error) {
533+
metricLabel := vSANValue.MetricId.Label
534+
metricInterval := vSANValue.MetricId.MetricsCollectInterval
535+
// If not found assume the interval is 5m
536+
if metricInterval == 0 {
537+
vc.logger.Warn(fmt.Sprintf("no interval found for vSAN metric [%s] so assuming 5m", metricLabel))
538+
metricInterval = 300
539+
}
540+
metricDetails := VSANMetricDetails{
541+
MetricLabel: metricLabel,
542+
Interval: metricInterval,
543+
Timestamps: []*time.Time{},
544+
Values: []int64{},
545+
}
546+
valueStrings := strings.Split(vSANValue.Values, ",")
547+
if len(valueStrings) != len(timestamps) {
548+
return nil, fmt.Errorf("number of timestamps [%d] doesn't match number of values [%d] for metric %s", len(timestamps), len(valueStrings), metricLabel)
549+
}
550+
551+
// Match up timestamps with metric values
552+
for i, valueString := range valueStrings {
553+
value, err := strconv.ParseInt(valueString, 10, 64)
554+
if err != nil {
555+
return nil, fmt.Errorf("problem converting value [%s] for metric %s", valueString, metricLabel)
556+
}
557+
558+
metricDetails.Timestamps = append(metricDetails.Timestamps, &timestamps[i])
559+
metricDetails.Values = append(metricDetails.Values, value)
560+
}
561+
562+
return &metricDetails, nil
563+
}
564+
565+
// uuidFromEntityRefID returns the UUID portion of the EntityRefId
566+
func (vc *vcenterClient) uuidFromEntityRefID(id string) (string, error) {
567+
colonIndex := strings.Index(id, ":")
568+
if colonIndex != -1 {
569+
uuid := id[colonIndex+1:]
570+
return uuid, nil
571+
}
572+
573+
return "", fmt.Errorf("no ':' found in EntityRefId [%s] to parse UUID", id)
574+
}

0 commit comments

Comments
 (0)