Skip to content

Commit cd34549

Browse files
authored
Add Nvme scrape configs for LIS CSI (#382)
1 parent 6ae2c2d commit cd34549

File tree

8 files changed

+475
-12
lines changed

8 files changed

+475
-12
lines changed

exporter/awsemfexporter/metric_translator.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ func (mt metricTranslator) translateOTelToGroupedMetric(rm pmetric.ResourceMetri
147147
strings.HasPrefix(serviceName.Str(), "containerInsightsDCGMExporterScraper") ||
148148
strings.HasPrefix(serviceName.Str(), "containerInsightsNeuronMonitorScraper") ||
149149
strings.HasPrefix(serviceName.Str(), "containerInsightsKueueMetricsScraper") ||
150-
strings.HasPrefix(serviceName.Str(), "containerInsightsNVMeEBSScraper") {
150+
strings.HasPrefix(serviceName.Str(), "containerInsightsNVMeEBSScraper") ||
151+
strings.HasPrefix(serviceName.Str(), "containerInsightsNVMeLISScraper") {
151152
// the prometheus metrics that come from the container insight receiver need to be clearly tagged as coming from container insights
152153
metricReceiver = containerInsightsReceiver
153154
}

exporter/awsemfexporter/metric_translator_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,8 @@ func TestTranslateOtToGroupedMetric(t *testing.T) {
280280
kueueMetric.Resource().Attributes().PutStr(conventions.AttributeServiceName, "containerInsightsKueueMetricsScraper")
281281
nvmeMetric := createTestResourceMetricsHelper(defaultNumberOfTestMetrics + 1)
282282
nvmeMetric.Resource().Attributes().PutStr(conventions.AttributeServiceName, "containerInsightsNVMeEBSScraper")
283+
nvmeLisMetric := createTestResourceMetricsHelper(defaultNumberOfTestMetrics + 1)
284+
nvmeLisMetric.Resource().Attributes().PutStr(conventions.AttributeServiceName, "containerInsightsNVMeLISScraper")
283285

284286
counterSumMetrics := map[string]*metricInfo{
285287
"spanCounter": {
@@ -410,6 +412,19 @@ func TestTranslateOtToGroupedMetric(t *testing.T) {
410412
"myServiceNS/containerInsightsNVMeEBSScraper",
411413
containerInsightsReceiver,
412414
},
415+
{
416+
"nvme lis receiver",
417+
nvmeLisMetric,
418+
map[string]string{
419+
"isItAnError": "false",
420+
"spanName": "testSpan",
421+
},
422+
map[string]string{
423+
"spanName": "testSpan",
424+
},
425+
"myServiceNS/containerInsightsNVMeLISScraper",
426+
containerInsightsReceiver,
427+
},
413428
}
414429

415430
for _, tc := range testCases {

receiver/awscontainerinsightreceiver/internal/nvme/metric_unit.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,21 @@ const (
1919
ebsExceededEC2IOPSTime = "aws_ebs_csi_ec2_exceeded_iops_seconds_total"
2020
ebsExceededEC2TPTime = "aws_ebs_csi_ec2_exceeded_tp_seconds_total"
2121
ebsVolumeQueueLength = "aws_ebs_csi_volume_queue_length"
22+
23+
// LIS Original Metric Names
24+
lisReadOpsTotal = "aws_ec2_instance_store_csi_read_ops_total"
25+
lisWriteOpsTotal = "aws_ec2_instance_store_csi_write_ops_total"
26+
lisReadBytesTotal = "aws_ec2_instance_store_csi_read_bytes_total"
27+
lisWriteBytesTotal = "aws_ec2_instance_store_csi_write_bytes_total"
28+
lisReadTime = "aws_ec2_instance_store_csi_read_seconds_total"
29+
lisWriteTime = "aws_ec2_instance_store_csi_write_seconds_total"
30+
lisExceededIOPSTime = "aws_ec2_instance_store_csi_ec2_exceeded_iops_seconds_total"
31+
lisExceededTPTime = "aws_ec2_instance_store_csi_ec2_exceeded_tp_seconds_total"
32+
lisVolumeQueueLength = "aws_ec2_instance_store_csi_volume_queue_length"
2233
)
2334

2435
var MetricToUnit = map[string]string{
36+
// EBS metrics
2537
ebsReadOpsTotal: containerinsight.UnitCount,
2638
ebsWriteOpsTotal: containerinsight.UnitCount,
2739
ebsReadBytesTotal: containerinsight.UnitBytes,
@@ -33,4 +45,15 @@ var MetricToUnit = map[string]string{
3345
ebsExceededEC2IOPSTime: containerinsight.UnitSecond,
3446
ebsExceededEC2TPTime: containerinsight.UnitSecond,
3547
ebsVolumeQueueLength: containerinsight.UnitCount,
48+
49+
// LIS metrics
50+
lisReadOpsTotal: containerinsight.UnitCount,
51+
lisWriteOpsTotal: containerinsight.UnitCount,
52+
lisReadBytesTotal: containerinsight.UnitBytes,
53+
lisWriteBytesTotal: containerinsight.UnitBytes,
54+
lisReadTime: containerinsight.UnitSecond,
55+
lisWriteTime: containerinsight.UnitSecond,
56+
lisExceededIOPSTime: containerinsight.UnitSecond,
57+
lisExceededTPTime: containerinsight.UnitSecond,
58+
lisVolumeQueueLength: containerinsight.UnitCount,
3659
}

receiver/awscontainerinsightreceiver/internal/nvme/nvme_ebs_scraper_config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ type hostInfoProvider interface {
2929
GetInstanceType() string
3030
}
3131

32-
func GetScraperConfig(hostInfoProvider hostInfoProvider) *config.ScrapeConfig {
32+
func GetEbsScraperConfig(hostInfoProvider hostInfoProvider) *config.ScrapeConfig {
3333
return &config.ScrapeConfig{
3434
ScrapeInterval: model.Duration(collectionInterval),
3535
ScrapeTimeout: model.Duration(collectionInterval),

receiver/awscontainerinsightreceiver/internal/nvme/nvme_ebs_scraper_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ func TestNewNVMEScraperEndToEnd(t *testing.T) {
141141
Consumer: mConsumer,
142142
Host: componenttest.NewNopHost(),
143143
HostInfoProvider: mockHostInfoProvider{},
144-
ScraperConfigs: GetScraperConfig(mockHostInfoProvider{}),
144+
ScraperConfigs: GetEbsScraperConfig(mockHostInfoProvider{}),
145145
Logger: settings.Logger,
146146
})
147147
assert.NoError(t, err)
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package nvme // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver/internal/nvme"
5+
6+
import (
7+
"os"
8+
"time"
9+
10+
"github.com/prometheus/common/model"
11+
"github.com/prometheus/prometheus/config"
12+
"github.com/prometheus/prometheus/discovery"
13+
"github.com/prometheus/prometheus/discovery/kubernetes"
14+
"github.com/prometheus/prometheus/model/relabel"
15+
16+
ci "github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight"
17+
)
18+
19+
// Settings for the scrape job built in this file.
const (
20+
// lisCollectionInterval is used as both the scrape interval and the scrape timeout.
lisCollectionInterval = 60 * time.Second
21+
// lisJobName is the Prometheus job name of the scrape config.
lisJobName = "containerInsightsNVMeLISScraper"
22+
// lisScraperMetricsPath is the HTTP path metrics are scraped from.
lisScraperMetricsPath = "/metrics"
23+
// lisScraperK8sServiceSelector is the label selector applied to Kubernetes Service discovery.
lisScraperK8sServiceSelector = "app=nvme-csi-plugin"
24+
// lisNamespaceDiscoveryName is the only namespace searched during discovery.
lisNamespaceDiscoveryName = "kube-system"
25+
)
26+
27+
// GetLisScraperConfig builds the Prometheus scrape configuration for the
// lisJobName job.
//
// Targets are found through Kubernetes service discovery using the Service
// role, limited to the lisNamespaceDiscoveryName namespace and to Services
// matching the lisScraperK8sServiceSelector label selector. They are scraped
// over plain HTTP at lisScraperMetricsPath.
//
// hostInfoProvider is handed to getLisMetricRelabelConfig, which produces the
// metric relabel rules attached to the returned config.
func GetLisScraperConfig(hostInfoProvider hostInfoProvider) *config.ScrapeConfig {
28+
return &config.ScrapeConfig{
29+
// The timeout is set to the full interval, so a scrape may take up to one
// whole collection period.
ScrapeInterval: model.Duration(lisCollectionInterval),
30+
ScrapeTimeout: model.Duration(lisCollectionInterval),
31+
ScrapeProtocols: config.DefaultScrapeProtocols,
32+
JobName: lisJobName,
33+
Scheme: "http",
34+
MetricsPath: lisScraperMetricsPath,
35+
// NOTE(review): presumably the format assumed when a target's response does
// not declare a usable content type — confirm against the Prometheus docs.
ScrapeFallbackProtocol: config.PrometheusText0_0_4,
36+
ServiceDiscoveryConfigs: discovery.Configs{
37+
&kubernetes.SDConfig{
38+
Role: kubernetes.RoleService,
39+
NamespaceDiscovery: kubernetes.NamespaceDiscovery{
40+
Names: []string{lisNamespaceDiscoveryName},
41+
},
42+
// The selector's role matches the discovery role above.
Selectors: []kubernetes.SelectorConfig{
43+
{
44+
Role: kubernetes.RoleService,
45+
Label: lisScraperK8sServiceSelector,
46+
},
47+
},
48+
},
49+
},
50+
MetricRelabelConfigs: getLisMetricRelabelConfig(hostInfoProvider),
51+
}
52+
}
53+
54+
// getLisMetricRelabelConfig returns the metric relabel rules for the LIS
// scrape job, in the order they are listed:
//
//  1. keep only metrics whose name matches aws_ec2_instance_store_csi_.*
//  2. drop histogram series (_bucket, _sum, _count)
//  3. drop the _nvme_collector_ metrics
//  4. set the node name, cluster name, instance ID and volume ID labels
//
// The node name is taken from the HOST_NAME environment variable and the
// cluster name from hostInfoProvider.GetClusterName(); both are read once,
// when this function is called, and baked into the rules as fixed strings.
func getLisMetricRelabelConfig(hostInfoProvider hostInfoProvider) []*relabel.Config {
55+
return []*relabel.Config{
56+
// Keep only the instance store CSI metrics; everything else is discarded.
{
57+
SourceLabels: model.LabelNames{"__name__"},
58+
Regex: relabel.MustNewRegexp("aws_ec2_instance_store_csi_.*"),
59+
Action: relabel.Keep,
60+
},
61+
62+
// Drop histogram series; histogram metrics are not supported by Container Insights yet.
{
64+
SourceLabels: model.LabelNames{"__name__"},
65+
Regex: relabel.MustNewRegexp(".*_bucket|.*_sum|.*_count.*"),
66+
Action: relabel.Drop,
67+
},
68+
// Drop the NVMe collector's own data-collection metrics; they are left out
// to maintain parity with the EBS NVMe metrics.
{
70+
SourceLabels: model.LabelNames{"__name__"},
71+
Regex: relabel.MustNewRegexp(".*_nvme_collector_.*"),
72+
Action: relabel.Drop,
73+
},
74+
// Attach the Container Insights labels. The first two rules below write fixed
// values (node name, cluster name); the last two copy the value of a scraped
// label (instance_id, volume_id) through the ${1} capture group.
// NOTE(review): if HOST_NAME is unset the node-name replacement is an empty
// string — confirm the resulting behavior is acceptable.
{
76+
SourceLabels: model.LabelNames{"instance_id"},
77+
TargetLabel: ci.NodeNameKey,
78+
Regex: relabel.MustNewRegexp(".*"),
79+
Replacement: os.Getenv("HOST_NAME"),
80+
Action: relabel.Replace,
81+
},
82+
{
83+
SourceLabels: model.LabelNames{"instance_id"},
84+
TargetLabel: ci.ClusterNameKey,
85+
Regex: relabel.MustNewRegexp(".*"),
86+
Replacement: hostInfoProvider.GetClusterName(),
87+
Action: relabel.Replace,
88+
},
89+
{
90+
SourceLabels: model.LabelNames{"instance_id"},
91+
TargetLabel: ci.InstanceID,
92+
Regex: relabel.MustNewRegexp("(.*)"),
93+
Replacement: "${1}",
94+
Action: relabel.Replace,
95+
},
96+
{
97+
SourceLabels: model.LabelNames{"volume_id"},
98+
TargetLabel: ci.VolumeID,
99+
Regex: relabel.MustNewRegexp("(.*)"),
100+
Replacement: "${1}",
101+
Action: relabel.Replace,
102+
},
103+
}
104+
}

0 commit comments

Comments
 (0)