Skip to content

Commit 6aa308d

Browse files
authored
Merge pull request #334 from xueweiz/cumulative
Metric format fixes on host/uptime and disk/*
2 parents 424b864 + 82c2368 commit 6aa308d

File tree

2 files changed

+23
-14
lines changed

2 files changed

+23
-14
lines changed

pkg/systemstatsmonitor/disk_collector.go

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@ import (
2929
"k8s.io/node-problem-detector/pkg/util/metrics"
3030
)
3131

32+
const deviceNameLabel = "device_name"
33+
3234
type diskCollector struct {
3335
mIOTime *metrics.Int64Metric
3436
mWeightedIO *metrics.Int64Metric
@@ -44,22 +46,25 @@ func NewDiskCollectorOrDie(diskConfig *ssmtypes.DiskStatsConfig) *diskCollector
4446
dc := diskCollector{config: diskConfig}
4547

4648
var err error
49+
50+
// Use metrics.Sum aggregation method to ensure the metric is a counter/cumulative metric.
4751
dc.mIOTime, err = metrics.NewInt64Metric(
4852
diskConfig.MetricsConfigs["disk/io_time"].DisplayName,
4953
"The IO time spent on the disk",
5054
"second",
51-
metrics.LastValue,
52-
[]string{"device"})
55+
metrics.Sum,
56+
[]string{deviceNameLabel})
5357
if err != nil {
5458
glog.Fatalf("Error initializing metric for disk/io_time: %v", err)
5559
}
5660

61+
// Use metrics.Sum aggregation method to ensure the metric is a counter/cumulative metric.
5762
dc.mWeightedIO, err = metrics.NewInt64Metric(
5863
diskConfig.MetricsConfigs["disk/weighted_io"].DisplayName,
5964
"The weighted IO on the disk",
6065
"second",
61-
metrics.LastValue,
62-
[]string{"device"})
66+
metrics.Sum,
67+
[]string{deviceNameLabel})
6368
if err != nil {
6469
glog.Fatalf("Error initializing metric for disk/weighted_io: %v", err)
6570
}
@@ -69,7 +74,7 @@ func NewDiskCollectorOrDie(diskConfig *ssmtypes.DiskStatsConfig) *diskCollector
6974
"The average queue length on the disk",
7075
"second",
7176
metrics.LastValue,
72-
[]string{"device"})
77+
[]string{deviceNameLabel})
7378
if err != nil {
7479
glog.Fatalf("Error initializing metric for disk/avg_queue_len: %v", err)
7580
}
@@ -112,13 +117,13 @@ func (dc *diskCollector) collect() {
112117
avgQueueLen = float64(ioCountersStat.WeightedIO-lastWeightedIO) / float64(ioCountersStat.IoTime-lastIOTime)
113118
}
114119

115-
// Attach label {"device": deviceName} to the metrics.
116-
tags := map[string]string{"device": deviceName}
120+
// Attach label {"device_name": deviceName} to the metrics.
121+
tags := map[string]string{deviceNameLabel: deviceName}
117122
if dc.mIOTime != nil {
118-
dc.mIOTime.Record(tags, int64(ioCountersStat.IoTime))
123+
dc.mIOTime.Record(tags, int64(ioCountersStat.IoTime-lastIOTime))
119124
}
120125
if dc.mWeightedIO != nil {
121-
dc.mWeightedIO.Record(tags, int64(ioCountersStat.WeightedIO))
126+
dc.mWeightedIO.Record(tags, int64(ioCountersStat.WeightedIO-lastWeightedIO))
122127
}
123128
if dc.mAvgQueueLen != nil {
124129
dc.mAvgQueueLen.Record(tags, avgQueueLen)

pkg/systemstatsmonitor/host_collector.go

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,13 @@ import (
2626
)
2727

2828
type hostCollector struct {
29-
tags map[string]string
30-
uptime *metrics.Int64Metric
29+
tags map[string]string
30+
uptime *metrics.Int64Metric
31+
lastUptime int64
3132
}
3233

3334
func NewHostCollectorOrDie(hostConfig *ssmtypes.HostStatsConfig) *hostCollector {
34-
hc := hostCollector{map[string]string{}, nil}
35+
hc := hostCollector{map[string]string{}, nil, 0}
3536

3637
kernelVersion, err := host.KernelVersion()
3738
if err != nil {
@@ -45,12 +46,13 @@ func NewHostCollectorOrDie(hostConfig *ssmtypes.HostStatsConfig) *hostCollector
4546
}
4647
hc.tags["os_version"] = osVersion
4748

49+
// Use metrics.Sum aggregation method to ensure the metric is a counter/cumulative metric.
4850
if hostConfig.MetricsConfigs["host/uptime"].DisplayName != "" {
4951
hc.uptime, err = metrics.NewInt64Metric(
5052
hostConfig.MetricsConfigs["host/uptime"].DisplayName,
5153
"The uptime of the operating system",
5254
"second",
53-
metrics.LastValue,
55+
metrics.Sum,
5456
[]string{"kernel_version", "os_version"})
5557
if err != nil {
5658
glog.Fatalf("Error initializing metric for host/uptime: %v", err)
@@ -70,8 +72,10 @@ func (hc *hostCollector) collect() {
7072
glog.Errorf("Failed to retrieve uptime of the host: %v", err)
7173
return
7274
}
75+
uptimeSeconds := int64(uptime)
7376

7477
if hc.uptime != nil {
75-
hc.uptime.Record(hc.tags, int64(uptime))
78+
hc.uptime.Record(hc.tags, uptimeSeconds-hc.lastUptime)
7679
}
80+
hc.lastUptime = uptimeSeconds
7781
}

0 commit comments

Comments
 (0)