Skip to content

Commit bdad82d

Browse files
committed
Introduce disk/percent_used metric, fixes #822
The Stackdriver exporter exports the `/guest/disk/percent_used` metric to the `custom.googleapis.com` namespace, because the reserved `compute.googleapis.com` namespace cannot be used at this stage. This was tested on GCP Container-Optimized OS with the following command: ``` /mnt/disks/scratch/node-problem-detector --enable-k8s-exporter=false --config.system-stats-monitor=/etc/node_problem_detector/system-stats-monitor.json --config.system-log-monitor=/etc/node_problem_detector/kernel-monitor.json --config.custom-plugin-monitor=/etc/node_problem_detector/boot-disk-size-consistency-monitor.json --exporter.stackdriver=/etc/node_problem_detector/stackdriver-exporter.json ``` The `/mnt/disks/scratch/` directory was mounted as tmpfs specifically to get execution permissions: ``` sudo mount -t tmpfs tmpfs /mnt/disks/scratch/ ```
1 parent 34b265a commit bdad82d

File tree

6 files changed

+20
-0
lines changed

6 files changed

+20
-0
lines changed

config/system-stats-monitor.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
"disk/bytes_used": {
4545
"displayName": "disk/bytes_used"
4646
},
47+
"disk/percent_used": {
48+
"displayName": "disk/percent_used"
49+
},
4750
"disk/io_time": {
4851
"displayName": "disk/io_time"
4952
},

pkg/exporters/stackdriver/stackdriver_exporter.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ var NPDMetricToSDMetric = map[metrics.MetricID]string{
5454
metrics.CPULoad15m: "compute.googleapis.com/guest/cpu/load_15m",
5555
metrics.DiskAvgQueueLenID: "compute.googleapis.com/guest/disk/queue_length",
5656
metrics.DiskBytesUsedID: "compute.googleapis.com/guest/disk/bytes_used",
57+
metrics.DiskPercentUsedID: "custom.googleapis.com/guest/disk/percent_used",
5758
metrics.DiskIOTimeID: "compute.googleapis.com/guest/disk/io_time",
5859
metrics.DiskMergedOpsCountID: "compute.googleapis.com/guest/disk/merged_operation_count",
5960
metrics.DiskOpsBytesID: "compute.googleapis.com/guest/disk/operation_bytes_count",

pkg/systemstatsmonitor/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ Below metrics are collected from `disk` component:
4848
* `disk_operation_bytes_count`: # of Bytes used for reads/writes on this device
4949
* `disk_operation_time`: [# of milliseconds spent reading/writing][iostat doc]
5050
* `disk_bytes_used`: Disk usage in Bytes. The usage state is reported under the `state` metric label (e.g. `used`, `free`). Summing values of all states yields the disk size.
51+
* `disk_percent_used`: Disk utilization as a percentage. The usage state is reported under the `state` metric label (e.g. `used`, `free`). Each value is between 0.0 and 100.0, and the values of all states sum to 100.
5152
FSType and MountOptions are also reported as additional information.
5253

5354
The name of the disk block device is reported in the `device_name` metric label (e.g. `sda`).

pkg/systemstatsmonitor/disk_collector.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type diskCollector struct {
3838
mOpsBytes *metrics.Int64Metric
3939
mOpsTime *metrics.Int64Metric
4040
mBytesUsed *metrics.Int64Metric
41+
mPercentUsed *metrics.Float64Metric
4142

4243
config *ssmtypes.DiskStatsConfig
4344

@@ -149,6 +150,16 @@ func NewDiskCollectorOrDie(diskConfig *ssmtypes.DiskStatsConfig) *diskCollector
149150
if err != nil {
150151
klog.Fatalf("Error initializing metric for %q: %v", metrics.DiskBytesUsedID, err)
151152
}
153+
dc.mPercentUsed, err = metrics.NewFloat64Metric(
154+
metrics.DiskPercentUsedID,
155+
diskConfig.MetricsConfigs[string(metrics.DiskPercentUsedID)].DisplayName,
156+
"Disk utilization percentage",
157+
"%",
158+
metrics.LastValue,
159+
[]string{deviceNameLabel, fsTypeLabel, mountOptionLabel, stateLabel})
160+
if err != nil {
161+
klog.Fatalf("Error initializing metric for %q: %v", metrics.DiskPercentUsedID, err)
162+
}
152163

153164
dc.lastIOTime = make(map[string]uint64)
154165
dc.lastWeightedIO = make(map[string]uint64)
@@ -291,6 +302,8 @@ func (dc *diskCollector) collect() {
291302
opttypes := strings.Join(partition.Opts, ",")
292303
dc.mBytesUsed.Record(map[string]string{deviceNameLabel: deviceName, fsTypeLabel: fstype, mountOptionLabel: opttypes, stateLabel: "free"}, int64(usageStat.Free))
293304
dc.mBytesUsed.Record(map[string]string{deviceNameLabel: deviceName, fsTypeLabel: fstype, mountOptionLabel: opttypes, stateLabel: "used"}, int64(usageStat.Used))
305+
dc.mPercentUsed.Record(map[string]string{deviceNameLabel: deviceName, fsTypeLabel: fstype, mountOptionLabel: opttypes, stateLabel: "free"}, float64(100 - usageStat.UsedPercent))
306+
dc.mPercentUsed.Record(map[string]string{deviceNameLabel: deviceName, fsTypeLabel: fstype, mountOptionLabel: opttypes, stateLabel: "used"}, float64(usageStat.UsedPercent))
294307
}
295308

296309
}

pkg/util/metrics/metric.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ const (
3535
DiskOpsBytesID MetricID = "disk/operation_bytes_count"
3636
DiskOpsTimeID MetricID = "disk/operation_time"
3737
DiskBytesUsedID MetricID = "disk/bytes_used"
38+
DiskPercentUsedID MetricID = "disk/percent_used"
3839
HostUptimeID MetricID = "host/uptime"
3940
MemoryBytesUsedID MetricID = "memory/bytes_used"
4041
MemoryAnonymousUsedID MetricID = "memory/anonymous_used"

test/e2e/metriconly/metrics_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ var _ = ginkgo.Describe("NPD should export Prometheus metrics.", func() {
8585
assertMetricExist(gotMetrics, "disk_operation_bytes_count", map[string]string{}, false)
8686
assertMetricExist(gotMetrics, "disk_operation_time", map[string]string{}, false)
8787
assertMetricExist(gotMetrics, "disk_bytes_used", map[string]string{}, false)
88+
assertMetricExist(gotMetrics, "disk_percent_used", map[string]string{}, false)
8889
assertMetricExist(gotMetrics, "disk_io_time", map[string]string{}, false)
8990
assertMetricExist(gotMetrics, "disk_weighted_io", map[string]string{}, false)
9091
assertMetricExist(gotMetrics, "memory_bytes_used", map[string]string{}, false)

0 commit comments

Comments
 (0)