Skip to content
This repository was archived by the owner on Dec 1, 2018. It is now read-only.

Commit 3ffb7af

Browse files
committed
add disk io metrics
1 parent 6c245e7 commit 3ffb7af

File tree

4 files changed

+183
-24
lines changed

4 files changed

+183
-24
lines changed

docs/storage-schema.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ Heapster exports the following metrics to its backends.
1717
| filesystem/available | The number of available bytes remaining in the filesystem |
1818
| filesystem/inodes | The number of available inodes in the filesystem |
1919
| filesystem/inodes_free | The number of free inodes remaining in the filesystem |
20+
| disk/io_read_bytes | Number of bytes read from a disk partition |
21+
| disk/io_write_bytes | Number of bytes written to a disk partition |
22+
| disk/io_read_bytes_rate | Number of bytes read from a disk partition per second |
23+
| disk/io_write_bytes_rate | Number of bytes written to a disk partition per second |
2024
| memory/limit | Memory hard limit in bytes. |
2125
| memory/major_page_faults | Number of major page faults. |
2226
| memory/major_page_faults_rate | Number of major page faults per second. |
@@ -62,7 +66,7 @@ Heapster tags each metric with the following labels.
6266
| labels | Comma-separated(Default) list of user-provided labels. Format is 'key:value' |
6367
| namespace_id | UID of the namespace of a Pod |
6468
| namespace_name | User-provided name of a Namespace |
65-
| resource_id | A unique identifier used to differentiate multiple metrics of the same type. e.x. Fs partitions under filesystem/usage |
69+
| resource_id | A unique identifier used to differentiate multiple metrics of the same type, e.g. filesystem partitions under filesystem/usage, or the disk device name under disk/io_read_bytes |
6670
| make | Make of the accelerator (nvidia, amd, google etc.) |
6771
| model | Model of the accelerator (tesla-p100, tesla-k80 etc.) |
6872
| accelerator_id | ID of the accelerator |

integration/heapster_api_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,11 @@ func runMetricExportTest(fm kubeFramework, svc *kube_v1.Service) error {
359359
core.MetricFilesystemInodesFree.Name: {core.LabelResourceID.Key},
360360
core.MetricAcceleratorMemoryTotal.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key},
361361
core.MetricAcceleratorMemoryUsed.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key},
362-
core.MetricAcceleratorDutyCycle.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key}}
362+
core.MetricAcceleratorDutyCycle.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key},
363+
core.MetricDiskIORead.Name: {core.LabelResourceID.Key},
364+
core.MetricDiskIOReadRate.Name: {core.LabelResourceID.Key},
365+
core.MetricDiskIOWrite.Name: {core.LabelResourceID.Key},
366+
core.MetricDiskIOWriteRate.Name: {core.LabelResourceID.Key}}
363367

364368
for metricName, points := range ts.Metrics {
365369
md, exists := mdMap[metricName]

metrics/core/metrics.go

Lines changed: 115 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package core
1616

1717
import (
18+
"fmt"
1819
"time"
1920

2021
cadvisor "github.com/google/cadvisor/info/v1"
@@ -54,7 +55,9 @@ var RateMetrics = []Metric{
5455
MetricNetworkRxRate,
5556
MetricNetworkRxErrorsRate,
5657
MetricNetworkTxRate,
57-
MetricNetworkTxErrorsRate}
58+
MetricNetworkTxErrorsRate,
59+
MetricDiskIOReadRate,
60+
MetricDiskIOWriteRate}
5861

5962
var RateMetricsMapping = map[string]Metric{
6063
MetricCpuUsage.MetricDescriptor.Name: MetricCpuUsageRate,
@@ -63,9 +66,15 @@ var RateMetricsMapping = map[string]Metric{
6366
MetricNetworkRx.MetricDescriptor.Name: MetricNetworkRxRate,
6467
MetricNetworkRxErrors.MetricDescriptor.Name: MetricNetworkRxErrorsRate,
6568
MetricNetworkTx.MetricDescriptor.Name: MetricNetworkTxRate,
66-
MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate}
69+
MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate,
70+
MetricDiskIORead.MetricDescriptor.Name: MetricDiskIOReadRate,
71+
MetricDiskIOWrite.MetricDescriptor.Name: MetricDiskIOWriteRate}
6772

6873
var LabeledMetrics = []Metric{
74+
MetricDiskIORead,
75+
MetricDiskIOReadRate,
76+
MetricDiskIOWrite,
77+
MetricDiskIOWriteRate,
6978
MetricFilesystemUsage,
7079
MetricFilesystemLimit,
7180
MetricFilesystemAvailable,
@@ -885,6 +894,110 @@ var MetricAcceleratorDutyCycle = Metric{
885894
},
886895
}
887896

897+
var MetricDiskIORead = Metric{
898+
MetricDescriptor: MetricDescriptor{
899+
Name: "disk/io_read_bytes",
900+
Description: "Cumulative number of bytes read over disk",
901+
Type: MetricCumulative,
902+
ValueType: ValueInt64,
903+
Units: UnitsBytes,
904+
Labels: metricLabels,
905+
},
906+
HasLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) bool {
907+
return spec.HasDiskIo
908+
},
909+
GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric {
910+
result := make([]LabeledMetric, 0, len(stat.DiskIo.IoServiceBytes))
911+
for _, ioServiceBytesPerPartition := range stat.DiskIo.IoServiceBytes {
912+
resourceIDKey := ioServiceBytesPerPartition.Device
913+
if resourceIDKey == "" {
914+
resourceIDKey = fmt.Sprintf("%d:%d", ioServiceBytesPerPartition.Major, ioServiceBytesPerPartition.Minor)
915+
}
916+
917+
var value uint64
918+
if v, exists := ioServiceBytesPerPartition.Stats["Read"]; exists {
919+
value = v
920+
}
921+
922+
result = append(result, LabeledMetric{
923+
Name: "disk/io_read_bytes",
924+
Labels: map[string]string{
925+
LabelResourceID.Key: resourceIDKey,
926+
},
927+
MetricValue: MetricValue{
928+
ValueType: ValueInt64,
929+
MetricType: MetricGauge,
930+
IntValue: int64(value),
931+
},
932+
})
933+
}
934+
return result
935+
},
936+
}
937+
938+
var MetricDiskIOWrite = Metric{
939+
MetricDescriptor: MetricDescriptor{
940+
Name: "disk/io_write_bytes",
941+
Description: "Cumulative number of bytes write over disk",
942+
Type: MetricCumulative,
943+
ValueType: ValueInt64,
944+
Units: UnitsBytes,
945+
Labels: metricLabels,
946+
},
947+
HasLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) bool {
948+
return spec.HasDiskIo
949+
},
950+
GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric {
951+
result := make([]LabeledMetric, 0, len(stat.DiskIo.IoServiceBytes))
952+
for _, ioServiceBytesPerPartition := range stat.DiskIo.IoServiceBytes {
953+
resourceIDKey := ioServiceBytesPerPartition.Device
954+
if resourceIDKey == "" {
955+
resourceIDKey = fmt.Sprintf("%d:%d", ioServiceBytesPerPartition.Major, ioServiceBytesPerPartition.Minor)
956+
}
957+
958+
var value uint64
959+
if v, exists := ioServiceBytesPerPartition.Stats["Write"]; exists {
960+
value = v
961+
}
962+
963+
result = append(result, LabeledMetric{
964+
Name: "disk/io_write_bytes",
965+
Labels: map[string]string{
966+
LabelResourceID.Key: resourceIDKey,
967+
},
968+
MetricValue: MetricValue{
969+
ValueType: ValueInt64,
970+
MetricType: MetricGauge,
971+
IntValue: int64(value),
972+
},
973+
})
974+
}
975+
return result
976+
},
977+
}
978+
979+
var MetricDiskIOReadRate = Metric{
980+
MetricDescriptor: MetricDescriptor{
981+
Name: "disk/io_read_bytes_rate",
982+
Description: "Rate of bytes read over disk in bytes per second",
983+
Type: MetricGauge,
984+
ValueType: ValueFloat,
985+
Units: UnitsCount,
986+
Labels: metricLabels,
987+
},
988+
}
989+
990+
var MetricDiskIOWriteRate = Metric{
991+
MetricDescriptor: MetricDescriptor{
992+
Name: "disk/io_write_bytes_rate",
993+
Description: "Rate of bytes written over disk in bytes per second",
994+
Type: MetricGauge,
995+
ValueType: ValueFloat,
996+
Units: UnitsCount,
997+
Labels: metricLabels,
998+
},
999+
}
1000+
8881001
func IsNodeAutoscalingMetric(name string) bool {
8891002
for _, autoscalingMetric := range NodeAutoscalingMetrics {
8901003
if autoscalingMetric.MetricDescriptor.Name == name {

metrics/processors/rate_calculator.go

Lines changed: 58 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,31 +57,69 @@ func (this *RateCalculator) Process(batch *core.DataBatch) (*core.DataBatch, err
5757
continue
5858
}
5959

60+
var metricValNew, metricValOld core.MetricValue
61+
var foundNew, foundOld bool
62+
6063
for metricName, targetMetric := range this.rateMetricsMapping {
61-
metricValNew, foundNew := newMs.MetricValues[metricName]
62-
metricValOld, foundOld := oldMs.MetricValues[metricName]
63-
if foundNew && foundOld && metricName == core.MetricCpuUsage.MetricDescriptor.Name {
64-
// cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here).
65-
newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) /
66-
(newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano())
67-
68-
newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
69-
ValueType: core.ValueInt64,
70-
MetricType: core.MetricGauge,
71-
IntValue: newVal,
64+
if metricName == core.MetricDiskIORead.MetricDescriptor.Name || metricName == core.MetricDiskIOWrite.MetricDescriptor.Name {
65+
for _, itemNew := range newMs.LabeledMetrics {
66+
foundNew, foundOld = false, false
67+
if itemNew.Name == metricName {
68+
metricValNew, foundNew = itemNew.MetricValue, true
69+
for _, itemOld := range oldMs.LabeledMetrics {
70+
if itemOld.Name == metricName {
71+
metricValOld, foundOld = itemOld.MetricValue, true
72+
break
73+
}
74+
}
75+
}
76+
77+
if foundNew && foundOld {
78+
if targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
79+
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
80+
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())
81+
82+
newMs.LabeledMetrics = append(newMs.LabeledMetrics, core.LabeledMetric{
83+
Name: targetMetric.MetricDescriptor.Name,
84+
Labels: itemNew.Labels,
85+
MetricValue: core.MetricValue{
86+
ValueType: core.ValueFloat,
87+
MetricType: core.MetricGauge,
88+
FloatValue: newVal,
89+
},
90+
})
91+
}
92+
} else if foundNew && !foundOld || !foundNew && foundOld {
93+
glog.V(4).Infof("Skipping rates for %s in %s: metric not found in one of old (%v) or new (%v)", metricName, key, foundOld, foundNew)
94+
}
7295
}
96+
} else {
97+
metricValNew, foundNew = newMs.MetricValues[metricName]
98+
metricValOld, foundOld = oldMs.MetricValues[metricName]
99+
100+
if foundNew && foundOld && metricName == core.MetricCpuUsage.MetricDescriptor.Name {
101+
// cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here).
102+
newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) /
103+
(newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano())
104+
105+
newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
106+
ValueType: core.ValueInt64,
107+
MetricType: core.MetricGauge,
108+
IntValue: newVal,
109+
}
73110

74-
} else if foundNew && foundOld && targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
75-
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
76-
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())
111+
} else if foundNew && foundOld && targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
112+
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
113+
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())
77114

78-
newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
79-
ValueType: core.ValueFloat,
80-
MetricType: core.MetricGauge,
81-
FloatValue: newVal,
115+
newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
116+
ValueType: core.ValueFloat,
117+
MetricType: core.MetricGauge,
118+
FloatValue: newVal,
119+
}
120+
} else if foundNew && !foundOld || !foundNew && foundOld {
121+
glog.V(4).Infof("Skipping rates for %s in %s: metric not found in one of old (%v) or new (%v)", metricName, key, foundOld, foundNew)
82122
}
83-
} else if foundNew && !foundOld || !foundNew && foundOld {
84-
glog.V(4).Infof("Skipping rates for %s in %s: metric not found in one of old (%v) or new (%v)", metricName, key, foundOld, foundNew)
85123
}
86124
}
87125
}

0 commit comments

Comments
 (0)