diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 1914288ef8..e0ef007519 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -1585,6 +1585,44 @@ node_md_blocks_synced{device="md6"} 1.6775552e+07 node_md_blocks_synced{device="md7"} 7.813735424e+09 node_md_blocks_synced{device="md8"} 1.6775552e+07 node_md_blocks_synced{device="md9"} 0 +# HELP node_md_blocks_synced_percent Percentage of blocks synced on device. +# TYPE node_md_blocks_synced_percent gauge +node_md_blocks_synced_percent{device="md0"} 0 +node_md_blocks_synced_percent{device="md00"} 0 +node_md_blocks_synced_percent{device="md10"} 0 +node_md_blocks_synced_percent{device="md101"} 0 +node_md_blocks_synced_percent{device="md11"} 0 +node_md_blocks_synced_percent{device="md12"} 0 +node_md_blocks_synced_percent{device="md120"} 0 +node_md_blocks_synced_percent{device="md126"} 0 +node_md_blocks_synced_percent{device="md127"} 0 +node_md_blocks_synced_percent{device="md201"} 5.7 +node_md_blocks_synced_percent{device="md219"} 0 +node_md_blocks_synced_percent{device="md3"} 0 +node_md_blocks_synced_percent{device="md4"} 0 +node_md_blocks_synced_percent{device="md6"} 8.5 +node_md_blocks_synced_percent{device="md7"} 0 +node_md_blocks_synced_percent{device="md8"} 8.5 +node_md_blocks_synced_percent{device="md9"} 0 +# HELP node_md_blocks_synced_speed current sync speed (in Kilobytes/sec) +# TYPE node_md_blocks_synced_speed gauge +node_md_blocks_synced_speed{device="md0"} 0 +node_md_blocks_synced_speed{device="md00"} 0 +node_md_blocks_synced_speed{device="md10"} 0 +node_md_blocks_synced_speed{device="md101"} 0 +node_md_blocks_synced_speed{device="md11"} 0 +node_md_blocks_synced_speed{device="md12"} 0 +node_md_blocks_synced_speed{device="md120"} 0 +node_md_blocks_synced_speed{device="md126"} 0 +node_md_blocks_synced_speed{device="md127"} 0 +node_md_blocks_synced_speed{device="md201"} 114176 +node_md_blocks_synced_speed{device="md219"} 0 +node_md_blocks_synced_speed{device="md3"} 0 +node_md_blocks_synced_speed{device="md4"} 0 +node_md_blocks_synced_speed{device="md6"} 259783 +node_md_blocks_synced_speed{device="md7"} 0 +node_md_blocks_synced_speed{device="md8"} 259783 +node_md_blocks_synced_speed{device="md9"} 0 # HELP node_md_degraded Number of degraded disks on device. # TYPE node_md_degraded gauge node_md_degraded{device="md0"} 0 @@ -1596,54 +1634,71 @@ node_md_degraded{device="md6"} 1 # HELP node_md_disks Number of active/failed/spare disks of device. # TYPE node_md_disks gauge node_md_disks{device="md0",state="active"} 2 +node_md_disks{device="md0",state="down"} 0 node_md_disks{device="md0",state="failed"} 0 node_md_disks{device="md0",state="spare"} 0 node_md_disks{device="md00",state="active"} 1 +node_md_disks{device="md00",state="down"} 0 node_md_disks{device="md00",state="failed"} 0 node_md_disks{device="md00",state="spare"} 0 node_md_disks{device="md10",state="active"} 2 +node_md_disks{device="md10",state="down"} 0 node_md_disks{device="md10",state="failed"} 0 node_md_disks{device="md10",state="spare"} 0 node_md_disks{device="md101",state="active"} 3 +node_md_disks{device="md101",state="down"} 0 node_md_disks{device="md101",state="failed"} 0 node_md_disks{device="md101",state="spare"} 0 node_md_disks{device="md11",state="active"} 2 +node_md_disks{device="md11",state="down"} 0 node_md_disks{device="md11",state="failed"} 1 node_md_disks{device="md11",state="spare"} 2 node_md_disks{device="md12",state="active"} 2 +node_md_disks{device="md12",state="down"} 0 node_md_disks{device="md12",state="failed"} 0 node_md_disks{device="md12",state="spare"} 0 node_md_disks{device="md120",state="active"} 2 +node_md_disks{device="md120",state="down"} 0 node_md_disks{device="md120",state="failed"} 0 node_md_disks{device="md120",state="spare"} 0 node_md_disks{device="md126",state="active"} 2 +node_md_disks{device="md126",state="down"} 0 node_md_disks{device="md126",state="failed"} 0 node_md_disks{device="md126",state="spare"} 0 node_md_disks{device="md127",state="active"} 2 +node_md_disks{device="md127",state="down"} 0 node_md_disks{device="md127",state="failed"} 0 node_md_disks{device="md127",state="spare"} 0 node_md_disks{device="md201",state="active"} 2 +node_md_disks{device="md201",state="down"} 0 node_md_disks{device="md201",state="failed"} 0 node_md_disks{device="md201",state="spare"} 0 node_md_disks{device="md219",state="active"} 0 +node_md_disks{device="md219",state="down"} 0 node_md_disks{device="md219",state="failed"} 0 node_md_disks{device="md219",state="spare"} 3 node_md_disks{device="md3",state="active"} 8 +node_md_disks{device="md3",state="down"} 0 node_md_disks{device="md3",state="failed"} 0 node_md_disks{device="md3",state="spare"} 2 node_md_disks{device="md4",state="active"} 0 +node_md_disks{device="md4",state="down"} 0 node_md_disks{device="md4",state="failed"} 1 node_md_disks{device="md4",state="spare"} 1 node_md_disks{device="md6",state="active"} 1 +node_md_disks{device="md6",state="down"} 1 node_md_disks{device="md6",state="failed"} 1 node_md_disks{device="md6",state="spare"} 1 node_md_disks{device="md7",state="active"} 3 +node_md_disks{device="md7",state="down"} 1 node_md_disks{device="md7",state="failed"} 1 node_md_disks{device="md7",state="spare"} 0 node_md_disks{device="md8",state="active"} 2 +node_md_disks{device="md8",state="down"} 0 node_md_disks{device="md8",state="failed"} 0 node_md_disks{device="md8",state="spare"} 2 node_md_disks{device="md9",state="active"} 4 +node_md_disks{device="md9",state="down"} 0 node_md_disks{device="md9",state="failed"} 2 node_md_disks{device="md9",state="spare"} 1 # HELP node_md_disks_required Total number of disks of device. @@ -1760,6 +1815,25 @@ node_md_state{device="md9",state="check"} 0 node_md_state{device="md9",state="inactive"} 0 node_md_state{device="md9",state="recovering"} 0 node_md_state{device="md9",state="resync"} 1 +# HELP node_md_sync_time_remaining_seconds Estimated finishing time for current sync in seconds. +# TYPE node_md_sync_time_remaining_seconds gauge +node_md_sync_time_remaining_seconds{device="md0"} 0 +node_md_sync_time_remaining_seconds{device="md00"} 0 +node_md_sync_time_remaining_seconds{device="md10"} 0 +node_md_sync_time_remaining_seconds{device="md101"} 0 +node_md_sync_time_remaining_seconds{device="md11"} 0 +node_md_sync_time_remaining_seconds{device="md12"} 0 +node_md_sync_time_remaining_seconds{device="md120"} 0 +node_md_sync_time_remaining_seconds{device="md126"} 0 +node_md_sync_time_remaining_seconds{device="md127"} 0 +node_md_sync_time_remaining_seconds{device="md201"} 12 +node_md_sync_time_remaining_seconds{device="md219"} 0 +node_md_sync_time_remaining_seconds{device="md3"} 0 +node_md_sync_time_remaining_seconds{device="md4"} 0 +node_md_sync_time_remaining_seconds{device="md6"} 1020 +node_md_sync_time_remaining_seconds{device="md7"} 0 +node_md_sync_time_remaining_seconds{device="md8"} 1020 +node_md_sync_time_remaining_seconds{device="md9"} 0 # HELP node_memory_Active_anon_bytes Memory information field Active_anon_bytes. # TYPE node_memory_Active_anon_bytes gauge node_memory_Active_anon_bytes 2.068484096e+09 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 634386da8b..b913e2608b 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -1607,6 +1607,44 @@ node_md_blocks_synced{device="md6"} 1.6775552e+07 node_md_blocks_synced{device="md7"} 7.813735424e+09 node_md_blocks_synced{device="md8"} 1.6775552e+07 node_md_blocks_synced{device="md9"} 0 +# HELP node_md_blocks_synced_percent Percentage of blocks synced on device. +# TYPE node_md_blocks_synced_percent gauge +node_md_blocks_synced_percent{device="md0"} 0 +node_md_blocks_synced_percent{device="md00"} 0 +node_md_blocks_synced_percent{device="md10"} 0 +node_md_blocks_synced_percent{device="md101"} 0 +node_md_blocks_synced_percent{device="md11"} 0 +node_md_blocks_synced_percent{device="md12"} 0 +node_md_blocks_synced_percent{device="md120"} 0 +node_md_blocks_synced_percent{device="md126"} 0 +node_md_blocks_synced_percent{device="md127"} 0 +node_md_blocks_synced_percent{device="md201"} 5.7 +node_md_blocks_synced_percent{device="md219"} 0 +node_md_blocks_synced_percent{device="md3"} 0 +node_md_blocks_synced_percent{device="md4"} 0 +node_md_blocks_synced_percent{device="md6"} 8.5 +node_md_blocks_synced_percent{device="md7"} 0 +node_md_blocks_synced_percent{device="md8"} 8.5 +node_md_blocks_synced_percent{device="md9"} 0 +# HELP node_md_blocks_synced_speed current sync speed (in Kilobytes/sec) +# TYPE node_md_blocks_synced_speed gauge +node_md_blocks_synced_speed{device="md0"} 0 +node_md_blocks_synced_speed{device="md00"} 0 +node_md_blocks_synced_speed{device="md10"} 0 +node_md_blocks_synced_speed{device="md101"} 0 +node_md_blocks_synced_speed{device="md11"} 0 +node_md_blocks_synced_speed{device="md12"} 0 +node_md_blocks_synced_speed{device="md120"} 0 +node_md_blocks_synced_speed{device="md126"} 0 +node_md_blocks_synced_speed{device="md127"} 0 +node_md_blocks_synced_speed{device="md201"} 114176 +node_md_blocks_synced_speed{device="md219"} 0 +node_md_blocks_synced_speed{device="md3"} 0 +node_md_blocks_synced_speed{device="md4"} 0 +node_md_blocks_synced_speed{device="md6"} 259783 +node_md_blocks_synced_speed{device="md7"} 0 +node_md_blocks_synced_speed{device="md8"} 259783 +node_md_blocks_synced_speed{device="md9"} 0 # HELP node_md_degraded Number of degraded disks on device. # TYPE node_md_degraded gauge node_md_degraded{device="md0"} 0 @@ -1618,54 +1656,71 @@ node_md_degraded{device="md6"} 1 # HELP node_md_disks Number of active/failed/spare disks of device. # TYPE node_md_disks gauge node_md_disks{device="md0",state="active"} 2 +node_md_disks{device="md0",state="down"} 0 node_md_disks{device="md0",state="failed"} 0 node_md_disks{device="md0",state="spare"} 0 node_md_disks{device="md00",state="active"} 1 +node_md_disks{device="md00",state="down"} 0 node_md_disks{device="md00",state="failed"} 0 node_md_disks{device="md00",state="spare"} 0 node_md_disks{device="md10",state="active"} 2 +node_md_disks{device="md10",state="down"} 0 node_md_disks{device="md10",state="failed"} 0 node_md_disks{device="md10",state="spare"} 0 node_md_disks{device="md101",state="active"} 3 +node_md_disks{device="md101",state="down"} 0 node_md_disks{device="md101",state="failed"} 0 node_md_disks{device="md101",state="spare"} 0 node_md_disks{device="md11",state="active"} 2 +node_md_disks{device="md11",state="down"} 0 node_md_disks{device="md11",state="failed"} 1 node_md_disks{device="md11",state="spare"} 2 node_md_disks{device="md12",state="active"} 2 +node_md_disks{device="md12",state="down"} 0 node_md_disks{device="md12",state="failed"} 0 node_md_disks{device="md12",state="spare"} 0 node_md_disks{device="md120",state="active"} 2 +node_md_disks{device="md120",state="down"} 0 node_md_disks{device="md120",state="failed"} 0 node_md_disks{device="md120",state="spare"} 0 node_md_disks{device="md126",state="active"} 2 +node_md_disks{device="md126",state="down"} 0 node_md_disks{device="md126",state="failed"} 0 node_md_disks{device="md126",state="spare"} 0 node_md_disks{device="md127",state="active"} 2 +node_md_disks{device="md127",state="down"} 0 node_md_disks{device="md127",state="failed"} 0 node_md_disks{device="md127",state="spare"} 0 node_md_disks{device="md201",state="active"} 2 +node_md_disks{device="md201",state="down"} 0 node_md_disks{device="md201",state="failed"} 0 node_md_disks{device="md201",state="spare"} 0 node_md_disks{device="md219",state="active"} 0 +node_md_disks{device="md219",state="down"} 0 node_md_disks{device="md219",state="failed"} 0 node_md_disks{device="md219",state="spare"} 3 node_md_disks{device="md3",state="active"} 8 +node_md_disks{device="md3",state="down"} 0 node_md_disks{device="md3",state="failed"} 0 node_md_disks{device="md3",state="spare"} 2 node_md_disks{device="md4",state="active"} 0 +node_md_disks{device="md4",state="down"} 0 node_md_disks{device="md4",state="failed"} 1 node_md_disks{device="md4",state="spare"} 1 node_md_disks{device="md6",state="active"} 1 +node_md_disks{device="md6",state="down"} 1 node_md_disks{device="md6",state="failed"} 1 node_md_disks{device="md6",state="spare"} 1 node_md_disks{device="md7",state="active"} 3 +node_md_disks{device="md7",state="down"} 1 node_md_disks{device="md7",state="failed"} 1 node_md_disks{device="md7",state="spare"} 0 node_md_disks{device="md8",state="active"} 2 +node_md_disks{device="md8",state="down"} 0 node_md_disks{device="md8",state="failed"} 0 node_md_disks{device="md8",state="spare"} 2 node_md_disks{device="md9",state="active"} 4 +node_md_disks{device="md9",state="down"} 0 node_md_disks{device="md9",state="failed"} 2 node_md_disks{device="md9",state="spare"} 1 # HELP node_md_disks_required Total number of disks of device. @@ -1782,6 +1837,25 @@ node_md_state{device="md9",state="check"} 0 node_md_state{device="md9",state="inactive"} 0 node_md_state{device="md9",state="recovering"} 0 node_md_state{device="md9",state="resync"} 1 +# HELP node_md_sync_time_remaining_seconds Estimated finishing time for current sync in seconds. +# TYPE node_md_sync_time_remaining_seconds gauge +node_md_sync_time_remaining_seconds{device="md0"} 0 +node_md_sync_time_remaining_seconds{device="md00"} 0 +node_md_sync_time_remaining_seconds{device="md10"} 0 +node_md_sync_time_remaining_seconds{device="md101"} 0 +node_md_sync_time_remaining_seconds{device="md11"} 0 +node_md_sync_time_remaining_seconds{device="md12"} 0 +node_md_sync_time_remaining_seconds{device="md120"} 0 +node_md_sync_time_remaining_seconds{device="md126"} 0 +node_md_sync_time_remaining_seconds{device="md127"} 0 +node_md_sync_time_remaining_seconds{device="md201"} 12 +node_md_sync_time_remaining_seconds{device="md219"} 0 +node_md_sync_time_remaining_seconds{device="md3"} 0 +node_md_sync_time_remaining_seconds{device="md4"} 0 +node_md_sync_time_remaining_seconds{device="md6"} 1020 +node_md_sync_time_remaining_seconds{device="md7"} 0 +node_md_sync_time_remaining_seconds{device="md8"} 1020 +node_md_sync_time_remaining_seconds{device="md9"} 0 # HELP node_memory_Active_anon_bytes Memory information field Active_anon_bytes. # TYPE node_memory_Active_anon_bytes gauge node_memory_Active_anon_bytes 2.068484096e+09 diff --git a/collector/fixtures/proc/mdstat b/collector/fixtures/proc/mdstat index a19bf5e3f0..6058e7bf25 100644 --- a/collector/fixtures/proc/mdstat +++ b/collector/fixtures/proc/mdstat @@ -57,4 +57,8 @@ md120 : active linear sda1[1] sdb1[0] md101 : active (read-only) raid0 sdb[2] sdd[1] sdc[0] 322560 blocks super 1.2 512k chunks +md42 : active raid1 sdb2[2](R) sdc[1] sda2[0] + 195310144 blocks [2/2] [UU] + [=>...................] recovery = 8.5% (16775552/195310144) finish=17.0min speed=259783K/sec + unused devices: diff --git a/collector/mdadm_linux.go b/collector/mdadm_linux.go index 5f76db23f1..a2a037bec9 100644 --- a/collector/mdadm_linux.go +++ b/collector/mdadm_linux.go @@ -22,10 +22,9 @@ import ( "log/slog" "os" - "github.com/prometheus/procfs/sysfs" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" + "github.com/prometheus/procfs/sysfs" ) type mdadmCollector struct { @@ -66,6 +65,12 @@ var ( []string{"device"}, prometheus.Labels{"state": "resync"}, ) + reshapeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "md", "state"), + "Indicates the state of md-device.", + []string{"device"}, + prometheus.Labels{"state": "reshaping"}, + ) checkDesc = prometheus.NewDesc( prometheus.BuildFQName(namespace, "md", "state"), "Indicates the state of md-device.", @@ -101,6 +106,27 @@ var ( nil, ) + blocksSyncedPctDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "md", "blocks_synced_percent"), + "Percentage of blocks synced on device.", + []string{"device"}, + nil, + ) + + syncTimeRemainingDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "md", "sync_time_remaining_seconds"), + "Estimated finishing time for current sync in seconds.", + []string{"device"}, + nil, + ) + + blockSyncedSpeedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "md", "blocks_synced_speed"), + "current sync speed (in Kilobytes/sec)", + []string{"device"}, + nil, + ) + mdraidDisks = prometheus.NewDesc( prometheus.BuildFQName(namespace, "md", "raid_disks"), "Number of raid disks on device.", @@ -145,7 +171,20 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error { float64(mdStat.DisksTotal), mdStat.Name, ) - + ch <- prometheus.MustNewConstMetric( + disksDesc, + prometheus.GaugeValue, + float64(mdStat.DisksReplacing), + mdStat.Name, + "replacing", + ) + ch <- prometheus.MustNewConstMetric( + disksDesc, + prometheus.GaugeValue, + float64(mdStat.DisksDown), + mdStat.Name, + "down", + ) ch <- prometheus.MustNewConstMetric( disksDesc, prometheus.GaugeValue, @@ -195,6 +234,13 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error { mdStat.Name, ) + ch <- prometheus.MustNewConstMetric( + reshapeDesc, + prometheus.GaugeValue, + stateVals["reshaping"], + mdStat.Name, + ) + ch <- prometheus.MustNewConstMetric( checkDesc, prometheus.GaugeValue, @@ -214,6 +260,25 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error { float64(mdStat.BlocksSynced), mdStat.Name, ) + ch <- prometheus.MustNewConstMetric( + blocksSyncedPctDesc, + prometheus.GaugeValue, + float64(mdStat.BlocksSyncedPct), + mdStat.Name, + ) + ch <- prometheus.MustNewConstMetric( + syncTimeRemainingDesc, + prometheus.GaugeValue, + float64(mdStat.BlocksSyncedFinishTime*60), + mdStat.Name, + ) + ch <- prometheus.MustNewConstMetric( + blockSyncedSpeedDesc, + prometheus.GaugeValue, + float64(mdStat.BlocksSyncedSpeed), + mdStat.Name, + ) + } sysFS, err := sysfs.NewFS(*sysPath)