@@ -20,12 +20,13 @@ import (
2020 "os/exec"
2121 "log"
2222 "strings"
23+ "strconv"
2324 "regexp"
2425 "github.com/prometheus/client_golang/prometheus"
2526)
2627
2728func UsersData () []byte {
28- cmd := exec .Command ("squeue" , "-h" , "-o ' %A|%u|%T|%C' " )
29+ cmd := exec .Command ("squeue" , "-h" , "-o %A|%u|%T|%C" )
2930 stdout , err := cmd .StdoutPipe ()
3031 if err != nil {
3132 log .Fatal (err )
@@ -43,6 +44,7 @@ func UsersData() []byte {
// UserJobMetrics accumulates per-user job figures. ParseUsersMetrics keeps one
// value per user name in its map and updates the fields according to job state.
// Values are built with an unkeyed literal (&UserJobMetrics{0,0,0, 0}), so
// adding, removing or reordering fields requires updating that literal too.
// NOTE(review): the digits (and "+") leading each line are diff-view
// line-number residue, not Go source.
4344type UserJobMetrics struct {
// pending is a job counter; presumably incremented for states matching
// `^pending` (the matching case line is not visible here) — TODO confirm.
4445 pending float64
// running counts jobs whose lower-cased state matches `^running`.
4546 running float64
// running_cpus sums, over running jobs only, the fourth "|"-separated field of
// each job line as parsed by strconv.ParseFloat; the parse error is discarded.
// NOTE(review): snake_case name, not Go MixedCaps; renaming means touching
// every user of the field.
47+ running_cpus float64
// suspended counts jobs whose lower-cased state matches `^suspended`.
4648 suspended float64
4749}
4850
@@ -54,10 +56,11 @@ func ParseUsersMetrics(input []byte) map[string]*UserJobMetrics {
5456 user := strings .Split (line ,"|" )[1 ]
5557 _ ,key := users [user ]
5658 if ! key {
57- users [user ] = & UserJobMetrics {0 ,0 ,0 }
59+ users [user ] = & UserJobMetrics {0 ,0 ,0 , 0 }
5860 }
5961 state := strings .Split (line ,"|" )[2 ]
6062 state = strings .ToLower (state )
63+ cpus ,_ := strconv .ParseFloat (strings .Split (line ,"|" )[3 ],64 )
6164 pending := regexp .MustCompile (`^pending` )
6265 running := regexp .MustCompile (`^running` )
6366 suspended := regexp .MustCompile (`^suspended` )
@@ -66,6 +69,7 @@ func ParseUsersMetrics(input []byte) map[string]*UserJobMetrics {
6669 users [user ].pending ++
6770 case running .MatchString (state ) == true :
6871 users [user ].running ++
72+ users [user ].running_cpus += cpus
6973 case suspended .MatchString (state ) == true :
7074 users [user ].suspended ++
7175 }
@@ -77,6 +81,7 @@ func ParseUsersMetrics(input []byte) map[string]*UserJobMetrics {
// UsersCollector bundles the Prometheus descriptors for the per-user metrics.
// NewUsersCollector fills each field with prometheus.NewDesc, Describe sends
// every descriptor on its channel, and Collect uses them to emit one
// GaugeValue metric per user for each descriptor.
// NOTE(review): the digits (and "+") leading each line are diff-view
// line-number residue, not Go source.
7781type UsersCollector struct {
// pending is the descriptor created as "slurm_user_jobs_pending".
7882 pending * prometheus.Desc
// running is the descriptor created as "slurm_user_jobs_running".
7983 running * prometheus.Desc
// running_cpus is the descriptor created as "slurm_user_cpus_running".
84+ running_cpus * prometheus.Desc
// suspended is the descriptor created as "slurm_user_jobs_suspended".
8085 suspended * prometheus.Desc
8186}
8287
@@ -85,13 +90,15 @@ func NewUsersCollector() *UsersCollector {
8590 return & UsersCollector {
8691 pending : prometheus .NewDesc ("slurm_user_jobs_pending" , "Pending jobs for user" , labels , nil ),
8792 running : prometheus .NewDesc ("slurm_user_jobs_running" , "Running jobs for user" , labels , nil ),
93+ running_cpus : prometheus .NewDesc ("slurm_user_cpus_running" , "Running cpus for user" , labels , nil ),
8894 suspended : prometheus .NewDesc ("slurm_user_jobs_suspended" , "Suspended jobs for user" , labels , nil ),
8995 }
9096}
9197
// Describe sends the collector's four metric descriptors on ch, in the order
// pending, running, running_cpus, suspended. Its signature matches the
// Describe method of the prometheus.Collector interface.
// Any descriptor added to UsersCollector must also be sent here.
9298func (uc * UsersCollector ) Describe (ch chan <- * prometheus.Desc ) {
9399 ch <- uc .pending
94100 ch <- uc .running
101+ ch <- uc .running_cpus
95102 ch <- uc .suspended
96103}
97104
@@ -100,6 +107,7 @@ func (uc *UsersCollector) Collect(ch chan<- prometheus.Metric) {
100107 for u := range um {
101108 ch <- prometheus .MustNewConstMetric (uc .pending , prometheus .GaugeValue , um [u ].pending , u )
102109 ch <- prometheus .MustNewConstMetric (uc .running , prometheus .GaugeValue , um [u ].running , u )
110+ ch <- prometheus .MustNewConstMetric (uc .running_cpus , prometheus .GaugeValue , um [u ].running_cpus , u )
103111 ch <- prometheus .MustNewConstMetric (uc .suspended , prometheus .GaugeValue , um [u ].suspended , u )
104112 }
105113}
0 commit comments