@@ -20,12 +20,13 @@ import (
2020 "os/exec"
2121 "log"
2222 "strings"
23+ "strconv"
2324 "regexp"
2425 "github.com/prometheus/client_golang/prometheus"
2526)
2627
2728func AccountsData () []byte {
28- cmd := exec .Command ("squeue" , "-h" , "-o ' %A|%a|%T' " )
29+ cmd := exec .Command ("squeue" , "-h" , "-o %A|%a|%T|%C " )
2930 stdout , err := cmd .StdoutPipe ()
3031 if err != nil {
3132 log .Fatal (err )
@@ -43,6 +44,7 @@ func AccountsData() []byte {
// JobMetrics holds the per-account tallies built up while job lines are
// parsed: the number of pending, running and suspended jobs, plus the CPU
// count summed over the running jobs. Every tally is a float64, which is the
// value type the collector passes to its gauge metrics.
type JobMetrics struct {
	pending, running, running_cpus, suspended float64
}
4850
@@ -54,10 +56,11 @@ func ParseAccountsMetrics(input []byte) map[string]*JobMetrics {
5456 account := strings .Split (line ,"|" )[1 ]
5557 _ ,key := accounts [account ]
5658 if ! key {
57- accounts [account ] = & JobMetrics {0 ,0 ,0 }
59+ accounts [account ] = & JobMetrics {0 ,0 ,0 , 0 }
5860 }
5961 state := strings .Split (line ,"|" )[2 ]
6062 state = strings .ToLower (state )
63+ cpus ,_ := strconv .ParseFloat (strings .Split (line ,"|" )[3 ],64 )
6164 pending := regexp .MustCompile (`^pending` )
6265 running := regexp .MustCompile (`^running` )
6366 suspended := regexp .MustCompile (`^suspended` )
@@ -66,6 +69,7 @@ func ParseAccountsMetrics(input []byte) map[string]*JobMetrics {
6669 accounts [account ].pending ++
6770 case running .MatchString (state ) == true :
6871 accounts [account ].running ++
72+ accounts [account ].running_cpus += cpus
6973 case suspended .MatchString (state ) == true :
7074 accounts [account ].suspended ++
7175 }
@@ -77,21 +81,24 @@ func ParseAccountsMetrics(input []byte) map[string]*JobMetrics {
7781type AccountsCollector struct {
7882 pending * prometheus.Desc
7983 running * prometheus.Desc
84+ running_cpus * prometheus.Desc
8085 suspended * prometheus.Desc
8186}
8287
8388func NewAccountsCollector () * AccountsCollector {
8489 labels := []string {"account" }
8590 return & AccountsCollector {
86- running : prometheus .NewDesc ("slurm_account_jobs_running" , "Running jobs for account" , labels , nil ),
8791 pending : prometheus .NewDesc ("slurm_account_jobs_pending" , "Pending jobs for account" , labels , nil ),
92+ running : prometheus .NewDesc ("slurm_account_jobs_running" , "Running jobs for account" , labels , nil ),
93+ running_cpus : prometheus .NewDesc ("slurm_account_cpus_running" , "Running cpus for account" , labels , nil ),
8894 suspended : prometheus .NewDesc ("slurm_account_jobs_suspended" , "Suspended jobs for account" , labels , nil ),
8995 }
9096}
9197
9298func (ac * AccountsCollector ) Describe (ch chan <- * prometheus.Desc ) {
9399 ch <- ac .pending
94100 ch <- ac .running
101+ ch <- ac .running_cpus
95102 ch <- ac .suspended
96103}
97104
@@ -100,6 +107,7 @@ func (ac *AccountsCollector) Collect(ch chan<- prometheus.Metric) {
100107 for a := range am {
101108 ch <- prometheus .MustNewConstMetric (ac .pending , prometheus .GaugeValue , am [a ].pending , a )
102109 ch <- prometheus .MustNewConstMetric (ac .running , prometheus .GaugeValue , am [a ].running , a )
110+ ch <- prometheus .MustNewConstMetric (ac .running_cpus , prometheus .GaugeValue , am [a ].running_cpus , a )
103111 ch <- prometheus .MustNewConstMetric (ac .suspended , prometheus .GaugeValue , am [a ].suspended , a )
104112 }
105113}
0 commit comments