Skip to content

Commit a9dfd3c

Browse files
committed
add the number of cpus used by running jobs per account
1 parent 2bc6ba6 commit a9dfd3c

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

accounts.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@ import (
2020
"os/exec"
2121
"log"
2222
"strings"
23+
"strconv"
2324
"regexp"
2425
"github.com/prometheus/client_golang/prometheus"
2526
)
2627

2728
func AccountsData() []byte {
28-
cmd := exec.Command("squeue", "-h", "-o '%A|%a|%T'")
29+
cmd := exec.Command("squeue", "-h", "-o %A|%a|%T|%C")
2930
stdout, err := cmd.StdoutPipe()
3031
if err != nil {
3132
log.Fatal(err)
@@ -43,6 +44,7 @@ func AccountsData() []byte {
4344
// JobMetrics holds the per-account job tallies built by ParseAccountsMetrics.
// One value is stored per account name; every field starts at zero.
type JobMetrics struct {
4445
pending float64 // incremented once for each job counted as pending
4546
running float64 // incremented once for each job whose lowercased state starts with "running"
47+
running_cpus float64 // sum of the fourth '|'-separated field (parsed as a float) over running jobs
4648
suspended float64 // incremented once for each job whose lowercased state starts with "suspended"
4749
}
4850

@@ -54,10 +56,11 @@ func ParseAccountsMetrics(input []byte) map[string]*JobMetrics {
5456
account := strings.Split(line,"|")[1]
5557
_,key := accounts[account]
5658
if !key {
57-
accounts[account] = &JobMetrics{0,0,0}
59+
accounts[account] = &JobMetrics{0,0,0,0}
5860
}
5961
state := strings.Split(line,"|")[2]
6062
state = strings.ToLower(state)
63+
cpus,_ := strconv.ParseFloat(strings.Split(line,"|")[3],64)
6164
pending := regexp.MustCompile(`^pending`)
6265
running := regexp.MustCompile(`^running`)
6366
suspended := regexp.MustCompile(`^suspended`)
@@ -66,6 +69,7 @@ func ParseAccountsMetrics(input []byte) map[string]*JobMetrics {
6669
accounts[account].pending++
6770
case running.MatchString(state) == true:
6871
accounts[account].running++
72+
accounts[account].running_cpus += cpus
6973
case suspended.MatchString(state) == true:
7074
accounts[account].suspended++
7175
}
@@ -77,21 +81,24 @@ func ParseAccountsMetrics(input []byte) map[string]*JobMetrics {
7781
// AccountsCollector groups the Prometheus descriptors for the per-account
// metrics. Each field is the descriptor used when the matching JobMetrics
// value is emitted in Collect.
type AccountsCollector struct {
7882
pending *prometheus.Desc // descriptor for the pending-jobs gauge
7983
running *prometheus.Desc // descriptor for the running-jobs gauge
84+
running_cpus *prometheus.Desc // descriptor for the gauge of cpus summed over running jobs
8085
suspended *prometheus.Desc // descriptor for the suspended-jobs gauge
8186
}
8287

8388
// NewAccountsCollector returns an AccountsCollector whose descriptors all
// carry the single variable label "account" and no constant labels.
//
// Note for readers of this diff view: the `running` descriptor appears twice
// below. The line under gutter marker "86-" is the removed pre-commit
// position; the line under "92" is where the commit re-adds it, after
// `pending`, alongside the new `running_cpus` descriptor.
func NewAccountsCollector() *AccountsCollector {
8489
labels := []string{"account"}
8590
return &AccountsCollector{
86-
running: prometheus.NewDesc("slurm_account_jobs_running", "Running jobs for account", labels, nil),
8791
pending: prometheus.NewDesc("slurm_account_jobs_pending", "Pending jobs for account", labels, nil),
92+
running: prometheus.NewDesc("slurm_account_jobs_running", "Running jobs for account", labels, nil),
93+
running_cpus: prometheus.NewDesc("slurm_account_cpus_running", "Running cpus for account", labels, nil),
8894
suspended: prometheus.NewDesc("slurm_account_jobs_suspended", "Suspended jobs for account", labels, nil),
8995
}
9096
}
9197

9298
// Describe sends every descriptor held by the collector to ch, in the order
// pending, running, running_cpus, suspended.
func (ac *AccountsCollector) Describe(ch chan<- *prometheus.Desc) {
9399
ch <- ac.pending
94100
ch <- ac.running
101+
ch <- ac.running_cpus
95102
ch <- ac.suspended
96103
}
97104

@@ -100,6 +107,7 @@ func (ac *AccountsCollector) Collect(ch chan<- prometheus.Metric) {
100107
for a := range am {
101108
ch <- prometheus.MustNewConstMetric(ac.pending, prometheus.GaugeValue, am[a].pending, a)
102109
ch <- prometheus.MustNewConstMetric(ac.running, prometheus.GaugeValue, am[a].running, a)
110+
ch <- prometheus.MustNewConstMetric(ac.running_cpus, prometheus.GaugeValue, am[a].running_cpus, a)
103111
ch <- prometheus.MustNewConstMetric(ac.suspended, prometheus.GaugeValue, am[a].suspended, a)
104112
}
105113
}

0 commit comments

Comments
 (0)