Skip to content

Commit 4e02336

Browse files
committed
add the number of cpus used by running jobs per user
1 parent a9dfd3c commit 4e02336

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

users.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@ import (
2020
"os/exec"
2121
"log"
2222
"strings"
23+
"strconv"
2324
"regexp"
2425
"github.com/prometheus/client_golang/prometheus"
2526
)
2627

2728
func UsersData() []byte {
28-
cmd := exec.Command("squeue", "-h", "-o '%A|%u|%T|%C'")
29+
cmd := exec.Command("squeue", "-h", "-o %A|%u|%T|%C")
2930
stdout, err := cmd.StdoutPipe()
3031
if err != nil {
3132
log.Fatal(err)
@@ -43,6 +44,7 @@ func UsersData() []byte {
4344
// UserJobMetrics accumulates per-user job statistics while squeue output is
// parsed by ParseUsersMetrics. One instance is stored per user name in the
// map that ParseUsersMetrics builds, and every field starts at zero.
type UserJobMetrics struct {
4445
pending float64 // incremented by ParseUsersMetrics; the guarding case is outside the visible hunk (presumably the `^pending` match)
4546
running float64 // incremented once per line whose lowercased state matches `^running`
47+
running_cpus float64 // sum of the 4th `|`-separated field over running jobs; the ParseFloat error is discarded by the parser
4648
suspended float64 // incremented once per line whose lowercased state matches `^suspended`
4749
}
4850

@@ -54,10 +56,11 @@ func ParseUsersMetrics(input []byte) map[string]*UserJobMetrics {
5456
user := strings.Split(line,"|")[1]
5557
_,key := users[user]
5658
if !key {
57-
users[user] = &UserJobMetrics{0,0,0}
59+
users[user] = &UserJobMetrics{0,0,0,0}
5860
}
5961
state := strings.Split(line,"|")[2]
6062
state = strings.ToLower(state)
63+
cpus,_ := strconv.ParseFloat(strings.Split(line,"|")[3],64)
6164
pending := regexp.MustCompile(`^pending`)
6265
running := regexp.MustCompile(`^running`)
6366
suspended := regexp.MustCompile(`^suspended`)
@@ -66,6 +69,7 @@ func ParseUsersMetrics(input []byte) map[string]*UserJobMetrics {
6669
users[user].pending++
6770
case running.MatchString(state) == true:
6871
users[user].running++
72+
users[user].running_cpus += cpus
6973
case suspended.MatchString(state) == true:
7074
users[user].suspended++
7175
}
@@ -77,6 +81,7 @@ func ParseUsersMetrics(input []byte) map[string]*UserJobMetrics {
7781
// UsersCollector holds one Prometheus descriptor per exported per-user
// metric. The descriptors are created in NewUsersCollector and share the
// same label set (the label names are defined outside the visible hunk).
type UsersCollector struct {
7882
pending *prometheus.Desc // descriptor for "slurm_user_jobs_pending"
7983
running *prometheus.Desc // descriptor for "slurm_user_jobs_running"
84+
running_cpus *prometheus.Desc // descriptor for "slurm_user_cpus_running"
8085
suspended *prometheus.Desc // descriptor for "slurm_user_jobs_suspended"
8186
}
8287

@@ -85,13 +90,15 @@ func NewUsersCollector() *UsersCollector {
8590
return &UsersCollector {
8691
pending: prometheus.NewDesc("slurm_user_jobs_pending", "Pending jobs for user", labels, nil),
8792
running: prometheus.NewDesc("slurm_user_jobs_running", "Running jobs for user", labels, nil),
93+
running_cpus: prometheus.NewDesc("slurm_user_cpus_running", "Running cpus for user", labels, nil),
8894
suspended: prometheus.NewDesc("slurm_user_jobs_suspended", "Suspended jobs for user", labels, nil),
8995
}
9096
}
9197

9298
// Describe sends every descriptor held by the collector to ch, in the order
// pending, running, running_cpus, suspended.
// NOTE(review): together with Collect this presumably satisfies the
// prometheus.Collector interface — confirm at the registration site.
func (uc *UsersCollector) Describe(ch chan<- *prometheus.Desc) {
9399
ch <- uc.pending
94100
ch <- uc.running
101+
ch <- uc.running_cpus
95102
ch <- uc.suspended
96103
}
97104

@@ -100,6 +107,7 @@ func (uc *UsersCollector) Collect(ch chan<- prometheus.Metric) {
100107
for u := range um {
101108
ch <- prometheus.MustNewConstMetric(uc.pending, prometheus.GaugeValue, um[u].pending, u)
102109
ch <- prometheus.MustNewConstMetric(uc.running, prometheus.GaugeValue, um[u].running, u)
110+
ch <- prometheus.MustNewConstMetric(uc.running_cpus, prometheus.GaugeValue, um[u].running_cpus, u)
103111
ch <- prometheus.MustNewConstMetric(uc.suspended, prometheus.GaugeValue, um[u].suspended, u)
104112
}
105113
}

0 commit comments

Comments
 (0)