Skip to content

Commit aa0e02c

Browse files
authored
Merge pull request vpenso#13 from rug-cit-hpc/master
Add metrics for CPUs (extracted from Slurm sinfo)
2 parents a142e78 + df2b3c6 commit aa0e02c

File tree

6 files changed

+148
-1
lines changed

6 files changed

+148
-1
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ PROJECT_NAME = prometheus-slurm-exporter
22
ifndef GOPATH
33
GOPATH=$(shell pwd):/usr/share/gocode
44
endif
5-
GOFILES=main.go nodes.go queue.go scheduler.go
5+
GOFILES=cpus.go main.go nodes.go queue.go scheduler.go
66
GOBIN=bin/$(PROJECT_NAME)
77

88
build:

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@ Prometheus collector and exporter for metrics extracted from the [Slurm](https:/
44

55
## Exported Metrics
66

7+
### State of the CPUs
8+
9+
* **Allocated**: CPUs which have been allocated to a job.
10+
* **Idle**: CPUs not allocated to a job and thus available for use.
11+
* **Other**: CPUs which are unavailable for use at the moment.
12+
* **Total**: total number of CPUs.
13+
14+
[Information extracted from the SLURM **sinfo** command](https://slurm.schedmd.com/sinfo.html)
15+
716
### State of the Nodes
817

918
* **Allocated**: nodes which have been allocated to one or more jobs.

cpus.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/* Copyright 2017 Victor Penso, Matteo Dessalvi
2+
3+
This program is free software: you can redistribute it and/or modify
4+
it under the terms of the GNU General Public License as published by
5+
the Free Software Foundation, either version 3 of the License, or
6+
(at your option) any later version.
7+
8+
This program is distributed in the hope that it will be useful,
9+
but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
GNU General Public License for more details.
12+
13+
You should have received a copy of the GNU General Public License
14+
along with this program. If not, see <http://www.gnu.org/licenses/>. */
15+
16+
package main
17+
18+
import (
19+
"github.com/prometheus/client_golang/prometheus"
20+
"io/ioutil"
21+
"log"
22+
"os/exec"
23+
"strconv"
24+
"strings"
25+
)
26+
27+
// CPUsMetrics holds the four CPU counters handled by this file:
// allocated, idle, other and total, in that order.
type CPUsMetrics struct {
	alloc, idle, other, total float64
}
33+
34+
func CPUsGetMetrics() *CPUsMetrics {
35+
return ParseCPUsMetrics(CPUsData())
36+
}
37+
38+
func ParseCPUsMetrics(input []byte) *CPUsMetrics {
39+
var cm CPUsMetrics
40+
if strings.Contains(string(input), "/") {
41+
splitted := strings.Split(strings.TrimSpace(string(input)), "/")
42+
cm.alloc, _ = strconv.ParseFloat(splitted[0], 64)
43+
cm.idle, _ = strconv.ParseFloat(splitted[1], 64)
44+
cm.other, _ = strconv.ParseFloat(splitted[2], 64)
45+
cm.total, _ = strconv.ParseFloat(splitted[3], 64)
46+
}
47+
return &cm
48+
}
49+
50+
// CPUsData executes the sinfo command with the arguments "-h" and "-o %C"
// and returns everything the command wrote to its standard output.
//
// Every failure (creating the pipe, starting the command, reading its output,
// or a non-successful exit reported by Wait) is passed to log.Fatal, which
// logs the error and terminates the process.
func CPUsData() []byte {
	cmd := exec.Command("sinfo", "-h", "-o %C")
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		log.Fatal(err)
	}
	if err := cmd.Start(); err != nil {
		log.Fatal(err)
	}
	// Read the pipe to EOF before calling Wait, which closes it.
	out, err := ioutil.ReadAll(stdout)
	if err != nil {
		// A failed read would otherwise hand truncated data to the parser.
		log.Fatal(err)
	}
	if err := cmd.Wait(); err != nil {
		log.Fatal(err)
	}
	return out
}
66+
67+
/*
68+
* Implement the Prometheus Collector interface and feed the
69+
 * Slurm CPU metrics into it.
70+
* https://godoc.org/github.com/prometheus/client_golang/prometheus#Collector
71+
*/
72+
73+
func NewCPUsCollector() *CPUsCollector {
74+
return &CPUsCollector{
75+
alloc: prometheus.NewDesc("slurm_cpus_alloc", "Allocated CPUs", nil, nil),
76+
idle: prometheus.NewDesc("slurm_cpus_idle", "Idle CPUs", nil, nil),
77+
other: prometheus.NewDesc("slurm_cpus_other", "Mix CPUs", nil, nil),
78+
total: prometheus.NewDesc("slurm_cpus_total", "Total CPUs", nil, nil),
79+
}
80+
}
81+
82+
type CPUsCollector struct {
83+
alloc *prometheus.Desc
84+
idle *prometheus.Desc
85+
other *prometheus.Desc
86+
total *prometheus.Desc
87+
}
88+
89+
// Send all metric descriptions
90+
func (cc *CPUsCollector) Describe(ch chan<- *prometheus.Desc) {
91+
ch <- cc.alloc
92+
ch <- cc.idle
93+
ch <- cc.other
94+
ch <- cc.total
95+
}
96+
func (cc *CPUsCollector) Collect(ch chan<- prometheus.Metric) {
97+
cm := CPUsGetMetrics()
98+
ch <- prometheus.MustNewConstMetric(cc.alloc, prometheus.GaugeValue, cm.alloc)
99+
ch <- prometheus.MustNewConstMetric(cc.idle, prometheus.GaugeValue, cm.idle)
100+
ch <- prometheus.MustNewConstMetric(cc.other, prometheus.GaugeValue, cm.other)
101+
ch <- prometheus.MustNewConstMetric(cc.total, prometheus.GaugeValue, cm.total)
102+
}

cpus_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/* Copyright 2017 Victor Penso, Matteo Dessalvi
2+
3+
This program is free software: you can redistribute it and/or modify
4+
it under the terms of the GNU General Public License as published by
5+
the Free Software Foundation, either version 3 of the License, or
6+
(at your option) any later version.
7+
8+
This program is distributed in the hope that it will be useful,
9+
but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
GNU General Public License for more details.
12+
13+
You should have received a copy of the GNU General Public License
14+
along with this program. If not, see <http://www.gnu.org/licenses/>. */
15+
16+
package main
17+
18+
import (
19+
"testing"
20+
"os"
21+
"io/ioutil"
22+
)
23+
24+
func TestCPUsMetrics(t *testing.T) {
25+
// Read the input data from a file
26+
file, err := os.Open("test_data/sinfo_cpus.txt")
27+
if err != nil { t.Fatalf("Can not open test data: %v", err) }
28+
data, err := ioutil.ReadAll(file)
29+
t.Logf("%+v", ParseCPUsMetrics(data))
30+
}
31+
32+
func TestCPUssGetMetrics(t *testing.T) {
33+
t.Logf("%+v", CPUsGetMetrics())
34+
}

main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ func init() {
2828
prometheus.MustRegister(NewSchedulerCollector()) // from scheduler.go
2929
prometheus.MustRegister(NewQueueCollector()) // from queue.go
3030
prometheus.MustRegister(NewNodesCollector()) // from nodes.go
31+
prometheus.MustRegister(NewCPUsCollector()) // from cpus.go
3132
}
3233

3334
var listenAddress = flag.String(

test_data/sinfo_cpus.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
5725/877/34/6636

0 commit comments

Comments
 (0)