Skip to content

Commit 75908f5

Browse files
authored
Merge pull request vpenso#8 from MatMaul/pending-dep
Add pending because of dependency statistic
2 parents cc2b9e2 + 1d777da commit 75908f5

File tree

2 files changed

+15
-10
lines changed

2 files changed

+15
-10
lines changed

README.md

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Prometheus collector and exporter for metrics extracted from the [Slurm](https:/
2424
### Status of the Jobs
2525

2626
* **PENDING**: Jobs awaiting resource allocation.
27+
* **PENDING_DEPENDENCY**: Jobs waiting because of an unexecuted job dependency.
2728
* **RUNNING**: Jobs currently allocated.
2829
* **SUSPENDED**: Job has an allocation but execution has been suspended and CPUs have been released for other jobs.
2930
* **CANCELLED**: Jobs which were explicitly cancelled by the user or system administrator.
@@ -84,13 +85,7 @@ export GOPATH=$(pwd):/usr/share/gocode
8485

8586
3. Install all the necessary Go dependencies:
8687
```bash
87-
go get github.com/prometheus/client_golang
88-
go get github.com/prometheus/client_model
89-
go get github.com/prometheus/common
90-
go get github.com/prometheus/procfs
91-
go get github.com/beorn7/perks/quantile
92-
go get github.com/golang/protobuf/proto
93-
go get github.com/matttproud/golang_protobuf_extensions/pbutil
88+
go get github.com/prometheus/client_golang/prometheus
9489
go get github.com/sirupsen/logrus
9590
go get gopkg.in/alecthomas/kingpin.v2
9691
```

queue.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525

2626
type QueueMetrics struct {
2727
pending float64
28+
pending_dep float64
2829
running float64
2930
suspended float64
3031
cancelled float64
@@ -47,9 +48,14 @@ func ParseQueueMetrics(input []byte) *QueueMetrics {
4748
lines := strings.Split(string(input), "\n")
4849
for _, line := range lines {
4950
if strings.Contains(line,",") {
50-
state := strings.Split(line, ",")[1]
51+
splitted := strings.Split(line, ",")
52+
state := splitted[1]
5153
switch state {
52-
case "PENDING": qm.pending++
54+
case "PENDING":
55+
qm.pending++
56+
if len(splitted) > 2 && splitted[2] == "Dependency" {
57+
qm.pending_dep++
58+
}
5359
case "RUNNING": qm.running++
5460
case "SUSPENDED": qm.suspended++
5561
case "CANCELLED": qm.cancelled++
@@ -68,7 +74,7 @@ func ParseQueueMetrics(input []byte) *QueueMetrics {
6874

6975
// Execute the squeue command and return its output
7076
func QueueData() []byte {
71-
cmd := exec.Command("/usr/bin/squeue", "-h", "-o %A,%T")
77+
cmd := exec.Command("/usr/bin/squeue", "-h", "-o %A,%T,%r")
7278
stdout, err := cmd.StdoutPipe()
7379
if err != nil { log.Fatal(err) }
7480
if err := cmd.Start(); err != nil { log.Fatal(err) }
@@ -86,6 +92,7 @@ func QueueData() []byte {
8692
func NewQueueCollector() *QueueCollector {
8793
return &QueueCollector {
8894
pending: prometheus.NewDesc("slurm_queue_pending", "Pending jobs in queue", nil, nil),
95+
pending_dep: prometheus.NewDesc("slurm_queue_pending_dependency", "Pending jobs because of dependency in queue", nil, nil),
8996
running: prometheus.NewDesc("slurm_queue_running", "Running jobs in the cluster", nil, nil),
9097
suspended: prometheus.NewDesc("slurm_queue_suspended", "Suspended jobs in the cluster", nil, nil),
9198
cancelled: prometheus.NewDesc("slurm_queue_cancelled", "Cancelled jobs in the cluster", nil, nil),
@@ -101,6 +108,7 @@ func NewQueueCollector() *QueueCollector {
101108

102109
type QueueCollector struct {
103110
pending *prometheus.Desc
111+
pending_dep *prometheus.Desc
104112
running *prometheus.Desc
105113
suspended *prometheus.Desc
106114
cancelled *prometheus.Desc
@@ -115,6 +123,7 @@ func NewQueueCollector() *QueueCollector {
115123

116124
func (qc *QueueCollector) Describe(ch chan<- *prometheus.Desc) {
117125
ch <- qc.pending
126+
ch <- qc.pending_dep
118127
ch <- qc.running
119128
ch <- qc.suspended
120129
ch <- qc.cancelled
@@ -130,6 +139,7 @@ func NewQueueCollector() *QueueCollector {
130139
func (qc *QueueCollector) Collect(ch chan<- prometheus.Metric) {
131140
qm := QueueGetMetrics()
132141
ch <- prometheus.MustNewConstMetric(qc.pending, prometheus.GaugeValue, qm.pending)
142+
ch <- prometheus.MustNewConstMetric(qc.pending_dep, prometheus.GaugeValue, qm.pending_dep)
133143
ch <- prometheus.MustNewConstMetric(qc.running, prometheus.GaugeValue, qm.running)
134144
ch <- prometheus.MustNewConstMetric(qc.suspended, prometheus.GaugeValue, qm.suspended)
135145
ch <- prometheus.MustNewConstMetric(qc.cancelled, prometheus.GaugeValue, qm.cancelled)

0 commit comments

Comments
 (0)