@@ -25,6 +25,7 @@ import (
25
25
26
26
type QueueMetrics struct {
27
27
pending float64
28
+ pending_dep float64
28
29
running float64
29
30
suspended float64
30
31
cancelled float64
@@ -47,9 +48,14 @@ func ParseQueueMetrics(input []byte) *QueueMetrics {
47
48
lines := strings .Split (string (input ), "\n " )
48
49
for _ , line := range lines {
49
50
if strings .Contains (line ,"," ) {
50
- state := strings .Split (line , "," )[1 ]
51
+ splitted := strings .Split (line , "," )
52
+ state := splitted [1 ]
51
53
switch state {
52
- case "PENDING" : qm .pending ++
54
+ case "PENDING" :
55
+ qm .pending ++
56
+ if len (splitted ) > 2 && splitted [2 ] == "Dependency" {
57
+ qm .pending_dep ++
58
+ }
53
59
case "RUNNING" : qm .running ++
54
60
case "SUSPENDED" : qm .suspended ++
55
61
case "CANCELLED" : qm .cancelled ++
@@ -68,7 +74,7 @@ func ParseQueueMetrics(input []byte) *QueueMetrics {
68
74
69
75
// QueueData executes the squeue command and returns its output.
70
76
func QueueData () []byte {
71
- cmd := exec .Command ("/usr/bin/squeue" , "-h" , "-o %A,%T" )
77
+ cmd := exec .Command ("/usr/bin/squeue" , "-h" , "-o %A,%T,%r " )
72
78
stdout , err := cmd .StdoutPipe ()
73
79
if err != nil { log .Fatal (err ) }
74
80
if err := cmd .Start (); err != nil { log .Fatal (err ) }
@@ -86,6 +92,7 @@ func QueueData() []byte {
86
92
func NewQueueCollector () * QueueCollector {
87
93
return & QueueCollector {
88
94
pending : prometheus .NewDesc ("slurm_queue_pending" , "Pending jobs in queue" , nil , nil ),
95
+ pending_dep : prometheus .NewDesc ("slurm_queue_pending_dependency" , "Pending jobs because of dependency in queue" , nil , nil ),
89
96
running : prometheus .NewDesc ("slurm_queue_running" , "Running jobs in the cluster" , nil , nil ),
90
97
suspended : prometheus .NewDesc ("slurm_queue_suspended" , "Suspended jobs in the cluster" , nil , nil ),
91
98
cancelled : prometheus .NewDesc ("slurm_queue_cancelled" , "Cancelled jobs in the cluster" , nil , nil ),
@@ -101,6 +108,7 @@ func NewQueueCollector() *QueueCollector {
101
108
102
109
type QueueCollector struct {
103
110
pending * prometheus.Desc
111
+ pending_dep * prometheus.Desc
104
112
running * prometheus.Desc
105
113
suspended * prometheus.Desc
106
114
cancelled * prometheus.Desc
@@ -115,6 +123,7 @@ func NewQueueCollector() *QueueCollector {
115
123
116
124
func (qc * QueueCollector ) Describe (ch chan <- * prometheus.Desc ) {
117
125
ch <- qc .pending
126
+ ch <- qc .pending_dep
118
127
ch <- qc .running
119
128
ch <- qc .suspended
120
129
ch <- qc .cancelled
@@ -130,6 +139,7 @@ func NewQueueCollector() *QueueCollector {
130
139
func (qc * QueueCollector ) Collect (ch chan <- prometheus.Metric ) {
131
140
qm := QueueGetMetrics ()
132
141
ch <- prometheus .MustNewConstMetric (qc .pending , prometheus .GaugeValue , qm .pending )
142
+ ch <- prometheus .MustNewConstMetric (qc .pending_dep , prometheus .GaugeValue , qm .pending_dep )
133
143
ch <- prometheus .MustNewConstMetric (qc .running , prometheus .GaugeValue , qm .running )
134
144
ch <- prometheus .MustNewConstMetric (qc .suspended , prometheus .GaugeValue , qm .suspended )
135
145
ch <- prometheus .MustNewConstMetric (qc .cancelled , prometheus .GaugeValue , qm .cancelled )
0 commit comments