Skip to content

Commit 819c370

Browse files
Add collector for hung_task_detect_count (#3470)
* Add collector for hung_task_detect_count --------- Signed-off-by: ScarletBlizzard <[email protected]>
1 parent f3154c9 commit 819c370

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ drm | Expose GPU metrics using sysfs / DRM, `amdgpu` is the only driver which ex
195195
drbd | Exposes Distributed Replicated Block Device statistics (to version 8.4) | Linux
196196
ethtool | Exposes network interface information and network driver statistics equivalent to `ethtool`, `ethtool -S`, and `ethtool -i`. | Linux
197197
interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD
198+
kernel_hung | Exposes the number of tasks that have been detected as hung, read from `/proc/sys/kernel/hung_task_detect_count`. | Linux
198199
ksmd | Exposes kernel and system statistics from `/sys/kernel/mm/ksm`. | Linux
199200
lnstat | Exposes stats from `/proc/net/stat/`. | Linux
200201
logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/Software/systemd/logind/). | Linux

collector/kernel_hung_linux.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright 2018 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
//go:build !noprocesses
15+
16+
package collector
17+
18+
import (
19+
"fmt"
20+
"log/slog"
21+
22+
"github.com/prometheus/client_golang/prometheus"
23+
"github.com/prometheus/procfs"
24+
)
25+
26+
// kernelHungCollector exports the kernel's hung-task detection counter,
// which its Update method reads through procfs.
type kernelHungCollector struct {
27+
// fs is the procfs handle that Update queries via KernelHung.
fs procfs.FS
28+
// logger is the logger handed to the constructor; no method visible in this
// file writes to it.
logger *slog.Logger
29+
}
30+
31+
// init registers NewKernelHungCollector under the collector name
// "kernel_hung", passing defaultDisabled as its default state (presumably
// leaving the collector off unless explicitly enabled; confirm against
// registerCollector).
func init() {
32+
registerCollector("kernel_hung", defaultDisabled, NewKernelHungCollector)
33+
}
34+
35+
func NewKernelHungCollector(logger *slog.Logger) (Collector, error) {
36+
fs, err := procfs.NewFS(*procPath)
37+
if err != nil {
38+
return nil, fmt.Errorf("failed to open procfs: %w", err)
39+
}
40+
return &kernelHungCollector{
41+
fs: fs,
42+
logger: logger,
43+
}, nil
44+
}
45+
46+
var (
47+
taskDetectCount = prometheus.NewDesc(
48+
prometheus.BuildFQName(namespace, "kernel_hung", "task_detect_count"),
49+
"Total number of interrupts serviced.",
50+
nil, nil,
51+
)
52+
)
53+
54+
func (c *kernelHungCollector) Update(ch chan<- prometheus.Metric) error {
55+
kernelHung, err := c.fs.KernelHung()
56+
if err != nil {
57+
return err
58+
}
59+
60+
ch <- prometheus.MustNewConstMetric(taskDetectCount, prometheus.CounterValue, float64(*kernelHung.HungTaskDetectCount))
61+
62+
return nil
63+
}

0 commit comments

Comments
 (0)