diff --git a/README.md b/README.md index 3761bcc93c..ec5adbf7ac 100644 --- a/README.md +++ b/README.md @@ -195,6 +195,7 @@ drm | Expose GPU metrics using sysfs / DRM, `amdgpu` is the only driver which ex drbd | Exposes Distributed Replicated Block Device statistics (to version 8.4) | Linux ethtool | Exposes network interface information and network driver statistics equivalent to `ethtool`, `ethtool -S`, and `ethtool -i`. | Linux interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD +kernel_hung | Exposes number of tasks that have been detected as hung from `/proc/sys/kernel/hung_task_detect_count`. | Linux ksmd | Exposes kernel and system statistics from `/sys/kernel/mm/ksm`. | Linux lnstat | Exposes stats from `/proc/net/stat/`. | Linux logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/Software/systemd/logind/). | Linux diff --git a/collector/kernel_hung_linux.go b/collector/kernel_hung_linux.go new file mode 100644 index 0000000000..606c48567f --- /dev/null +++ b/collector/kernel_hung_linux.go @@ -0,0 +1,62 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noprocesses +// +build !noprocesses + +package collector + +import ( + "fmt" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs" +) + +type kernelHungCollector struct { + fs procfs.FS + taskDetectCount *prometheus.Desc + logger *slog.Logger +} + +func init() { + registerCollector("kernel_hung", defaultDisabled, NewKernelHungCollector) +} + +func NewKernelHungCollector(logger *slog.Logger) (Collector, error) { + fs, err := procfs.NewFS(*procPath) + if err != nil { + return nil, fmt.Errorf("failed to open procfs: %w", err) + } + return &kernelHungCollector{ + fs: fs, + taskDetectCount: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "kernel_hung", "task_detect_count"), + "Total number of interrupts serviced.", + nil, nil, + ), + logger: logger, + }, nil +} + +func (c *kernelHungCollector) Update(ch chan<- prometheus.Metric) error { + kernelHung, err := c.fs.KernelHung() + if err != nil { + return err + } + + ch <- prometheus.MustNewConstMetric(c.taskDetectCount, prometheus.CounterValue, float64(*kernelHung.HungTaskDetectCount)) + + return nil +}