Skip to content

Commit 541f737

Browse files
ShashwatHiregoudar authored and Your Name committed
improving the nvme exporter
Signed-off-by: GitHub <[email protected]> Signed-off-by: Your Name <[email protected]>
1 parent 2179f0a commit 541f737

File tree

4 files changed

+149
-14
lines changed

4 files changed

+149
-14
lines changed

collector/fixtures/e2e-64k-page-output.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2825,7 +2825,7 @@ node_nfsd_server_rpcs_total 18628
28252825
node_nfsd_server_threads 8
28262826
# HELP node_nvme_info Non-numeric data from /sys/class/nvme/<device>, value is always 1.
28272827
# TYPE node_nvme_info gauge
2828-
node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1
2828+
node_nvme_info{cntlid="1997",device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1
28292829
# HELP node_os_info A metric with a constant '1' value labeled by build_id, id, id_like, image_id, image_version, name, pretty_name, variant, variant_id, version, version_codename, version_id.
28302830
# TYPE node_os_info gauge
28312831
node_os_info{build_id="",id="ubuntu",id_like="debian",image_id="",image_version="",name="Ubuntu",pretty_name="Ubuntu 20.04.2 LTS",variant="",variant_id="",version="20.04.2 LTS (Focal Fossa)",version_codename="focal",version_id="20.04"} 1

collector/fixtures/e2e-output.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2847,7 +2847,7 @@ node_nfsd_server_rpcs_total 18628
28472847
node_nfsd_server_threads 8
28482848
# HELP node_nvme_info Non-numeric data from /sys/class/nvme/<device>, value is always 1.
28492849
# TYPE node_nvme_info gauge
2850-
node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1
2850+
node_nvme_info{cntlid="1997",device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1
28512851
# HELP node_os_info A metric with a constant '1' value labeled by build_id, id, id_like, image_id, image_version, name, pretty_name, variant, variant_id, version, version_codename, version_id.
28522852
# TYPE node_os_info gauge
28532853
node_os_info{build_id="",id="ubuntu",id_like="debian",image_id="",image_version="",name="Ubuntu",pretty_name="Ubuntu 20.04.2 LTS",variant="",variant_id="",version="20.04.2 LTS (Focal Fossa)",version_codename="focal",version_id="20.04"} 1

collector/fixtures/sys.ttar

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,6 +2179,11 @@ Lines: 1
21792179
1B2QEXP7
21802180
Mode: 444
21812181
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2182+
Path: sys/class/nvme/nvme0/cntlid
2183+
Lines: 1
2184+
1997
2185+
Mode: 666
2186+
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
21822187
Path: sys/class/nvme/nvme0/model
21832188
Lines: 1
21842189
Samsung SSD 970 PRO 512GB

collector/nvme_linux.go

Lines changed: 142 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,26 @@ package collector
1919
import (
2020
"errors"
2121
"fmt"
22-
"log/slog"
2322
"os"
23+
"path/filepath"
24+
"regexp"
25+
"strconv"
26+
"strings"
2427

2528
"github.com/prometheus/client_golang/prometheus"
2629
"github.com/prometheus/procfs/sysfs"
30+
"log/slog"
2731
)
2832

2933
type nvmeCollector struct {
30-
fs sysfs.FS
31-
logger *slog.Logger
34+
fs sysfs.FS
35+
logger *slog.Logger
36+
namespaceInfo *prometheus.Desc
37+
namespaceCapacityBytes *prometheus.Desc
38+
namespaceSizeBytes *prometheus.Desc
39+
namespaceUsedBytes *prometheus.Desc
40+
namespaceLogicalBlockSizeBytes *prometheus.Desc
41+
info *prometheus.Desc
3242
}
3343

3444
func init() {
@@ -42,9 +52,51 @@ func NewNVMeCollector(logger *slog.Logger) (Collector, error) {
4252
return nil, fmt.Errorf("failed to open sysfs: %w", err)
4353
}
4454

55+
info := prometheus.NewDesc(
56+
prometheus.BuildFQName(namespace, "nvme", "info"),
57+
"Non-numeric data from /sys/class/nvme/<device>, value is always 1.",
58+
[]string{"device", "firmware_revision", "model", "serial", "state", "cntlid"},
59+
nil,
60+
)
61+
namespaceInfo := prometheus.NewDesc(
62+
prometheus.BuildFQName(namespace, "nvme", "namespace_info"),
63+
"Information about NVMe namespaces. Value is always 1",
64+
[]string{"device", "nsid", "ana_state"}, nil,
65+
)
66+
67+
namespaceCapacityBytes := prometheus.NewDesc(
68+
prometheus.BuildFQName(namespace, "nvme", "namespace_capacity_bytes"),
69+
"Capacity of the NVMe namespace in bytes. Computed as namespace_size * namespace_logical_block_size",
70+
[]string{"device", "nsid"}, nil,
71+
)
72+
73+
namespaceSizeBytes := prometheus.NewDesc(
74+
prometheus.BuildFQName(namespace, "nvme", "namespace_size_bytes"),
75+
"Size of the NVMe namespace in bytes. Available in /sys/class/nvme/<device>/<namespace>/size",
76+
[]string{"device", "nsid"}, nil,
77+
)
78+
79+
namespaceUsedBytes := prometheus.NewDesc(
80+
prometheus.BuildFQName(namespace, "nvme", "namespace_used_bytes"),
81+
"Used space of the NVMe namespace in bytes. Available in /sys/class/nvme/<device>/<namespace>/nuse",
82+
[]string{"device", "nsid"}, nil,
83+
)
84+
85+
namespaceLogicalBlockSizeBytes := prometheus.NewDesc(
86+
prometheus.BuildFQName(namespace, "nvme", "namespace_logical_block_size_bytes"),
87+
"Logical block size of the NVMe namespace in bytes. Usually 4Kb. Available in /sys/class/nvme/<device>/<namespace>/queue/logical_block_size",
88+
[]string{"device", "nsid"}, nil,
89+
)
90+
4591
return &nvmeCollector{
46-
fs: fs,
47-
logger: logger,
92+
fs: fs,
93+
logger: logger,
94+
namespaceInfo: namespaceInfo,
95+
namespaceCapacityBytes: namespaceCapacityBytes,
96+
namespaceSizeBytes: namespaceSizeBytes,
97+
namespaceUsedBytes: namespaceUsedBytes,
98+
namespaceLogicalBlockSizeBytes: namespaceLogicalBlockSizeBytes,
99+
info: info,
48100
}, nil
49101
}
50102

@@ -59,14 +111,92 @@ func (c *nvmeCollector) Update(ch chan<- prometheus.Metric) error {
59111
}
60112

61113
for _, device := range devices {
62-
infoDesc := prometheus.NewDesc(
63-
prometheus.BuildFQName(namespace, "nvme", "info"),
64-
"Non-numeric data from /sys/class/nvme/<device>, value is always 1.",
65-
[]string{"device", "firmware_revision", "model", "serial", "state"},
66-
nil,
67-
)
68114
infoValue := 1.0
69-
ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, device.Name, device.FirmwareRevision, device.Model, device.Serial, device.State)
115+
116+
devicePath := filepath.Join(*sysPath, "class/nvme", device.Name)
117+
cntlid, err := readUintFromFile(filepath.Join(devicePath, "cntlid"))
118+
if err != nil {
119+
c.logger.Debug("failed to read cntlid", "device", device.Name, "err", err)
120+
}
121+
ch <- prometheus.MustNewConstMetric(c.info, prometheus.GaugeValue, infoValue, device.Name, device.FirmwareRevision, device.Model, device.Serial, device.State, strconv.FormatUint(cntlid, 10))
122+
// Find namespace directories.
123+
namespacePaths, err := filepath.Glob(filepath.Join(devicePath, "nvme[0-9]*c[0-9]*n[0-9]*"))
124+
if err != nil {
125+
c.logger.Error("failed to list NVMe namespaces", "device", device.Name, "err", err)
126+
continue
127+
}
128+
re := regexp.MustCompile(`nvme[0-9]+c[0-9]+n([0-9]+)`)
129+
130+
for _, namespacePath := range namespacePaths {
131+
132+
// Read namespace data.
133+
match := re.FindStringSubmatch(filepath.Base(namespacePath))
134+
if len(match) == 0 {
135+
continue
136+
}
137+
nsid := match[1]
138+
nuse, err := readUintFromFile(filepath.Join(namespacePath, "nuse"))
139+
if err != nil {
140+
c.logger.Debug("failed to read nuse", "device", device.Name, "namespace", match[0], "err", err)
141+
}
142+
nsze, err := readUintFromFile(filepath.Join(namespacePath, "size"))
143+
if err != nil {
144+
c.logger.Debug("failed to read size", "device", device.Name, "namespace", match[0], "err", err)
145+
}
146+
lbaSize, err := readUintFromFile(filepath.Join(namespacePath, "queue", "logical_block_size"))
147+
if err != nil {
148+
c.logger.Debug("failed to read queue/logical_block_size", "device", device.Name, "namespace", match[0], "err", err)
149+
}
150+
ncap := nsze * lbaSize
151+
anaState := "unknown"
152+
anaStateSysfs, err := os.ReadFile(filepath.Join(namespacePath, "ana_state"))
153+
if err == nil {
154+
anaState = strings.TrimSpace(string(anaStateSysfs))
155+
} else {
156+
c.logger.Debug("failed to read ana_state", "device", device.Name, "namespace", match[0], "err", err)
157+
}
158+
159+
ch <- prometheus.MustNewConstMetric(
160+
c.namespaceInfo,
161+
prometheus.GaugeValue,
162+
1.0,
163+
device.Name,
164+
nsid,
165+
anaState,
166+
)
167+
168+
ch <- prometheus.MustNewConstMetric(
169+
c.namespaceCapacityBytes,
170+
prometheus.GaugeValue,
171+
float64(ncap),
172+
device.Name,
173+
nsid,
174+
)
175+
176+
ch <- prometheus.MustNewConstMetric(
177+
c.namespaceSizeBytes,
178+
prometheus.GaugeValue,
179+
float64(nsze),
180+
device.Name,
181+
nsid,
182+
)
183+
184+
ch <- prometheus.MustNewConstMetric(
185+
c.namespaceUsedBytes,
186+
prometheus.GaugeValue,
187+
float64(nuse*lbaSize),
188+
device.Name,
189+
nsid,
190+
)
191+
192+
ch <- prometheus.MustNewConstMetric(
193+
c.namespaceLogicalBlockSizeBytes,
194+
prometheus.GaugeValue,
195+
float64(lbaSize),
196+
device.Name,
197+
nsid,
198+
)
199+
}
70200
}
71201

72202
return nil

0 commit comments

Comments
 (0)