Skip to content

Commit 73fd6de

Browse files
author
Samu
authored
Enrich block device metrics with udev tags (#627)
1 parent 2003317 commit 73fd6de

File tree

4 files changed

+139
-19
lines changed

4 files changed

+139
-19
lines changed

cmd/agent/daemon/pipeline/controller_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ func (m *mockCloudVolumeMetricsWriter) Write(metrics ...CloudVolumeMetric) error
13821382

13831383
type mockStorageInfoProvider struct{}
13841384

1385-
func (m *mockStorageInfoProvider) BuildFilesystemMetrics(ctx context.Context, timestamp time.Time) ([]FilesystemMetric, error) {
1385+
func (m *mockStorageInfoProvider) CollectFilesystemMetrics(ctx context.Context, timestamp time.Time) ([]FilesystemMetric, error) {
13861386
return []FilesystemMetric{
13871387
{
13881388
NodeName: "test-node",
@@ -1396,7 +1396,7 @@ func (m *mockStorageInfoProvider) BuildFilesystemMetrics(ctx context.Context, ti
13961396
}, nil
13971397
}
13981398

1399-
func (m *mockStorageInfoProvider) BuildBlockDeviceMetrics(timestamp time.Time) ([]BlockDeviceMetric, error) {
1399+
func (m *mockStorageInfoProvider) CollectBlockDeviceMetrics(timestamp time.Time) ([]BlockDeviceMetric, error) {
14001400
return []BlockDeviceMetric{
14011401
{
14021402
Name: "sda",

cmd/agent/daemon/pipeline/storage_info_provider.go

Lines changed: 85 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,17 @@ const hostPathRoot = "/proc/1/root"
2828

2929
// BlockDeviceMetric represents enhanced block device metrics with accurate sector sizes
3030
type BlockDeviceMetric struct {
31-
Name string `avro:"name"`
32-
NodeName string `avro:"node_name"`
33-
NodeTemplate *string `avro:"node_template"`
34-
Path string `avro:"path"`
35-
SizeBytes *int64 `avro:"size_bytes"`
36-
DiskType string `avro:"disk_type"` // HDD, SSD
37-
PartitionOf string `avro:"partition_of"` // parent device for partitions
38-
Holders []string `avro:"holders"` // devices using this device
39-
IsVirtual bool `avro:"is_virtual"` // dm-* or md* devices
40-
RaidLevel string `avro:"raid_level"` // raid0, raid1, raid5, etc
31+
Name string `avro:"name"`
32+
NodeName string `avro:"node_name"`
33+
NodeTemplate *string `avro:"node_template"`
34+
Path string `avro:"path"`
35+
SizeBytes *int64 `avro:"size_bytes"`
36+
DiskType string `avro:"disk_type"` // HDD, SSD
37+
PartitionOf string `avro:"partition_of"` // parent device for partitions
38+
Holders []string `avro:"holders"` // devices using this device
39+
IsVirtual bool `avro:"is_virtual"` // dm-* or md* devices
40+
RaidLevel string `avro:"raid_level"` // raid0, raid1, raid5, etc
41+
LVMInfo map[string]string `avro:"lvm_info"` // LVM metadata for this device
4142

4243
ReadIOPS float64 `avro:"read_iops"`
4344
WriteIOPS float64 `avro:"write_iops"`
@@ -136,8 +137,8 @@ type storageMetricsState struct {
136137
}
137138

138139
type StorageInfoProvider interface {
139-
BuildFilesystemMetrics(ctx context.Context, timestamp time.Time) ([]FilesystemMetric, error)
140-
BuildBlockDeviceMetrics(timestamp time.Time) ([]BlockDeviceMetric, error)
140+
CollectFilesystemMetrics(ctx context.Context, timestamp time.Time) ([]FilesystemMetric, error)
141+
CollectBlockDeviceMetrics(timestamp time.Time) ([]BlockDeviceMetric, error)
141142
CollectNodeStatsSummary(ctx context.Context) (*NodeStatsSummaryMetric, error)
142143
CollectPodVolumeMetrics(ctx context.Context) ([]K8sPodVolumeMetric, error)
143144
CollectCloudVolumeMetrics(ctx context.Context) ([]CloudVolumeMetric, error)
@@ -448,7 +449,7 @@ func (s *SysfsStorageInfoProvider) CollectCloudVolumeMetrics(ctx context.Context
448449
return metrics, nil
449450
}
450451

451-
func (s *SysfsStorageInfoProvider) BuildFilesystemMetrics(ctx context.Context, timestamp time.Time) ([]FilesystemMetric, error) {
452+
func (s *SysfsStorageInfoProvider) CollectFilesystemMetrics(ctx context.Context, timestamp time.Time) ([]FilesystemMetric, error) {
452453
// Read mount information from /proc/1/mountinfo
453454
mounts, err := readMountInfo("/proc/1/mountinfo")
454455
if err != nil {
@@ -621,7 +622,7 @@ func (s *SysfsStorageInfoProvider) getLVMDMDevice(device string) []string {
621622
}
622623
}
623624

624-
func (s *SysfsStorageInfoProvider) BuildBlockDeviceMetrics(timestamp time.Time) ([]BlockDeviceMetric, error) {
625+
func (s *SysfsStorageInfoProvider) CollectBlockDeviceMetrics(timestamp time.Time) ([]BlockDeviceMetric, error) {
625626
// Read stats from /proc/diskstats
626627
diskStats, err := readProcDiskStats()
627628
if err != nil {
@@ -662,6 +663,7 @@ func (s *SysfsStorageInfoProvider) buildBlockDeviceMetric(blockName string, stat
662663
holders := s.getHolders(blockName)
663664
raidLevel := s.getRaidLevel(blockName)
664665
logicalSectorSize := s.getLogicalSectorSize(blockName)
666+
lvmInfo := s.getLVMInfo(blockName)
665667

666668
diskSize, err := s.getDeviceSize(blockName)
667669
if err != nil {
@@ -684,6 +686,7 @@ func (s *SysfsStorageInfoProvider) buildBlockDeviceMetric(blockName string, stat
684686
Holders: holders,
685687
IsVirtual: isVirtualDevice(blockName),
686688
RaidLevel: raidLevel,
689+
LVMInfo: lvmInfo,
687690
Timestamp: timestamp,
688691
InFlightRequests: safeUint64ToInt64(stats.InFlight),
689692

@@ -930,6 +933,74 @@ func (s *SysfsStorageInfoProvider) getHolders(deviceName string) []string {
930933
return holders
931934
}
932935

936+
// getLVMInfo returns LVM metadata (dm_name, lv_name, vg_name) for device-mapper devices
937+
func (s *SysfsStorageInfoProvider) getLVMInfo(deviceName string) map[string]string {
938+
if !strings.HasPrefix(deviceName, "dm-") {
939+
return nil
940+
}
941+
942+
requiredTags := map[string]string{
943+
"DM_NAME": "dm_name",
944+
"DM_LV_NAME": "lv_name",
945+
"DM_VG_NAME": "vg_name",
946+
}
947+
948+
// Read device major:minor from /sys/block/<device>/dev
949+
devPath := filepath.Join(s.sysBlockPrefix, "sys", "block", deviceName, "dev")
950+
devData, err := os.ReadFile(devPath)
951+
if err != nil {
952+
return nil
953+
}
954+
955+
majorMinor := strings.TrimSpace(string(devData))
956+
if majorMinor == "" {
957+
return nil
958+
}
959+
960+
// Open udev database file from host: /proc/1/root/run/udev/data/b<major>:<minor>
961+
udevDBPath := filepath.Join(s.hostRootPath, "run", "udev", "data", "b"+majorMinor)
962+
udevDBFile, err := os.Open(udevDBPath)
963+
if err != nil {
964+
return nil
965+
}
966+
defer udevDBFile.Close()
967+
968+
tags := make(map[string]string)
969+
970+
scanner := bufio.NewScanner(udevDBFile)
971+
for scanner.Scan() {
972+
line := scanner.Text()
973+
974+
// Only process device property lines (E:)
975+
if !strings.HasPrefix(line, "E:") {
976+
continue
977+
}
978+
property := strings.TrimPrefix(line, "E:")
979+
980+
parts := strings.SplitN(property, "=", 2)
981+
if len(parts) != 2 {
982+
continue
983+
}
984+
985+
key, value := parts[0], parts[1]
986+
if name, ok := requiredTags[key]; ok && value != "" {
987+
tags[name] = value
988+
}
989+
990+
// Exit early if desired tags are found
991+
if len(tags) == len(requiredTags) {
992+
break
993+
}
994+
}
995+
996+
if err := scanner.Err(); err != nil {
997+
s.log.Errorf("failed to scan udev database for device %s: %v", deviceName, err)
998+
return nil
999+
}
1000+
1001+
return tags
1002+
}
1003+
9331004
func safeUint64ToInt64(val uint64) int64 {
9341005
if val > math.MaxInt64 {
9351006
return math.MaxInt64

cmd/agent/daemon/pipeline/storage_info_provider_test.go

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package pipeline
22

33
import (
44
"os"
5+
"path/filepath"
56
"strconv"
67
"strings"
78
"testing"
@@ -389,7 +390,7 @@ func TestReadProcDiskStats(t *testing.T) {
389390
}
390391
}
391392

392-
func TestBuildBlockDeviceMetrics(t *testing.T) {
393+
func TestCollectBlockDeviceMetrics(t *testing.T) {
393394
diskStatsData := ` 8 0 sda 1000 100 50000 2000 500 50 25000 1000 0 1500 3000
394395
8 1 sda1 800 80 40000 1600 400 40 20000 800 0 1200 2400`
395396

@@ -487,6 +488,54 @@ func TestBuildBlockDeviceMetrics(t *testing.T) {
487488
// NodeTemplate will be nil since kubeClient is nil in this test
488489
}
489490

491+
func TestCollectBlockDeviceMetricsWithLVM(t *testing.T) {
492+
r := require.New(t)
493+
tmpDir := t.TempDir()
494+
495+
// Create sysfs structure for dm-0
496+
dm0Path := filepath.Join(tmpDir, "sys/block/dm-0")
497+
err := os.MkdirAll(dm0Path, 0755)
498+
r.NoError(err)
499+
err = os.WriteFile(filepath.Join(dm0Path, "dev"), []byte("253:0\n"), 0644)
500+
r.NoError(err)
501+
502+
// Create udev database for dm-0 with LVM tags
503+
udevDataDir := filepath.Join(tmpDir, "proc/1/root/run/udev/data")
504+
err = os.MkdirAll(udevDataDir, 0755)
505+
r.NoError(err)
506+
507+
udevContent := `E:DM_NAME=vg-lv_data
508+
E:DM_VG_NAME=vg
509+
E:DM_LV_NAME=lv_data
510+
E:DM_UUID=LVM-abcd1234
511+
E:DEVNAME=/dev/dm-0
512+
E:SUBSYSTEM=block
513+
`
514+
err = os.WriteFile(filepath.Join(udevDataDir, "b253:0"), []byte(udevContent), 0644)
515+
r.NoError(err)
516+
517+
provider := &SysfsStorageInfoProvider{
518+
log: logging.NewTestLog(),
519+
nodeName: "test-node",
520+
kubeClient: nil, // Not needed for this test
521+
sysBlockPrefix: tmpDir,
522+
hostRootPath: filepath.Join(tmpDir, "proc/1/root"),
523+
storageState: &storageMetricsState{
524+
blockDevices: make(map[string]*BlockDeviceMetric),
525+
},
526+
}
527+
528+
now := time.Now().UTC()
529+
dm0Metric := provider.buildBlockDeviceMetric("dm-0", DiskStats{}, now)
530+
r.Equal("dm-0", dm0Metric.Name)
531+
532+
// LVM metadata is populated for dm-0
533+
r.Len(dm0Metric.LVMInfo, 3)
534+
r.Equal("vg-lv_data", dm0Metric.LVMInfo["dm_name"])
535+
r.Equal("vg", dm0Metric.LVMInfo["vg_name"])
536+
r.Equal("lv_data", dm0Metric.LVMInfo["lv_name"])
537+
}
538+
490539
func TestCalculateBlockDeviceRates(t *testing.T) {
491540
tests := []struct {
492541
name string

cmd/agent/daemon/pipeline/storage_pipeline.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func (c *Controller) processBlockDeviceMetrics(timestamp time.Time) error {
5555
return fmt.Errorf("block device metrics writer not initialized")
5656
}
5757

58-
blockMetrics, err := c.storageInfoProvider.BuildBlockDeviceMetrics(timestamp)
58+
blockMetrics, err := c.storageInfoProvider.CollectBlockDeviceMetrics(timestamp)
5959
if err != nil {
6060
return fmt.Errorf("failed to collect block device metrics: %w", err)
6161
}
@@ -74,7 +74,7 @@ func (c *Controller) processFilesystemMetrics(ctx context.Context, timestamp tim
7474
return fmt.Errorf("filesystem metrics writer not initialized")
7575
}
7676

77-
fsMetrics, err := c.storageInfoProvider.BuildFilesystemMetrics(ctx, timestamp)
77+
fsMetrics, err := c.storageInfoProvider.CollectFilesystemMetrics(ctx, timestamp)
7878
if err != nil {
7979
return fmt.Errorf("failed to collect filesystem metrics: %w", err)
8080
}

0 commit comments

Comments
 (0)