process metrics linux: better performance + tests

jelmd · jelmd · commit add67120cf07 · 2026-02-12T02:23:46.000+01:00
rebased on master (v1.35.2 -> v1.40.2)
diff --git a/process_metrics_linux.go b/process_metrics_linux.go
diff --git a/process_metrics_linux_test.go b/process_metrics_linux_test.go
@@ -1,51 +1,108 @@
 package metrics
 
-import "testing"
-
-func TestGetMaxFilesLimit(t *testing.T) {
-	f := func(want uint64, path string, wantErr bool) {
-		t.Helper()
-		got, err := getMaxFilesLimit(path)
-		if err != nil && !wantErr {
-			t.Fatalf("unexpected error: %v", err)
-		}
-		if got != want {
-			t.Fatalf("unexpected result: %d, want: %d at getMaxFilesLimit", got, want)
-		}
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"regexp"
+	"strings"
+	"testing"
+)
 
+var testdir string
+
+func init() {
+	testdir, _ = os.Getwd()
+	testdir += "/testdata/"
+}
+
+func getTestData(filename string, t *testing.T) string {
+	data, err := os.ReadFile(testdir + filename)
+	if err != nil {
+		t.Fatalf("%v", err)
+	}
+	s := string(data)
+	if filename == "linux.proc_metrics.out" {
+		// since linux stat.starttime is relative to boot, we need to adjust
+		// the expected results regarding this.
+		m := regexp.MustCompile("process_start_time_seconds [0-9]+")
+		n := fmt.Sprintf("process_start_time_seconds %d", startTimeSeconds)
+		return m.ReplaceAllString(s, n)
 	}
-	f(1024, "testdata/limits", false)
-	f(0, "testdata/bad_path", true)
-	f(0, "testdata/limits_bad", true)
+	return s
 }
 
-func TestGetOpenFDsCount(t *testing.T) {
-	f := func(want uint64, path string, wantErr bool) {
-		t.Helper()
-		got, err := getOpenFDsCount(path)
-		if (err != nil && !wantErr) || (err == nil && wantErr) {
-			t.Fatalf("unexpected error: %v", err)
-		}
-		if got != want {
-			t.Fatalf("unexpected result: %d, want: %d at getOpenFDsCount", got, want)
+func stripComments(input string) string {
+	var builder strings.Builder
+	lines := strings.Split(input, "\n")
+	for _, line := range lines {
+		s := strings.TrimSpace(line)
+		if strings.HasPrefix(s, "#") || s == "" {
+			continue
 		}
+		builder.WriteString(line + "\n")
 	}
-	f(5, "testdata/fd/", false)
-	f(0, "testdata/fd/0", true)
-	f(0, "testdata/limits", true)
+	return builder.String()
 }
 
-func TestGetMemStats(t *testing.T) {
-	f := func(want memStats, path string, wantErr bool) {
-		t.Helper()
-		got, err := getMemStats(path)
-		if (err != nil && !wantErr) || (err == nil && wantErr) {
-			t.Fatalf("unexpected error: %v", err)
-		}
-		if got != nil && *got != want {
-			t.Fatalf("unexpected result: %d, want: %d at getMemStats", *got, want)
+func Test_processMetrics(t *testing.T) {
+	diffFormat := "Test %s:\n\tgot:\n'%v'\n\twant:\n'%v'"
+	tests := []struct {
+		name  string
+		wantW string
+		fn    func(w io.Writer)
+	}{
+		{"pm", getTestData("linux.proc_metrics.out", t), writeProcessMetrics},
+		{"fdm", getTestData("linux.fd_metrics.out", t), writeFDMetrics},
+	}
+	for _, compact := range []bool{true, false} {
+		ExposeMetadata(!compact)
+		for _, tt := range tests {
+			want := tt.wantW
+			if compact {
+				want = stripComments(want)
+			}
+			t.Run(tt.name, func(t *testing.T) {
+				w := &bytes.Buffer{}
+				tt.fn(w)
+				if gotW := w.String(); gotW != want {
+					t.Errorf(diffFormat, tt.name, gotW, want)
+				}
+			})
 		}
 	}
-	f(memStats{vmPeak: 2130489344, rssPeak: 200679424, rssAnon: 121602048, rssFile: 11362304}, "testdata/status", false)
-	f(memStats{}, "testdata/status_bad", true)
+
+	// missing /proc/<pid>/io file - just omit the process_io_* metric entries
+	// see https://github.com/VictoriaMetrics/metrics/issues/42
+	tt := tests[0]
+	want := stripComments(tt.wantW)
+	m := regexp.MustCompile("process_io_[_a-z]+ [0-9]+\n")
+	wantW := m.ReplaceAllString(want, "")
+	testfiles[FD_IO] = "/doesNotExist"
+	ExposeMetadata(false) // no need to check comments again
+	init2()
+	t.Run(tt.name, func(t *testing.T) {
+		w := &bytes.Buffer{}
+		tt.fn(w)
+		if gotW := w.String(); gotW != wantW {
+			t.Errorf(diffFormat, tt.name, gotW, wantW)
+		}
+	})
+
+	// bad limits: just omit the process_max_fds metric entry
+	tt = tests[1]
+	want = stripComments(tt.wantW)
+	m = regexp.MustCompile("process_max_fds [0-9]+\n")
+	wantW = m.ReplaceAllString(want, "")
+	testfiles[FD_LIMITS] = "/limits_bad"
+	init2()
+	t.Run(tt.name, func(t *testing.T) {
+		w := &bytes.Buffer{}
+		tt.fn(w)
+		if gotW := w.String(); gotW != wantW {
+			t.Errorf(diffFormat, tt.name, gotW, wantW)
+		}
+	})
+
 }
diff --git a/testdata/limits b/testdata/limits
diff --git a/testdata/linux.fd_metrics.out b/testdata/linux.fd_metrics.out
@@ -0,0 +1,6 @@
+# HELP process_max_fds
+# TYPE process_max_fds gauge
+process_max_fds 2048
+# HELP process_open_fds
+# TYPE process_open_fds gauge
+process_open_fds 5
diff --git a/testdata/linux.proc_metrics.out b/testdata/linux.proc_metrics.out
@@ -0,0 +1,63 @@
+# HELP process_io_read_bytes
+# TYPE process_io_read_bytes gauge
+process_io_read_bytes 0
+# HELP process_io_written_bytes
+# TYPE process_io_written_bytes gauge
+process_io_written_bytes 0
+# HELP process_io_read_syscalls
+# TYPE process_io_read_syscalls gauge
+process_io_read_syscalls 0
+# HELP process_io_write_syscalls
+# TYPE process_io_write_syscalls gauge
+process_io_write_syscalls 0
+# HELP process_io_storage_read_bytes
+# TYPE process_io_storage_read_bytes gauge
+process_io_storage_read_bytes 0
+# HELP process_io_storage_written_bytes
+# TYPE process_io_storage_written_bytes gauge
+process_io_storage_written_bytes 0
+# HELP process_psi_cpu_some_us
+# TYPE process_psi_cpu_some_us counter
+process_psi_cpu_some_us 8655568
+# HELP process_psi_cpu_full_us
+# TYPE process_psi_cpu_full_us counter
+process_psi_cpu_full_us 8558482
+# HELP process_psi_io_some_us
+# TYPE process_psi_io_some_us counter
+process_psi_io_some_us 10791243
+# HELP process_psi_io_full_us
+# TYPE process_psi_io_full_us counter
+process_psi_io_full_us 10763684
+# HELP process_psi_memory_some_us
+# TYPE process_psi_memory_some_us counter
+process_psi_memory_some_us 0
+# HELP process_psi_memory_full_us
+# TYPE process_psi_memory_full_us counter
+process_psi_memory_full_us 0
+# HELP process_system_cpu_seconds
+# TYPE process_system_cpu_seconds counter
+process_system_cpu_seconds 0.08
+# HELP process_total_cpu_seconds
+# TYPE process_total_cpu_seconds counter
+process_total_cpu_seconds 0.18
+# HELP process_user_cpu_seconds
+# TYPE process_user_cpu_seconds counter
+process_user_cpu_seconds 0.1
+# HELP process_major_pagefaults
+# TYPE process_major_pagefaults counter
+process_major_pagefaults 0
+# HELP process_minor_pagefaults
+# TYPE process_minor_pagefaults counter
+process_minor_pagefaults 3111
+# HELP process_num_threads
+# TYPE process_num_threads gauge
+process_num_threads 1
+# HELP process_resident_memory_bytes
+# TYPE process_resident_memory_bytes gauge
+process_resident_memory_bytes 4231168
+# HELP process_start_time_seconds
+# TYPE process_start_time_seconds gauge
+process_start_time_seconds 1742515804
+# HELP process_virtual_memory_bytes
+# TYPE process_virtual_memory_bytes gauge
+process_virtual_memory_bytes 25751552
diff --git a/testdata/linux.ps_io b/testdata/linux.ps_io
@@ -0,0 +1,57 @@
+Name:	tcsh
+Umask:	0022
+State:	S (sleeping)
+Tgid:	847024
+Ngid:	0
+Pid:	847024
+PPid:	847023
+TracerPid:	0
+Uid:	6018	6018	6018	6018
+Gid:	1502	1502	1502	1502
+FDSize:	64
+Groups:	14 1501 1502 1504 1510 1520 1530 
+NStgid:	847024
+NSpid:	847024
+NSpgid:	847024
+NSsid:	847024
+VmPeak:	   25148 kB
+VmSize:	   25148 kB
+VmLck:	       0 kB
+VmPin:	       0 kB
+VmHWM:	    4132 kB
+VmRSS:	    4132 kB
+RssAnon:	    1588 kB
+RssFile:	    2544 kB
+RssShmem:	       0 kB
+VmData:	    2584 kB
+VmStk:	     132 kB
+VmExe:	     304 kB
+VmLib:	    1972 kB
+VmPTE:	      68 kB
+VmSwap:	       0 kB
+HugetlbPages:	       0 kB
+CoreDumping:	0
+THP_enabled:	1
+Threads:	1
+SigQ:	0/254739
+SigPnd:	0000000000000000
+ShdPnd:	0000000000000000
+SigBlk:	0000000000000002
+SigIgn:	0000000000384004
+SigCgt:	0000000009812003
+CapInh:	0000000000000000
+CapPrm:	0000000000000000
+CapEff:	0000000000000000
+CapBnd:	000001ffffffffff
+CapAmb:	0000000000000000
+NoNewPrivs:	0
+Seccomp:	0
+Seccomp_filters:	0
+Speculation_Store_Bypass:	thread vulnerable
+SpeculationIndirectBranch:	conditional enabled
+Cpus_allowed:	ffffff
+Cpus_allowed_list:	0-23
+Mems_allowed:	00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001
+Mems_allowed_list:	0
+voluntary_ctxt_switches:	821
+nonvoluntary_ctxt_switches:	6
diff --git a/testdata/linux.ps_limits b/testdata/linux.ps_limits
@@ -0,0 +1,17 @@
+Limit                     Soft Limit           Hard Limit           Units     
+Max cpu time              unlimited            unlimited            seconds   
+Max file size             unlimited            unlimited            bytes     
+Max data size             unlimited            unlimited            bytes     
+Max stack size            8388608              unlimited            bytes     
+Max core file size        unlimited            unlimited            bytes     
+Max resident set          unlimited            unlimited            bytes     
+Max processes             254739               254739               processes 
+Max open files            2048                 1048576              files     
+Max locked memory         8351289344           8351289344           bytes     
+Max address space         unlimited            unlimited            bytes     
+Max file locks            unlimited            unlimited            locks     
+Max pending signals       254739               254739               signals   
+Max msgqueue size         819200               819200               bytes     
+Max nice priority         0                    0                    
+Max realtime priority     0                    0                    
+Max realtime timeout      unlimited            unlimited            us        
diff --git a/testdata/linux.ps_psi_cpu b/testdata/linux.ps_psi_cpu
@@ -0,0 +1,2 @@
+some avg10=0.00 avg60=0.00 avg300=0.00 total=8655568
+full avg10=0.00 avg60=0.00 avg300=0.00 total=8558482
diff --git a/testdata/linux.ps_psi_io b/testdata/linux.ps_psi_io
@@ -0,0 +1,2 @@
+some avg10=0.00 avg60=0.00 avg300=0.00 total=10791243
+full avg10=0.00 avg60=0.00 avg300=0.00 total=10763684
diff --git a/testdata/linux.ps_psi_mem b/testdata/linux.ps_psi_mem
@@ -0,0 +1,2 @@
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+full avg10=0.00 avg60=0.00 avg300=0.00 total=0
diff --git a/testdata/linux.ps_stat b/testdata/linux.ps_stat
@@ -0,0 +1 @@
+847024 (tcsh) S 847023 847024 847024 34820 946306 4194304 3111 8084 0 5 10 8 13 10 20 0 1 0 70598402 25751552 1033 18446744073709551615 94248001904640 94248002214501 140727550087072 0 0 0 2 3686404 159457283 1 0 0 17 14 0 0 0 0 0 94248002292016 94248002313406 94249019904000 140727550090699 140727550090705 140727550090705 140727550091246 0
diff --git a/testdata/linux.ps_status b/testdata/linux.ps_status
@@ -0,0 +1,57 @@
+Name:	tcsh
+Umask:	0022
+State:	S (sleeping)
+Tgid:	847024
+Ngid:	0
+Pid:	847024
+PPid:	847023
+TracerPid:	0
+Uid:	6018	6018	6018	6018
+Gid:	1502	1502	1502	1502
+FDSize:	64
+Groups:	14 1501 1502 1504 1510 1520 1530 
+NStgid:	847024
+NSpid:	847024
+NSpgid:	847024
+NSsid:	847024
+VmPeak:	   25148 kB
+VmSize:	   25148 kB
+VmLck:	       0 kB
+VmPin:	       0 kB
+VmHWM:	    4132 kB
+VmRSS:	    4132 kB
+RssAnon:	    1588 kB
+RssFile:	    2544 kB
+RssShmem:	       0 kB
+VmData:	    2584 kB
+VmStk:	     132 kB
+VmExe:	     304 kB
+VmLib:	    1972 kB
+VmPTE:	      68 kB
+VmSwap:	       0 kB
+HugetlbPages:	       0 kB
+CoreDumping:	0
+THP_enabled:	1
+Threads:	1
+SigQ:	0/254739
+SigPnd:	0000000000000000
+ShdPnd:	0000000000000000
+SigBlk:	0000000000000002
+SigIgn:	0000000000384004
+SigCgt:	0000000009812003
+CapInh:	0000000000000000
+CapPrm:	0000000000000000
+CapEff:	0000000000000000
+CapBnd:	000001ffffffffff
+CapAmb:	0000000000000000
+NoNewPrivs:	0
+Seccomp:	0
+Seccomp_filters:	0
+Speculation_Store_Bypass:	thread vulnerable
+SpeculationIndirectBranch:	conditional enabled
+Cpus_allowed:	ffffff
+Cpus_allowed_list:	0-23
+Mems_allowed:	00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001
+Mems_allowed_list:	0
+voluntary_ctxt_switches:	810
+nonvoluntary_ctxt_switches:	6

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+some avg10=0.00 avg60=0.00 avg300=0.00 total=8655568`
	`2`	`+full avg10=0.00 avg60=0.00 avg300=0.00 total=8558482`