diff --git a/e2e/metrics_assertions_test.go b/e2e/metrics_assertions_test.go index 7edb0ff6f..6c7cf99e1 100644 --- a/e2e/metrics_assertions_test.go +++ b/e2e/metrics_assertions_test.go @@ -138,3 +138,29 @@ func assertAgentKnownServerCount(expectedServerCount int) func(context.Context, return ctx } } + +// assertServerCountMetric asserts that the server count metric is set to the expected value. +func assertServerCountMetric(expectedServerCount int) func(context.Context, *testing.T, *envconf.Config) context.Context { + return func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { + client := cfg.Client() + + serverPods := &corev1.PodList{} + err := client.Resources().List(ctx, serverPods, resources.WithLabelSelector("k8s-app=konnectivity-server")) + if err != nil { + t.Fatalf("couldn't get server pods (label selector 'k8s-app=konnectivity-server'): %v", err) + } + + for _, serverPod := range serverPods.Items { + serverCount, err := getMetricsGaugeValue(cfg.Client().RESTConfig(), serverPod.Namespace, serverPod.Name, 8095, "konnectivity_network_proxy_server_server_count") + if err != nil { + t.Fatalf("couldn't get server metric 'konnectivity_network_proxy_server_server_count' for pod %v: %v", serverPod.Name, err) + } + + if serverCount != expectedServerCount { + t.Errorf("incorrect server count metric (want: %v, got: %v)", expectedServerCount, serverCount) + } + } + + return ctx + } +} diff --git a/e2e/static_count_test.go b/e2e/static_count_test.go index 7a3fe8c01..d30297d51 100644 --- a/e2e/static_count_test.go +++ b/e2e/static_count_test.go @@ -92,6 +92,7 @@ func TestSingleServer_SingleAgent_StaticCount(t *testing.T) { feature = feature.Assess("konnectivity server has a connected client", assertServersAreConnected(1)) feature = feature.Assess("konnectivity agent is connected to a server", assertAgentsAreConnected(1)) feature = feature.Assess("agents correctly count 1 server", assertAgentKnownServerCount(1)) + feature = feature.Assess("server count metric is set correctly", assertServerCountMetric(1)) feature = feature.Teardown(deleteDeployment(agentDeployment)) feature = feature.Teardown(deleteDeployment(serverDeployment)) @@ -113,6 +114,7 @@ func TestMultiServer_MultiAgent_StaticCount(t *testing.T) { feature = feature.Assess("all servers connected to all clients", assertServersAreConnected(replicas)) feature = feature.Assess("all agents connected to all servers", assertAgentsAreConnected(replicas)) feature = feature.Assess("agents correctly count all servers", assertAgentKnownServerCount(replicas)) + feature = feature.Assess("server count metric is set correctly", assertServerCountMetric(replicas)) feature = feature.Teardown(deleteDeployment(agentDeployment)) feature = feature.Teardown(deleteDeployment(serverDeployment)) diff --git a/pkg/server/metrics/metrics.go b/pkg/server/metrics/metrics.go index 02c350dfa..50ffbbd4f 100644 --- a/pkg/server/metrics/metrics.go +++ b/pkg/server/metrics/metrics.go @@ -64,6 +64,7 @@ type ServerMetrics struct { leaseDeletes *prometheus.CounterVec leaseListLatencies *prometheus.HistogramVec leaseLists *prometheus.CounterVec + serverCount prometheus.Gauge } // newServerMetrics create a new ServerMetrics, configured with default metric names. @@ -209,6 +210,14 @@ func newServerMetrics() *ServerMetrics { }, []string{"http_status_code", "reason"}, ) + serverCount := prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: Subsystem, + Name: "server_count", + Help: "Number of proxy server instances in the cluster", + }, + ) streamPackets := commonmetrics.MakeStreamPacketsTotalMetric(Namespace, Subsystem) streamErrors := commonmetrics.MakeStreamErrorsTotalMetric(Namespace, Subsystem) prometheus.MustRegister(endpointLatencies) @@ -228,6 +237,7 @@ func newServerMetrics() *ServerMetrics { prometheus.MustRegister(leaseDeletes) prometheus.MustRegister(leaseListLatencies) prometheus.MustRegister(leaseLists) + prometheus.MustRegister(serverCount) return &ServerMetrics{ endpointLatencies: endpointLatencies, frontendLatencies: frontendLatencies, @@ -246,6 +256,7 @@ func newServerMetrics() *ServerMetrics { leaseDeletes: leaseDeletes, leaseListLatencies: leaseListLatencies, leaseLists: leaseLists, + serverCount: serverCount, } } @@ -315,6 +326,11 @@ func (s *ServerMetrics) SetEstablishedConnCount(count int) { s.establishedConns.WithLabelValues().Set(float64(count)) } +// SetServerCount sets the number of proxy server instances in the cluster. +func (s *ServerMetrics) SetServerCount(count int) { + s.serverCount.Set(float64(count)) +} + // FullRecvChannel retrieves the metric for counting full receive channels. func (s *ServerMetrics) FullRecvChannel(serviceMethod string) prometheus.Gauge { return s.fullRecvChannels.With(prometheus.Labels{"service_method": serviceMethod}) diff --git a/pkg/server/metrics/metrics_test.go b/pkg/server/metrics/metrics_test.go new file mode 100644 index 000000000..e49c5fcac --- /dev/null +++ b/pkg/server/metrics/metrics_test.go @@ -0,0 +1,62 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" +) + +// TestServerCountMetric tests the server count metric. +func TestServerCountMetric(t *testing.T) { + // Reset metrics to ensure clean state + Metrics.Reset() + + // Test setting server count + expectedCount := 3 + Metrics.SetServerCount(expectedCount) + + // Verify the metric value + actualCount := testutil.ToFloat64(Metrics.serverCount) + if actualCount != float64(expectedCount) { + t.Errorf("Expected server count %d, got %f", expectedCount, actualCount) + } + + // Test updating server count + newCount := 5 + Metrics.SetServerCount(newCount) + actualCount = testutil.ToFloat64(Metrics.serverCount) + if actualCount != float64(newCount) { + t.Errorf("Expected updated server count %d, got %f", newCount, actualCount) + } +} + +// TestServerCountMetricRegistration tests the registration of the server count metric. +func TestServerCountMetricRegistration(t *testing.T) { + // Verify the metric is properly registered + registry := prometheus.NewRegistry() + registry.MustRegister(Metrics.serverCount) + + // Set a value and check if it's accessible + Metrics.SetServerCount(2) + actualCount := testutil.ToFloat64(Metrics.serverCount) + if actualCount != 2.0 { + t.Errorf("Expected server count 2, got %f", actualCount) + } +} diff --git a/pkg/server/server.go b/pkg/server/server.go index e609584f1..3eda8656a 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -389,6 +389,8 @@ func NewProxyServer(serverID string, proxyStrategies []proxystrategies.ProxyStra } } + metrics.Metrics.SetServerCount(serverCount) + return &ProxyServer{ established: make(map[string](map[int64]*ProxyClientConnection)), PendingDial: NewPendingDialManager(),