Skip to content

Add Server count metric #760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions e2e/metrics_assertions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,29 @@ func assertAgentKnownServerCount(expectedServerCount int) func(context.Context,
return ctx
}
}

// assertServerCountMetric asserts that the server count metric is set to the expected value.
func assertServerCountMetric(expectedServerCount int) func(context.Context, *testing.T, *envconf.Config) context.Context {
return func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
client := cfg.Client()

serverPods := &corev1.PodList{}
err := client.Resources().List(ctx, serverPods, resources.WithLabelSelector("k8s-app=konnectivity-server"))
if err != nil {
t.Fatalf("couldn't get server pods (label selector 'k8s-app=konnectivity-server'): %v", err)
}

for _, serverPod := range serverPods.Items {
serverCount, err := getMetricsGaugeValue(cfg.Client().RESTConfig(), serverPod.Namespace, serverPod.Name, 8095, "konnectivity_network_proxy_server_server_count")
if err != nil {
t.Fatalf("couldn't get server metric 'konnectivity_network_proxy_server_server_count' for pod %v: %v", serverPod.Name, err)
}

if serverCount != expectedServerCount {
t.Errorf("incorrect server count metric (want: %v, got: %v)", expectedServerCount, serverCount)
}
}

return ctx
}
}
2 changes: 2 additions & 0 deletions e2e/static_count_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ func TestSingleServer_SingleAgent_StaticCount(t *testing.T) {
feature = feature.Assess("konnectivity server has a connected client", assertServersAreConnected(1))
feature = feature.Assess("konnectivity agent is connected to a server", assertAgentsAreConnected(1))
feature = feature.Assess("agents correctly count 1 server", assertAgentKnownServerCount(1))
feature = feature.Assess("server count metric is set correctly", assertServerCountMetric(1))
feature = feature.Teardown(deleteDeployment(agentDeployment))
feature = feature.Teardown(deleteDeployment(serverDeployment))

Expand All @@ -113,6 +114,7 @@ func TestMultiServer_MultiAgent_StaticCount(t *testing.T) {
feature = feature.Assess("all servers connected to all clients", assertServersAreConnected(replicas))
feature = feature.Assess("all agents connected to all servers", assertAgentsAreConnected(replicas))
feature = feature.Assess("agents correctly count all servers", assertAgentKnownServerCount(replicas))
feature = feature.Assess("server count metric is set correctly", assertServerCountMetric(replicas))
feature = feature.Teardown(deleteDeployment(agentDeployment))
feature = feature.Teardown(deleteDeployment(serverDeployment))

Expand Down
16 changes: 16 additions & 0 deletions pkg/server/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ type ServerMetrics struct {
leaseDeletes *prometheus.CounterVec
leaseListLatencies *prometheus.HistogramVec
leaseLists *prometheus.CounterVec
serverCount prometheus.Gauge
}

// newServerMetrics create a new ServerMetrics, configured with default metric names.
Expand Down Expand Up @@ -209,6 +210,14 @@ func newServerMetrics() *ServerMetrics {
},
[]string{"http_status_code", "reason"},
)
serverCount := prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "server_count",
Help: "Number of proxy server instances in the cluster",
},
)
streamPackets := commonmetrics.MakeStreamPacketsTotalMetric(Namespace, Subsystem)
streamErrors := commonmetrics.MakeStreamErrorsTotalMetric(Namespace, Subsystem)
prometheus.MustRegister(endpointLatencies)
Expand All @@ -228,6 +237,7 @@ func newServerMetrics() *ServerMetrics {
prometheus.MustRegister(leaseDeletes)
prometheus.MustRegister(leaseListLatencies)
prometheus.MustRegister(leaseLists)
prometheus.MustRegister(serverCount)
return &ServerMetrics{
endpointLatencies: endpointLatencies,
frontendLatencies: frontendLatencies,
Expand All @@ -246,6 +256,7 @@ func newServerMetrics() *ServerMetrics {
leaseDeletes: leaseDeletes,
leaseListLatencies: leaseListLatencies,
leaseLists: leaseLists,
serverCount: serverCount,
}
}

Expand Down Expand Up @@ -315,6 +326,11 @@ func (s *ServerMetrics) SetEstablishedConnCount(count int) {
s.establishedConns.WithLabelValues().Set(float64(count))
}

// SetServerCount sets the number of proxy server instances in the cluster.
func (s *ServerMetrics) SetServerCount(count int) {
s.serverCount.Set(float64(count))
}

// FullRecvChannel retrieves the metric for counting full receive channels.
func (s *ServerMetrics) FullRecvChannel(serviceMethod string) prometheus.Gauge {
return s.fullRecvChannels.With(prometheus.Labels{"service_method": serviceMethod})
Expand Down
62 changes: 62 additions & 0 deletions pkg/server/metrics/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"testing"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
)

// TestServerCountMetric tests the server count metric.
func TestServerCountMetric(t *testing.T) {
// Reset metrics to ensure clean state
Metrics.Reset()

// Test setting server count
expectedCount := 3
Metrics.SetServerCount(expectedCount)

// Verify the metric value
actualCount := testutil.ToFloat64(Metrics.serverCount)
if actualCount != float64(expectedCount) {
t.Errorf("Expected server count %d, got %f", expectedCount, actualCount)
}

// Test updating server count
newCount := 5
Metrics.SetServerCount(newCount)
actualCount = testutil.ToFloat64(Metrics.serverCount)
if actualCount != float64(newCount) {
t.Errorf("Expected updated server count %d, got %f", newCount, actualCount)
}
}

// TestServerCountMetricRegistration tests the registration of the server count metric.
func TestServerCountMetricRegistration(t *testing.T) {
// Verify the metric is properly registered
registry := prometheus.NewRegistry()
registry.MustRegister(Metrics.serverCount)

// Set a value and check if it's accessible
Metrics.SetServerCount(2)
actualCount := testutil.ToFloat64(Metrics.serverCount)
if actualCount != 2.0 {
t.Errorf("Expected server count 2, got %f", actualCount)
}
}
2 changes: 2 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,8 @@ func NewProxyServer(serverID string, proxyStrategies []proxystrategies.ProxyStra
}
}

metrics.Metrics.SetServerCount(serverCount)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe serverCount is the value passed in from the command line. https://github.com/kubernetes-sigs/apiserver-network-proxy/blob/v0.33.0/cmd/server/app/server.go#L141 is the only place I know its set.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, do you want me to move this SetServerCount to /cmd/server/app/server.go itself?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. I was more thinking that this is the API configured server count, not necessarily the actual server count. It does not actually tell you many servers are up, just how many we advertise to the agents. Even with that the agents themselves will periodically attempt to detect if there are more than advertised.

The server lease for instance (if used) has an actual count of the number of servers which are running.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah my bad, got your point now.

Since the agent already has the lease informer, should we add the server_count metric to the agent (i.e., the NewLeaseInformerWithMetrics) or create a new lease informer in the server pkg and use it in the server?"

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on further investigation came across this, looks like the agent already exposes the known_server_count metric.

serverCount := prometheus.NewGauge(


return &ProxyServer{
established: make(map[string](map[int64]*ProxyClientConnection)),
PendingDial: NewPendingDialManager(),
Expand Down