Skip to content

Commit 3257018

Browse files
committed
Wire in metrics
1 parent 89f9019 commit 3257018

File tree

2 files changed

+56
-0
lines changed

2 files changed

+56
-0
lines changed

pkg/agent/lease_counter.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@ package agent
22

33
import (
44
"context"
5+
"errors"
56
"time"
67

78
"k8s.io/apimachinery/pkg/labels"
89
"k8s.io/client-go/kubernetes"
910
"k8s.io/klog/v2"
11+
"sigs.k8s.io/apiserver-network-proxy/pkg/agent/metrics"
1012

1113
coordinationv1api "k8s.io/api/coordination/v1"
14+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1215
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1316
coordinationv1 "k8s.io/client-go/kubernetes/typed/coordination/v1"
1417
)
@@ -42,12 +45,28 @@ func NewServerLeaseCounter(k8sClient kubernetes.Interface, labelSelector labels.
4245
func (lc *ServerLeaseCounter) Count(ctx context.Context) int {
4346
// Since the number of proxy servers is generally small (1-10), we opted against
4447
// using a LIST and WATCH pattern and instead list all leases on each call.
48+
start := timeNow()
49+
defer func() {
50+
latency := timeNow().Sub(start)
51+
metrics.Metrics.ObserveLeaseListLatency(latency)
52+
}()
4553
leases, err := lc.leaseClient.List(ctx, metav1.ListOptions{LabelSelector: lc.selector.String()})
4654
if err != nil {
4755
klog.Errorf("could not list leases to update server count, using fallback count (%v): %v", lc.fallbackCount, err)
56+
57+
apiStatus, ok := err.(apierrors.APIStatus)
58+
if ok || errors.As(err, &apiStatus) {
59+
status := apiStatus.Status()
60+
metrics.Metrics.ObserveLeaseList(int(status.Code), string(status.Reason))
61+
} else {
62+
klog.Errorf("error could not be logged to metrics as it is not an APIStatus: %v", err)
63+
}
64+
4865
return lc.fallbackCount
4966
}
5067

68+
metrics.Metrics.ObserveLeaseList(200, "")
69+
5170
count := 0
5271
for _, lease := range leases.Items {
5372
if isLeaseValid(lease) {

pkg/agent/metrics/metrics.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package metrics
1818

1919
import (
20+
"strconv"
2021
"time"
2122

2223
"github.com/prometheus/client_golang/prometheus"
@@ -57,6 +58,8 @@ type AgentMetrics struct {
5758
endpointConnections *prometheus.GaugeVec
5859
streamPackets *prometheus.CounterVec
5960
streamErrors *prometheus.CounterVec
61+
leaseLists *prometheus.CounterVec
62+
leaseListLatencies *prometheus.HistogramVec
6063
}
6164

6265
// newAgentMetrics creates a new AgentMetrics, configured with default metric names.
@@ -116,6 +119,25 @@ func newAgentMetrics() *AgentMetrics {
116119
},
117120
[]string{},
118121
)
122+
leaseLists := prometheus.NewCounterVec(
123+
prometheus.CounterOpts{
124+
Namespace: Namespace,
125+
Subsystem: Subsystem,
126+
Name: "lease_lists_total",
127+
Help: "Count of server lease list calls made by the agent to the k8s apiserver, labeled by HTTP response code and reason",
128+
},
129+
[]string{"http_response_code", "reason"},
130+
)
131+
leaseListLatencies := prometheus.NewHistogramVec(
132+
prometheus.HistogramOpts{
133+
Namespace: Namespace,
134+
Subsystem: Subsystem,
135+
Name: "lease_list_latency_seconds",
136+
Help: "Latency of server lease listing in seconds",
137+
Buckets: latencyBuckets,
138+
},
139+
[]string{},
140+
)
119141
streamPackets := commonmetrics.MakeStreamPacketsTotalMetric(Namespace, Subsystem)
120142
streamErrors := commonmetrics.MakeStreamErrorsTotalMetric(Namespace, Subsystem)
121143
prometheus.MustRegister(dialLatencies)
@@ -126,6 +148,8 @@ func newAgentMetrics() *AgentMetrics {
126148
prometheus.MustRegister(streamPackets)
127149
prometheus.MustRegister(streamErrors)
128150
prometheus.MustRegister(serverCount)
151+
prometheus.MustRegister(leaseLists)
152+
prometheus.MustRegister(leaseListLatencies)
129153
return &AgentMetrics{
130154
dialLatencies: dialLatencies,
131155
serverFailures: serverFailures,
@@ -135,6 +159,8 @@ func newAgentMetrics() *AgentMetrics {
135159
streamPackets: streamPackets,
136160
streamErrors: streamErrors,
137161
serverCount: serverCount,
162+
leaseLists: leaseLists,
163+
leaseListLatencies: leaseListLatencies,
138164
}
139165

140166
}
@@ -148,6 +174,9 @@ func (a *AgentMetrics) Reset() {
148174
a.endpointConnections.Reset()
149175
a.streamPackets.Reset()
150176
a.streamErrors.Reset()
177+
a.serverCount.Reset()
178+
a.leaseLists.Reset()
179+
a.leaseListLatencies.Reset()
151180
}
152181

153182
// ObserveServerFailure records a failure to send to or receive from the proxy
@@ -181,6 +210,14 @@ func (a *AgentMetrics) SetServerCount(count int) {
181210
a.serverCount.WithLabelValues().Set(float64(count))
182211
}
183212

213+
// ObserveLeaseList increments the lease list counter for the given HTTP
// response code and reason. The code is converted to its decimal string
// form to serve as the "http_response_code" label value.
func (a *AgentMetrics) ObserveLeaseList(httpCode int, reason string) {
214+
a.leaseLists.WithLabelValues(strconv.Itoa(httpCode), reason).Inc()
215+
}
216+
217+
// ObserveLeaseListLatency records the given latency, expressed in seconds,
// as an observation in the lease list latency histogram.
func (a *AgentMetrics) ObserveLeaseListLatency(latency time.Duration) {
218+
a.leaseListLatencies.WithLabelValues().Observe(latency.Seconds())
219+
}
220+
184221
// EndpointConnectionInc increments a new endpoint connection.
185222
func (a *AgentMetrics) EndpointConnectionInc() {
186223
a.endpointConnections.WithLabelValues().Inc()

0 commit comments

Comments
 (0)