Skip to content

Commit 175cb5e

Browse files
committed
Add lease deletion HTTP call metrics
1 parent cbce404 commit 175cb5e

File tree

2 files changed

+77
-26
lines changed

2 files changed

+77
-26
lines changed

pkg/server/leases/gc_controller.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ package leases
1717

1818
import (
1919
"context"
20+
"errors"
2021
"time"
2122

22-
"k8s.io/apimachinery/pkg/api/errors"
23+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2324
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2425
"k8s.io/apimachinery/pkg/util/wait"
2526
"k8s.io/client-go/kubernetes"
@@ -66,13 +67,30 @@ func (c *GarbageCollectionController) gc(ctx context.Context) {
6667

6768
// Optimistic concurrency: if a lease has a different resourceVersion than
6869
// when we got it, it may have been renewed.
70+
start := time.Now()
6971
err := c.leaseInterface.Delete(ctx, lease.Name, *metav1.NewRVDeletionPrecondition(lease.ResourceVersion))
70-
if errors.IsNotFound(err) {
72+
if apierrors.IsNotFound(err) {
7173
klog.V(4).Infof("Lease %v was already deleted", lease.Name)
7274
} else if err != nil {
7375
klog.Errorf("Could not delete lease %v: %v", lease.Name, err)
7476
} else {
7577
metrics.Metrics.CulledLeasesInc()
7678
}
79+
80+
// Log metrics for the deletion call.
81+
latency := time.Now().Sub(start)
82+
if err != nil {
83+
var apiStatus apierrors.APIStatus
84+
if errors.As(err, &apiStatus) {
85+
status := apiStatus.Status()
86+
metrics.Metrics.ObserveLeaseDelete(int(status.Code), string(status.Reason))
87+
metrics.Metrics.ObserveLeaseDeleteLatency(int(status.Code), latency)
88+
} else {
89+
klog.Errorf("Lease delete error could not be logged to metrics as it is not an APIStatus: %v", err)
90+
}
91+
} else {
92+
metrics.Metrics.ObserveLeaseDelete(200, "")
93+
metrics.Metrics.ObserveLeaseDeleteLatency(200, latency)
94+
}
7795
}
7896
}

pkg/server/metrics/metrics.go

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package metrics
1818

1919
import (
20+
"strconv"
2021
"time"
2122

2223
"github.com/prometheus/client_golang/prometheus"
@@ -45,18 +46,20 @@ var (
4546

4647
// ServerMetrics includes all the metrics of the proxy server.
4748
type ServerMetrics struct {
48-
endpointLatencies *prometheus.HistogramVec
49-
frontendLatencies *prometheus.HistogramVec
50-
grpcConnections *prometheus.GaugeVec
51-
httpConnections prometheus.Gauge
52-
backend *prometheus.GaugeVec
53-
pendingDials *prometheus.GaugeVec
54-
establishedConns *prometheus.GaugeVec
55-
fullRecvChannels *prometheus.GaugeVec
56-
dialFailures *prometheus.CounterVec
57-
streamPackets *prometheus.CounterVec
58-
streamErrors *prometheus.CounterVec
59-
culledLeases prometheus.Counter
49+
endpointLatencies *prometheus.HistogramVec
50+
frontendLatencies *prometheus.HistogramVec
51+
grpcConnections *prometheus.GaugeVec
52+
httpConnections prometheus.Gauge
53+
backend *prometheus.GaugeVec
54+
pendingDials *prometheus.GaugeVec
55+
establishedConns *prometheus.GaugeVec
56+
fullRecvChannels *prometheus.GaugeVec
57+
dialFailures *prometheus.CounterVec
58+
streamPackets *prometheus.CounterVec
59+
streamErrors *prometheus.CounterVec
60+
culledLeases prometheus.Counter
61+
leaseDeleteLatencies *prometheus.HistogramVec
62+
leaseDeletes *prometheus.CounterVec
6063
}
6164

6265
// newServerMetrics create a new ServerMetrics, configured with default metric names.
@@ -155,6 +158,24 @@ func newServerMetrics() *ServerMetrics {
155158
Name: "culled_leases_count",
156159
Help: "Count of expired leases that the lease garbage collection controller has culled.",
157160
})
161+
leaseDeleteLatencies := prometheus.NewHistogramVec(
162+
prometheus.HistogramOpts{
163+
Namespace: Namespace,
164+
Subsystem: Subsystem,
165+
Name: "lease_delete_latency_seconds",
166+
Help: "Latency of lease deletion calls by the garbage collection controller in seconds.",
167+
},
168+
[]string{"http_status_code"},
169+
)
170+
leaseDeletes := prometheus.NewCounterVec(
171+
prometheus.CounterOpts{
172+
Namespace: Namespace,
173+
Subsystem: Subsystem,
174+
Name: "lease_delete_total",
175+
Help: "Count of lease deletion calls by the garbage collection controller. Labeled by HTTP status code and reason.",
176+
},
177+
[]string{"http_status_code", "reason"},
178+
)
158179
streamPackets := commonmetrics.MakeStreamPacketsTotalMetric(Namespace, Subsystem)
159180
streamErrors := commonmetrics.MakeStreamErrorsTotalMetric(Namespace, Subsystem)
160181
prometheus.MustRegister(endpointLatencies)
@@ -169,19 +190,23 @@ func newServerMetrics() *ServerMetrics {
169190
prometheus.MustRegister(streamPackets)
170191
prometheus.MustRegister(streamErrors)
171192
prometheus.MustRegister(culledLeases)
193+
prometheus.MustRegister(leaseDeleteLatencies)
194+
prometheus.MustRegister(leaseDeletes)
172195
return &ServerMetrics{
173-
endpointLatencies: endpointLatencies,
174-
frontendLatencies: frontendLatencies,
175-
grpcConnections: grpcConnections,
176-
httpConnections: httpConnections,
177-
backend: backend,
178-
pendingDials: pendingDials,
179-
establishedConns: establishedConns,
180-
fullRecvChannels: fullRecvChannels,
181-
dialFailures: dialFailures,
182-
streamPackets: streamPackets,
183-
streamErrors: streamErrors,
184-
culledLeases: culledLeases,
196+
endpointLatencies: endpointLatencies,
197+
frontendLatencies: frontendLatencies,
198+
grpcConnections: grpcConnections,
199+
httpConnections: httpConnections,
200+
backend: backend,
201+
pendingDials: pendingDials,
202+
establishedConns: establishedConns,
203+
fullRecvChannels: fullRecvChannels,
204+
dialFailures: dialFailures,
205+
streamPackets: streamPackets,
206+
streamErrors: streamErrors,
207+
culledLeases: culledLeases,
208+
leaseDeleteLatencies: leaseDeleteLatencies,
209+
leaseDeletes: leaseDeletes,
185210
}
186211
}
187212

@@ -276,3 +301,11 @@ func (s *ServerMetrics) ObserveStreamErrorNoPacket(segment commonmetrics.Segment
276301
func (s *ServerMetrics) ObserveStreamError(segment commonmetrics.Segment, err error, packetType client.PacketType) {
277302
commonmetrics.ObserveStreamError(s.streamErrors, segment, err, packetType)
278303
}
304+
305+
// ObserveLeaseDeleteLatency records the latency of a lease deletion call, in
// seconds, in the lease delete latency histogram. The observation is labeled
// with httpCode formatted as a decimal string (the "http_status_code" label).
func (s *ServerMetrics) ObserveLeaseDeleteLatency(httpCode int, latency time.Duration) {
306+
s.leaseDeleteLatencies.WithLabelValues(strconv.Itoa(httpCode)).Observe(latency.Seconds())
307+
}
308+
309+
// ObserveLeaseDelete increments the lease delete counter by one. The sample is
// labeled with httpCode formatted as a decimal string (the "http_status_code"
// label) and with reason (the "reason" label).
func (s *ServerMetrics) ObserveLeaseDelete(httpCode int, reason string) {
310+
s.leaseDeletes.WithLabelValues(strconv.Itoa(httpCode), reason).Inc()
311+
}

0 commit comments

Comments
 (0)