Skip to content

Commit 017f57a

Browse files
committed
proxy: add some useful metrics
This adds some useful metrics around pending changes and last successful sync time. The goal is for administrators to be able to alert on proxies that, for whatever reason, are quite stale.

Signed-off-by: Casey Callendrello <[email protected]>
1 parent a1588cf commit 017f57a

File tree

8 files changed

+79
-0
lines changed

8 files changed

+79
-0
lines changed

pkg/proxy/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ go_library(
1818
deps = [
1919
"//pkg/api/v1/service:go_default_library",
2020
"//pkg/proxy/config:go_default_library",
21+
"//pkg/proxy/metrics:go_default_library",
2122
"//pkg/proxy/util:go_default_library",
2223
"//staging/src/k8s.io/api/core/v1:go_default_library",
2324
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",

pkg/proxy/endpoints.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"k8s.io/apimachinery/pkg/types"
3030
"k8s.io/apimachinery/pkg/util/sets"
3131
"k8s.io/client-go/tools/record"
32+
"k8s.io/kubernetes/pkg/proxy/metrics"
3233
utilproxy "k8s.io/kubernetes/pkg/proxy/util"
3334
utilnet "k8s.io/utils/net"
3435
)
@@ -127,6 +128,7 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool {
127128
if endpoints == nil {
128129
return false
129130
}
131+
metrics.EndpointChangesTotal.Inc()
130132
namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name}
131133

132134
ect.lock.Lock()
@@ -154,6 +156,8 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool {
154156
// should be exported.
155157
delete(ect.lastChangeTriggerTimes, namespacedName)
156158
}
159+
160+
metrics.EndpointChangesPending.Set(float64(len(ect.items)))
157161
return len(ect.items) > 0
158162
}
159163

@@ -295,6 +299,7 @@ func (em EndpointsMap) apply(changes *EndpointChangeTracker, staleEndpoints *[]S
295299
detectStaleConnections(change.previous, change.current, staleEndpoints, staleServiceNames)
296300
}
297301
changes.items = make(map[types.NamespacedName]*endpointsChange)
302+
metrics.EndpointChangesPending.Set(0)
298303
for _, lastChangeTriggerTime := range changes.lastChangeTriggerTimes {
299304
*lastChangeTriggerTimes = append(*lastChangeTriggerTimes, lastChangeTriggerTime...)
300305
}

pkg/proxy/iptables/proxier.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,6 +1395,7 @@ func (proxier *Proxier) syncProxyRules() {
13951395
if proxier.healthzServer != nil {
13961396
proxier.healthzServer.UpdateTimestamp()
13971397
}
1398+
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
13981399

13991400
// Update healthchecks. The endpoints list might include services that are
14001401
// not "OnlyLocal", but the services list will not, and the healthChecker

pkg/proxy/ipvs/proxier.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1272,6 +1272,7 @@ func (proxier *Proxier) syncProxyRules() {
12721272
if proxier.healthzServer != nil {
12731273
proxier.healthzServer.UpdateTimestamp()
12741274
}
1275+
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
12751276

12761277
// Update healthchecks. The endpoints list might include services that are
12771278
// not "OnlyLocal", but the services list will not, and the healthChecker

pkg/proxy/metrics/metrics.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,16 @@ var (
4646
},
4747
)
4848

49+
// SyncProxyRulesLastTimestamp is the timestamp proxy rules were last
50+
// successfully synced.
51+
SyncProxyRulesLastTimestamp = prometheus.NewGauge(
52+
prometheus.GaugeOpts{
53+
Subsystem: kubeProxySubsystem,
54+
Name: "sync_proxy_rules_last_timestamp_seconds",
55+
Help: "The last time proxy rules were successfully synced",
56+
},
57+
)
58+
4959
// NetworkProgrammingLatency is defined as the time it took to program the network - from the time
5060
// the service or pod has changed to the time the change was propagated and the proper kube-proxy
5161
// rules were synced. Exported for each endpoints object that were part of the rules sync.
@@ -63,6 +73,46 @@ var (
6373
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
6474
},
6575
)
76+
77+
// EndpointChangesPending is the number of pending endpoint changes that
78+
// have not yet been synced to the proxy.
79+
EndpointChangesPending = prometheus.NewGauge(
80+
prometheus.GaugeOpts{
81+
Subsystem: kubeProxySubsystem,
82+
Name: "sync_proxy_rules_endpoint_changes_pending",
83+
Help: "Pending proxy rules Endpoint changes",
84+
},
85+
)
86+
87+
// EndpointChangesTotal is the number of endpoint changes that the proxy
88+
// has seen.
89+
EndpointChangesTotal = prometheus.NewCounter(
90+
prometheus.CounterOpts{
91+
Subsystem: kubeProxySubsystem,
92+
Name: "sync_proxy_rules_endpoint_changes_total",
93+
Help: "Cumulative proxy rules Endpoint changes",
94+
},
95+
)
96+
97+
// ServiceChangesPending is the number of pending service changes that
98+
// have not yet been synced to the proxy.
99+
ServiceChangesPending = prometheus.NewGauge(
100+
prometheus.GaugeOpts{
101+
Subsystem: kubeProxySubsystem,
102+
Name: "sync_proxy_rules_service_changes_pending",
103+
Help: "Pending proxy rules Service changes",
104+
},
105+
)
106+
107+
// ServiceChangesTotal is the number of service changes that the proxy has
108+
// seen.
109+
ServiceChangesTotal = prometheus.NewCounter(
110+
prometheus.CounterOpts{
111+
Subsystem: kubeProxySubsystem,
112+
Name: "sync_proxy_rules_service_changes_total",
113+
Help: "Cumulative proxy rules Service changes",
114+
},
115+
)
66116
)
67117

68118
var registerMetricsOnce sync.Once
@@ -72,7 +122,12 @@ func RegisterMetrics() {
72122
registerMetricsOnce.Do(func() {
73123
prometheus.MustRegister(SyncProxyRulesLatency)
74124
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
125+
prometheus.MustRegister(SyncProxyRulesLastTimestamp)
75126
prometheus.MustRegister(NetworkProgrammingLatency)
127+
prometheus.MustRegister(EndpointChangesPending)
128+
prometheus.MustRegister(EndpointChangesTotal)
129+
prometheus.MustRegister(ServiceChangesPending)
130+
prometheus.MustRegister(ServiceChangesTotal)
76131
})
77132
}
78133

pkg/proxy/service.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"k8s.io/apimachinery/pkg/util/sets"
3131
"k8s.io/client-go/tools/record"
3232
apiservice "k8s.io/kubernetes/pkg/api/v1/service"
33+
"k8s.io/kubernetes/pkg/proxy/metrics"
3334
utilproxy "k8s.io/kubernetes/pkg/proxy/util"
3435
utilnet "k8s.io/utils/net"
3536
)
@@ -198,6 +199,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool {
198199
if svc == nil {
199200
return false
200201
}
202+
metrics.ServiceChangesTotal.Inc()
201203
namespacedName := types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name}
202204

203205
sct.lock.Lock()
@@ -214,6 +216,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool {
214216
if reflect.DeepEqual(change.previous, change.current) {
215217
delete(sct.items, namespacedName)
216218
}
219+
metrics.ServiceChangesPending.Set(float64(len(sct.items)))
217220
return len(sct.items) > 0
218221
}
219222

@@ -296,6 +299,7 @@ func (sm *ServiceMap) apply(changes *ServiceChangeTracker, UDPStaleClusterIP set
296299
}
297300
// clear changes after applying them to ServiceMap.
298301
changes.items = make(map[types.NamespacedName]*serviceChange)
302+
metrics.ServiceChangesPending.Set(0)
299303
return
300304
}
301305

pkg/proxy/winkernel/metrics.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,16 @@ var (
4343
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
4444
},
4545
)
46+
47+
// SyncProxyRulesLastTimestamp is the timestamp proxy rules were last
48+
// successfully synced.
49+
SyncProxyRulesLastTimestamp = prometheus.NewGauge(
50+
prometheus.GaugeOpts{
51+
Subsystem: kubeProxySubsystem,
52+
Name: "sync_proxy_rules_last_timestamp_seconds",
53+
Help: "The last time proxy rules were successfully synced",
54+
},
55+
)
4656
)
4757

4858
var registerMetricsOnce sync.Once
@@ -51,6 +61,7 @@ func RegisterMetrics() {
5161
registerMetricsOnce.Do(func() {
5262
prometheus.MustRegister(SyncProxyRulesLatency)
5363
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
64+
prometheus.MustRegister(SyncProxyRulesLastTimestamp)
5465
})
5566
}
5667

pkg/proxy/winkernel/proxier.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,7 @@ func (proxier *Proxier) syncProxyRules() {
11971197
if proxier.healthzServer != nil {
11981198
proxier.healthzServer.UpdateTimestamp()
11991199
}
1200+
SyncProxyRulesLastTimestamp.SetToCurrentTime()
12001201

12011202
// Update healthchecks. The endpoints list might include services that are
12021203
// not "OnlyLocal", but the services list will not, and the healthChecker

0 commit comments

Comments
 (0)