Skip to content

Commit fa59370

Browse files
committed
Adding new logging, event, and metric to better capture when mirroring addresses is skipped
1 parent e701cb0 commit fa59370

File tree

6 files changed

+88
-21
lines changed

6 files changed

+88
-21
lines changed

pkg/controller/endpointslicemirroring/endpointslicemirroring_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ type Controller struct {
161161
// has been synced at least once. Added as a member to the struct to allow
162162
// injection for testing.
163163
endpointSlicesSynced cache.InformerSynced
164+
164165
// endpointSliceTracker tracks the list of EndpointSlices and associated
165166
// resource versions expected for each Endpoints resource. It can help
166167
// determine if a cached EndpointSlice is out of date.

pkg/controller/endpointslicemirroring/endpointslicemirroring_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ func TestSyncEndpoints(t *testing.T) {
172172
}},
173173
},
174174
endpointSlices: []*discovery.EndpointSlice{},
175-
expectedNumActions: 1,
175+
expectedNumActions: 2, // extra action for creating warning event
176176
expectedNumSlices: 1,
177177
}}
178178

pkg/controller/endpointslicemirroring/events.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,8 @@ const (
2626
// InvalidIPAddress indicates that an IP address found in an Endpoints
2727
// resource is invalid.
2828
InvalidIPAddress = "InvalidIPAddress"
29+
// TooManyAddressesToMirror indicates that some addresses were not mirrored
30+
// due to an EndpointSubset containing more addresses to mirror than
31+
// MaxEndpointsPerSubset allows.
32+
TooManyAddressesToMirror = "TooManyAddressesToMirror"
2933
)

pkg/controller/endpointslicemirroring/metrics/metrics.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,18 @@ var (
6464
},
6565
[]string{},
6666
)
67+
// AddressesSkippedPerSync tracks the number of addresses skipped on each
68+
// Endpoints sync due to being invalid or exceeding MaxEndpointsPerSubset.
69+
AddressesSkippedPerSync = metrics.NewHistogramVec(
70+
&metrics.HistogramOpts{
71+
Subsystem: EndpointSliceMirroringSubsystem,
72+
Name: "addresses_skipped_per_sync",
73+
Help: "Number of addresses skipped on each Endpoints sync due to being invalid or exceeding MaxEndpointsPerSubset",
74+
StabilityLevel: metrics.ALPHA,
75+
Buckets: metrics.ExponentialBuckets(2, 2, 15),
76+
},
77+
[]string{},
78+
)
6779
// EndpointsSyncDuration tracks how long syncEndpoints() takes in a number
6880
// of Seconds.
6981
EndpointsSyncDuration = metrics.NewHistogramVec(
@@ -127,6 +139,7 @@ func RegisterMetrics() {
127139
legacyregistry.MustRegister(EndpointsAddedPerSync)
128140
legacyregistry.MustRegister(EndpointsUpdatedPerSync)
129141
legacyregistry.MustRegister(EndpointsRemovedPerSync)
142+
legacyregistry.MustRegister(AddressesSkippedPerSync)
130143
legacyregistry.MustRegister(EndpointsSyncDuration)
131144
legacyregistry.MustRegister(EndpointsDesired)
132145
legacyregistry.MustRegister(NumEndpointSlices)

pkg/controller/endpointslicemirroring/reconciler.go

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,25 @@ import (
3636
// reconciler is responsible for transforming current EndpointSlice state into
3737
// desired state
3838
type reconciler struct {
39-
client clientset.Interface
39+
client clientset.Interface
40+
41+
// endpointSliceTracker tracks the list of EndpointSlices and associated
42+
// resource versions expected for each Endpoints resource. It can help
43+
// determine if a cached EndpointSlice is out of date.
44+
endpointSliceTracker *endpointSliceTracker
45+
46+
// eventRecorder allows reconciler to record an event if it finds an invalid
47+
// IP address in an Endpoints resource.
48+
eventRecorder record.EventRecorder
49+
50+
// maxEndpointsPerSubset references the maximum number of endpoints that
51+
// should be added to an EndpointSlice for an EndpointSubset. This allows
52+
// for a simple 1:1 mapping between EndpointSubset and EndpointSlice.
4053
maxEndpointsPerSubset int32
41-
endpointSliceTracker *endpointSliceTracker
42-
metricsCache *metrics.Cache
43-
eventRecorder record.EventRecorder
54+
55+
// metricsCache tracks values for total numbers of desired endpoints as well
56+
// as the efficiency of EndpointSlice endpoints distribution
57+
metricsCache *metrics.Cache
4458
}
4559

4660
// reconcile takes an Endpoints resource and ensures that corresponding
@@ -50,38 +64,65 @@ func (r *reconciler) reconcile(endpoints *corev1.Endpoints, existingSlices []*di
5064
// Calculate desired state.
5165
d := newDesiredCalc()
5266

67+
numInvalidAddresses := 0
68+
addressesSkipped := 0
69+
5370
for _, subset := range endpoints.Subsets {
5471
multiKey := d.initPorts(subset.Ports)
5572

56-
totalAddresses := 0
57-
numInvalidAddresses := 0
73+
totalAddresses := len(subset.Addresses) + len(subset.NotReadyAddresses)
74+
totalAddressesAdded := 0
5875

5976
for _, address := range subset.Addresses {
60-
totalAddresses++
61-
if totalAddresses > int(r.maxEndpointsPerSubset) {
77+
// Break if we've reached the max number of addresses to mirror
78+
// per EndpointSubset. This allows for a simple 1:1 mapping between
79+
// EndpointSubset and EndpointSlice.
80+
if totalAddressesAdded >= int(r.maxEndpointsPerSubset) {
6281
break
6382
}
64-
if ok := d.addAddress(address, multiKey, true); !ok {
83+
if ok := d.addAddress(address, multiKey, true); ok {
84+
totalAddressesAdded++
85+
} else {
6586
numInvalidAddresses++
6687
klog.Warningf("Address in %s/%s Endpoints is not a valid IP, it will not be mirrored to an EndpointSlice: %s", endpoints.Namespace, endpoints.Name, address.IP)
6788
}
6889
}
6990

7091
for _, address := range subset.NotReadyAddresses {
71-
totalAddresses++
72-
if totalAddresses > int(r.maxEndpointsPerSubset) {
92+
// Break if we've reached the max number of addresses to mirror
93+
// per EndpointSubset. This allows for a simple 1:1 mapping between
94+
// EndpointSubset and EndpointSlice.
95+
if totalAddressesAdded >= int(r.maxEndpointsPerSubset) {
7396
break
7497
}
75-
if ok := d.addAddress(address, multiKey, false); !ok {
98+
if ok := d.addAddress(address, multiKey, true); ok {
99+
totalAddressesAdded++
100+
} else {
76101
numInvalidAddresses++
77102
klog.Warningf("Address in %s/%s Endpoints is not a valid IP, it will not be mirrored to an EndpointSlice: %s", endpoints.Namespace, endpoints.Name, address.IP)
78103
}
79104
}
80105

81-
if numInvalidAddresses > 0 {
82-
r.eventRecorder.Eventf(endpoints, corev1.EventTypeWarning, InvalidIPAddress,
83-
"Skipped %d invalid IP addresses when mirroring to EndpointSlices", numInvalidAddresses)
84-
}
106+
addressesSkipped += totalAddresses - totalAddressesAdded
107+
}
108+
109+
// This metric includes addresses skipped for being invalid or exceeding
110+
// MaxEndpointsPerSubset.
111+
metrics.AddressesSkippedPerSync.WithLabelValues().Observe(float64(addressesSkipped))
112+
113+
// Record an event on the Endpoints resource if we skipped mirroring for any
114+
// invalid IP addresses.
115+
if numInvalidAddresses > 0 {
116+
r.eventRecorder.Eventf(endpoints, corev1.EventTypeWarning, InvalidIPAddress,
117+
"Skipped %d invalid IP addresses when mirroring to EndpointSlices", numInvalidAddresses)
118+
}
119+
120+
// Record a separate event if we skipped mirroring due to the number of
121+
// addresses exceeding MaxEndpointsPerSubset.
122+
if addressesSkipped > numInvalidAddresses {
123+
klog.Warningf("%d addresses in %s/%s Endpoints were skipped due to exceeding MaxEndpointsPerSubset", addressesSkipped, endpoints.Namespace, endpoints.Name)
124+
r.eventRecorder.Eventf(endpoints, corev1.EventTypeWarning, TooManyAddressesToMirror,
125+
"A max of %d addresses can be mirrored to EndpointSlices per Endpoints subset. %d addresses were skipped", r.maxEndpointsPerSubset, addressesSkipped)
85126
}
86127

87128
// Build data structures for existing state.

pkg/controller/endpointslicemirroring/reconciler_test.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ func TestReconcile(t *testing.T) {
533533
existingEndpointSlices: []*discovery.EndpointSlice{},
534534
expectedNumSlices: 2,
535535
expectedClientActions: 2,
536-
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 3, addedPerSync: 3, numCreated: 2},
536+
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 3, addedPerSync: 3, skippedPerSync: 2, numCreated: 2},
537537
}, {
538538
testName: "Endpoints with 2 subsets, multiple ports, all invalid addresses",
539539
subsets: []corev1.EndpointSubset{{
@@ -582,9 +582,9 @@ func TestReconcile(t *testing.T) {
582582
existingEndpointSlices: []*discovery.EndpointSlice{},
583583
expectedNumSlices: 0,
584584
expectedClientActions: 0,
585-
expectedMetrics: &expectedMetrics{desiredSlices: 0, actualSlices: 0, desiredEndpoints: 0, addedPerSync: 0, numCreated: 0},
585+
expectedMetrics: &expectedMetrics{desiredSlices: 0, actualSlices: 0, desiredEndpoints: 0, addedPerSync: 0, skippedPerSync: 5, numCreated: 0},
586586
}, {
587-
testName: "Endpoints with 2 subsets, multiple ports and addresses, existing EndpointSlice with some addresses",
587+
testName: "Endpoints with 2 subsets, 1 exceeding maxEndpointsPerSubset",
588588
subsets: []corev1.EndpointSubset{{
589589
Ports: []corev1.EndpointPort{{
590590
Name: "http",
@@ -632,7 +632,7 @@ func TestReconcile(t *testing.T) {
632632
expectedNumSlices: 2,
633633
expectedClientActions: 2,
634634
maxEndpointsPerSubset: 2,
635-
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 4, addedPerSync: 4, updatedPerSync: 0, removedPerSync: 0, numCreated: 2, numUpdated: 0},
635+
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 4, addedPerSync: 4, updatedPerSync: 0, removedPerSync: 0, skippedPerSync: 1, numCreated: 2, numUpdated: 0},
636636
}}
637637

638638
for _, tc := range testCases {
@@ -885,6 +885,7 @@ type expectedMetrics struct {
885885
addedPerSync int
886886
updatedPerSync int
887887
removedPerSync int
888+
skippedPerSync int
888889
numCreated int
889890
numUpdated int
890891
numDeleted int
@@ -929,6 +930,12 @@ func expectMetrics(t *testing.T, em expectedMetrics) {
929930
t.Errorf("Expected endpointsRemovedPerSync to be %d, got %v", em.removedPerSync, actualRemovedPerSync)
930931
}
931932

933+
actualSkippedPerSync, err := testutil.GetHistogramMetricValue(metrics.AddressesSkippedPerSync.WithLabelValues())
934+
handleErr(t, err, "addressesSkippedPerSync")
935+
if actualSkippedPerSync != float64(em.skippedPerSync) {
936+
t.Errorf("Expected addressesSkippedPerSync to be %d, got %v", em.skippedPerSync, actualSkippedPerSync)
937+
}
938+
932939
actualCreated, err := testutil.GetCounterMetricValue(metrics.EndpointSliceChanges.WithLabelValues("create"))
933940
handleErr(t, err, "endpointSliceChangesCreated")
934941
if actualCreated != float64(em.numCreated) {
@@ -962,6 +969,7 @@ func setupMetrics() {
962969
metrics.EndpointsAddedPerSync.Delete(map[string]string{})
963970
metrics.EndpointsUpdatedPerSync.Delete(map[string]string{})
964971
metrics.EndpointsRemovedPerSync.Delete(map[string]string{})
972+
metrics.AddressesSkippedPerSync.Delete(map[string]string{})
965973
metrics.EndpointSliceChanges.Delete(map[string]string{"operation": "create"})
966974
metrics.EndpointSliceChanges.Delete(map[string]string{"operation": "update"})
967975
metrics.EndpointSliceChanges.Delete(map[string]string{"operation": "delete"})

0 commit comments

Comments
 (0)