Commit de02323
Ensuring EndpointSlices are recreated after Service recreation
This fixes a bug that occurred when a Service was rapidly recreated. The bug relied on an unfortunate series of events:

1. When the Service is deleted, the EndpointSlice controller removes it from the EndpointSliceTracker along with any associated EndpointSlices.
2. When the Service is recreated, the EndpointSlice controller sees that appropriate EndpointSlices for the Service still exist (they have not yet been garbage collected) and does nothing.
3. When an EndpointSlice is finally deleted, the EndpointSlice controller checks the EndpointSliceTracker to see if it thinks the Service should have this EndpointSlice. Since the tracker no longer knows about it, the Service is not requeued, and the recreated Service is left without EndpointSlices. (This check was intended to ensure we wouldn't requeue a Service every time one of its EndpointSlices is deleted.)

This adds a check in the reconciler to ensure that the EndpointSlices it is working with are owned by a Service with a matching UID. If not, it marks those EndpointSlices for deletion (assuming they're about to be garbage collected anyway) and creates new EndpointSlices.
1 parent 77f349e commit de02323
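Why a UID comparison fixes this: deleting and recreating a Service keeps its namespace and name but assigns a fresh UID, so owner references stamped onto the old EndpointSlices no longer match. A minimal standalone sketch of the idea (not code from this commit; the names and UIDs are illustrative):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	discovery "k8s.io/api/discovery/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

func main() {
	// The slice was created for the old Service; the recreated Service
	// has the same name but a different UID.
	oldSvc := corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "demo", UID: types.UID("uid-old")}}
	newSvc := corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "demo", UID: types.UID("uid-new")}}

	gvk := corev1.SchemeGroupVersion.WithKind("Service")
	slice := discovery.EndpointSlice{ObjectMeta: metav1.ObjectMeta{
		Name:            "demo-abc12",
		OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(&oldSvc, gvk)},
	}}

	// The same matching rule the commit's ownedBy helper applies.
	ref := slice.OwnerReferences[0]
	fmt.Println(ref.UID == oldSvc.UID) // true: still owned by the old Service
	fmt.Println(ref.UID == newSvc.UID) // false: the recreated Service does not own it
}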

File tree

5 files changed (+135 −24 lines)

pkg/controller/endpointslice/endpointslice_controller_test.go
pkg/controller/endpointslice/reconciler.go
pkg/controller/endpointslice/reconciler_test.go
pkg/controller/endpointslice/utils.go
pkg/controller/endpointslice/utils_test.go
pkg/controller/endpointslice/endpointslice_controller_test.go

Lines changed: 21 additions & 12 deletions
@@ -29,6 +29,8 @@ import (
 	discovery "k8s.io/api/discovery/v1beta1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/informers"
@@ -126,7 +128,7 @@ func TestSyncServiceWithSelector(t *testing.T) {
 	ns := metav1.NamespaceDefault
 	serviceName := "testing-1"
 	client, esController := newController([]string{"node-1"}, time.Duration(0))
-	standardSyncService(t, esController, ns, serviceName, "true")
+	standardSyncService(t, esController, ns, serviceName)
 	expectActions(t, client.Actions(), 1, "create", "endpointslices")
 
 	sliceList, err := client.DiscoveryV1beta1().EndpointSlices(ns).List(context.TODO(), metav1.ListOptions{})
@@ -192,7 +194,7 @@ func TestSyncServicePodSelection(t *testing.T) {
 	pod2.Labels["foo"] = "boo"
 	esController.podStore.Add(pod2)
 
-	standardSyncService(t, esController, ns, "testing-1", "true")
+	standardSyncService(t, esController, ns, "testing-1")
 	expectActions(t, client.Actions(), 1, "create", "endpointslices")
 
 	// an endpoint slice should be created, it should only reference pod1 (not pod2)
@@ -211,12 +213,17 @@ func TestSyncServiceEndpointSliceLabelSelection(t *testing.T) {
 	client, esController := newController([]string{"node-1"}, time.Duration(0))
 	ns := metav1.NamespaceDefault
 	serviceName := "testing-1"
+	service := createService(t, esController, ns, serviceName)
+
+	gvk := schema.GroupVersionKind{Version: "v1", Kind: "Service"}
+	ownerRef := metav1.NewControllerRef(service, gvk)
 
 	// 5 slices, 3 with matching labels for our service
 	endpointSlices := []*discovery.EndpointSlice{{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "matching-1",
-			Namespace: ns,
+			Name:            "matching-1",
+			Namespace:       ns,
+			OwnerReferences: []metav1.OwnerReference{*ownerRef},
 			Labels: map[string]string{
 				discovery.LabelServiceName: serviceName,
 				discovery.LabelManagedBy:   controllerName,
@@ -225,8 +232,9 @@ func TestSyncServiceEndpointSliceLabelSelection(t *testing.T) {
 		AddressType: discovery.AddressTypeIPv4,
 	}, {
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "matching-2",
-			Namespace: ns,
+			Name:            "matching-2",
+			Namespace:       ns,
+			OwnerReferences: []metav1.OwnerReference{*ownerRef},
 			Labels: map[string]string{
 				discovery.LabelServiceName: serviceName,
 				discovery.LabelManagedBy:   controllerName,
@@ -278,9 +286,9 @@ func TestSyncServiceEndpointSliceLabelSelection(t *testing.T) {
 		}
 	}
 
-	// +1 for extra action involved in Service creation before syncService call.
-	numActionsBefore := len(client.Actions()) + 1
-	standardSyncService(t, esController, ns, serviceName, "false")
+	numActionsBefore := len(client.Actions())
+	err := esController.syncService(fmt.Sprintf("%s/%s", ns, serviceName))
+	assert.Nil(t, err, "Expected no error syncing service")
 
 	if len(client.Actions()) != numActionsBefore+2 {
 		t.Errorf("Expected 2 more actions, got %d", len(client.Actions())-numActionsBefore)
@@ -784,21 +792,22 @@ func addPods(t *testing.T, esController *endpointSliceController, namespace stri
 	}
 }
 
-func standardSyncService(t *testing.T, esController *endpointSliceController, namespace, serviceName, managedBySetup string) {
+func standardSyncService(t *testing.T, esController *endpointSliceController, namespace, serviceName string) {
 	t.Helper()
-	createService(t, esController, namespace, serviceName, managedBySetup)
+	createService(t, esController, namespace, serviceName)
 
 	err := esController.syncService(fmt.Sprintf("%s/%s", namespace, serviceName))
 	assert.Nil(t, err, "Expected no error syncing service")
 }
 
-func createService(t *testing.T, esController *endpointSliceController, namespace, serviceName, managedBySetup string) *v1.Service {
+func createService(t *testing.T, esController *endpointSliceController, namespace, serviceName string) *v1.Service {
 	t.Helper()
 	service := &v1.Service{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:              serviceName,
 			Namespace:         namespace,
 			CreationTimestamp: metav1.NewTime(time.Now()),
+			UID:               types.UID(namespace + "-" + serviceName),
 		},
 		Spec: v1.ServiceSpec{
 			Ports: []v1.ServicePort{{TargetPort: intstr.FromInt(80)}},

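The tests above drive the controller directly through esController.syncService with a "namespace/name" key, the same key format informer-based controllers put on their workqueues. A small sketch of building and splitting such a key (illustrative only, using the real client-go helper):

package main

import (
	"fmt"

	"k8s.io/client-go/tools/cache"
)

func main() {
	// Build the key the same way the tests do.
	key := fmt.Sprintf("%s/%s", "default", "testing-1")

	// A sync function typically splits it back apart.
	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		panic(err)
	}
	fmt.Println(ns, name) // default testing-1
}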
pkg/controller/endpointslice/reconciler.go

Lines changed: 8 additions & 6 deletions
@@ -70,7 +70,7 @@ func (r *reconciler) reconcile(service *corev1.Service, pods []*corev1.Pod, exis
 	existingSlicesByPortMap := map[endpointutil.PortMapKey][]*discovery.EndpointSlice{}
 	numExistingEndpoints := 0
 	for _, existingSlice := range existingSlices {
-		if existingSlice.AddressType == addressType {
+		if existingSlice.AddressType == addressType && ownedBy(existingSlice, service) {
 			epHash := endpointutil.NewPortMapKey(existingSlice.Ports)
 			existingSlicesByPortMap[epHash] = append(existingSlicesByPortMap[epHash], existingSlice)
 			numExistingEndpoints += len(existingSlice.Endpoints)
@@ -187,13 +187,15 @@ func (r *reconciler) finalize(
 		}
 		sliceToDelete := slicesToDelete[i]
 		slice := slicesToCreate[len(slicesToCreate)-1]
-		// Only update EndpointSlices that have the same AddressType as this
-		// field is considered immutable. Since Services also consider IPFamily
-		// immutable, the only case where this should matter will be the
-		// migration from IP to IPv4 and IPv6 AddressTypes, where there's a
+		// Only update EndpointSlices that are owned by this Service and have
+		// the same AddressType. We need to avoid updating EndpointSlices that
+		// are being garbage collected for an old Service with the same name.
+		// The AddressType field is immutable. Since Services also consider
+		// IPFamily immutable, the only case where this should matter will be
+		// the migration from IP to IPv4 and IPv6 AddressTypes, where there's a
 		// chance EndpointSlices with an IP AddressType would otherwise be
 		// updated to IPv4 or IPv6 without this check.
-		if sliceToDelete.AddressType == slice.AddressType {
+		if sliceToDelete.AddressType == slice.AddressType && ownedBy(sliceToDelete, service) {
 			slice.Name = sliceToDelete.Name
 			slicesToCreate = slicesToCreate[:len(slicesToCreate)-1]
 			slicesToUpdate = append(slicesToUpdate, slice)

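The effect of the new ownedBy guard in reconcile is that slices owned by a different (older) Service UID become invisible to the slice-reuse logic: the reconciler generates fresh slices for the current Service, and the stale ones fall through to deletion. A standalone sketch of that partitioning, using a local copy of the helper (slice names and UIDs are made up):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	discovery "k8s.io/api/discovery/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ownedBy mirrors the helper this commit adds in utils.go.
func ownedBy(endpointSlice *discovery.EndpointSlice, svc *corev1.Service) bool {
	for _, o := range endpointSlice.OwnerReferences {
		if o.UID == svc.UID && o.Kind == "Service" && o.APIVersion == "v1" {
			return true
		}
	}
	return false
}

func main() {
	svc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "demo", UID: "uid-new"}}

	existing := []*discovery.EndpointSlice{
		{ObjectMeta: metav1.ObjectMeta{Name: "demo-old", OwnerReferences: []metav1.OwnerReference{
			{APIVersion: "v1", Kind: "Service", Name: "demo", UID: "uid-old"},
		}}},
		{ObjectMeta: metav1.ObjectMeta{Name: "demo-new", OwnerReferences: []metav1.OwnerReference{
			{APIVersion: "v1", Kind: "Service", Name: "demo", UID: "uid-new"},
		}}},
	}

	var reusable, stale []string
	for _, s := range existing {
		if ownedBy(s, svc) {
			reusable = append(reusable, s.Name) // eligible for reuse/update
		} else {
			stale = append(stale, s.Name) // ignored; expected to be garbage collected
		}
	}
	fmt.Println("reusable:", reusable) // [demo-new]
	fmt.Println("stale:", stale)       // [demo-old]
}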
pkg/controller/endpointslice/reconciler_test.go

Lines changed: 83 additions & 3 deletions
@@ -30,6 +30,7 @@ import (
 	discovery "k8s.io/api/discovery/v1beta1"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes/fake"
@@ -595,6 +596,73 @@ func TestReconcileEndpointSlicesReplaceDeprecated(t *testing.T) {
 	cmc.Check(t)
 }
 
+// In this test, we want to verify that a Service recreation will result in new
+// EndpointSlices being created.
+func TestReconcileEndpointSlicesRecreation(t *testing.T) {
+	testCases := []struct {
+		name           string
+		ownedByService bool
+		expectChanges  bool
+	}{
+		{
+			name:           "slice owned by Service",
+			ownedByService: true,
+			expectChanges:  false,
+		}, {
+			name:           "slice owned by other Service UID",
+			ownedByService: false,
+			expectChanges:  true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			client := newClientset()
+			setupMetrics()
+			namespace := "test"
+
+			svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
+			slice := newEmptyEndpointSlice(1, namespace, endpointMeta, svc)
+
+			pod := newPod(1, namespace, true, 1)
+			slice.Endpoints = append(slice.Endpoints, podToEndpoint(pod, &corev1.Node{}, &corev1.Service{Spec: corev1.ServiceSpec{}}))
+
+			if !tc.ownedByService {
+				slice.OwnerReferences[0].UID = "different"
+			}
+			existingSlices := []*discovery.EndpointSlice{slice}
+			createEndpointSlices(t, client, namespace, existingSlices)
+
+			cmc := newCacheMutationCheck(existingSlices)
+
+			numActionsBefore := len(client.Actions())
+			r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice)
+			reconcileHelper(t, r, &svc, []*corev1.Pod{pod}, existingSlices, time.Now())
+
+			if tc.expectChanges {
+				if len(client.Actions()) != numActionsBefore+2 {
+					t.Fatalf("Expected 2 additional actions, got %d", len(client.Actions())-numActionsBefore)
+				}
+
+				expectAction(t, client.Actions(), numActionsBefore, "create", "endpointslices")
+				expectAction(t, client.Actions(), numActionsBefore+1, "delete", "endpointslices")
+
+				fetchedSlices := fetchEndpointSlices(t, client, namespace)
+
+				if len(fetchedSlices) != 1 {
+					t.Fatalf("Expected 1 EndpointSlice to exist, got %d", len(fetchedSlices))
+				}
+			} else {
+				if len(client.Actions()) != numActionsBefore {
+					t.Errorf("Expected no additional actions, got %d", len(client.Actions())-numActionsBefore)
+				}
+			}
+			// ensure cache mutation has not occurred
+			cmc.Check(t)
+		})
+	}
+}
+
 // Named ports can map to different port numbers on different pods.
 // This test ensures that EndpointSlices are grouped correctly in that case.
 func TestReconcileEndpointSlicesNamedPorts(t *testing.T) {
@@ -818,15 +886,24 @@ func TestReconcilerFinalizeSvcDeletionTimestamp(t *testing.T) {
 	namespace := "test"
 	svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
 	svc.DeletionTimestamp = tc.deletionTimestamp
+	gvk := schema.GroupVersionKind{Version: "v1", Kind: "Service"}
+	ownerRef := metav1.NewControllerRef(&svc, gvk)
+
 	esToCreate := &discovery.EndpointSlice{
-		ObjectMeta:  metav1.ObjectMeta{Name: "to-create"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            "to-create",
+			OwnerReferences: []metav1.OwnerReference{*ownerRef},
+		},
 		AddressType: endpointMeta.AddressType,
 		Ports:       endpointMeta.Ports,
 	}
 
 	// Add EndpointSlice that can be updated.
 	esToUpdate, err := client.DiscoveryV1beta1().EndpointSlices(namespace).Create(context.TODO(), &discovery.EndpointSlice{
-		ObjectMeta:  metav1.ObjectMeta{Name: "to-update"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            "to-update",
+			OwnerReferences: []metav1.OwnerReference{*ownerRef},
+		},
 		AddressType: endpointMeta.AddressType,
 		Ports:       endpointMeta.Ports,
 	}, metav1.CreateOptions{})
@@ -839,7 +916,10 @@ func TestReconcilerFinalizeSvcDeletionTimestamp(t *testing.T) {
 
 	// Add EndpointSlice that can be deleted.
 	esToDelete, err := client.DiscoveryV1beta1().EndpointSlices(namespace).Create(context.TODO(), &discovery.EndpointSlice{
-		ObjectMeta:  metav1.ObjectMeta{Name: "to-delete"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            "to-delete",
+			OwnerReferences: []metav1.OwnerReference{*ownerRef},
+		},
 		AddressType: endpointMeta.AddressType,
 		Ports:       endpointMeta.Ports,
 	}, metav1.CreateOptions{})

pkg/controller/endpointslice/utils.go

Lines changed: 11 additions & 0 deletions
@@ -201,6 +201,17 @@ func objectRefPtrChanged(ref1, ref2 *corev1.ObjectReference) bool {
 	return false
 }
 
+// ownedBy returns true if the provided EndpointSlice is owned by the provided
+// Service.
+func ownedBy(endpointSlice *discovery.EndpointSlice, svc *corev1.Service) bool {
+	for _, o := range endpointSlice.OwnerReferences {
+		if o.UID == svc.UID && o.Kind == "Service" && o.APIVersion == "v1" {
+			return true
+		}
+	}
+	return false
+}
+
 // getSliceToFill will return the EndpointSlice that will be closest to full
 // when numEndpoints are added. If no EndpointSlice can be found, a nil pointer
 // will be returned.

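A quick table-driven exercise of the helper's matching rules (a sketch, not part of the commit; it would sit alongside the other utils tests and use the same imports). Note that ownedBy accepts any matching owner reference, controller or not, and requires Kind "Service" with APIVersion "v1":

func TestOwnedBySketch(t *testing.T) {
	svc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{UID: "abc"}}

	testCases := []struct {
		ref  metav1.OwnerReference
		want bool
	}{
		{metav1.OwnerReference{APIVersion: "v1", Kind: "Service", UID: "abc"}, true},
		// Different UID, e.g. a recreated Service with the same name.
		{metav1.OwnerReference{APIVersion: "v1", Kind: "Service", UID: "xyz"}, false},
		// Matching UID but the wrong Kind/APIVersion.
		{metav1.OwnerReference{APIVersion: "apps/v1", Kind: "Deployment", UID: "abc"}, false},
	}

	for _, tc := range testCases {
		es := &discovery.EndpointSlice{ObjectMeta: metav1.ObjectMeta{
			OwnerReferences: []metav1.OwnerReference{tc.ref},
		}}
		if got := ownedBy(es, svc); got != tc.want {
			t.Errorf("ownedBy() with ref %+v = %v, want %v", tc.ref, got, tc.want)
		}
	}
}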
pkg/controller/endpointslice/utils_test.go

Lines changed: 12 additions & 3 deletions
@@ -27,6 +27,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/rand"
 	"k8s.io/client-go/kubernetes/fake"
@@ -455,7 +456,11 @@ func newServiceAndEndpointMeta(name, namespace string) (v1.Service, endpointMeta
 	}
 
 	svc := v1.Service{
-		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+			UID:       types.UID(namespace + "-" + name),
+		},
 		Spec: v1.ServiceSpec{
 			Ports: []v1.ServicePort{{
 				TargetPort: portNameIntStr,
@@ -477,10 +482,14 @@ func newServiceAndEndpointMeta(name, namespace string) (v1.Service, endpointMeta
 }
 
 func newEmptyEndpointSlice(n int, namespace string, endpointMeta endpointMeta, svc v1.Service) *discovery.EndpointSlice {
+	gvk := schema.GroupVersionKind{Version: "v1", Kind: "Service"}
+	ownerRef := metav1.NewControllerRef(&svc, gvk)
+
 	return &discovery.EndpointSlice{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      fmt.Sprintf("%s.%d", svc.Name, n),
-			Namespace: namespace,
+			Name:            fmt.Sprintf("%s-%d", svc.Name, n),
+			Namespace:       namespace,
+			OwnerReferences: []metav1.OwnerReference{*ownerRef},
 		},
 		Ports:       endpointMeta.Ports,
 		AddressType: endpointMeta.AddressType,

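For reference, metav1.NewControllerRef (used throughout these fixtures to wire test EndpointSlices to their Service) populates exactly the fields that ownedBy later inspects. Roughly, given a svc as built above:

gvk := schema.GroupVersionKind{Version: "v1", Kind: "Service"}
ownerRef := metav1.NewControllerRef(&svc, gvk)

// ownerRef is equivalent to:
//   &metav1.OwnerReference{
//       APIVersion:         "v1",    // gvk.GroupVersion().String()
//       Kind:               "Service",
//       Name:               svc.Name,
//       UID:                svc.UID, // the field ownedBy compares
//       Controller:         pointer to true,
//       BlockOwnerDeletion: pointer to true,
//   }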