Skip to content

Commit 07586f6

Browse files
authored
Merge pull request kubernetes#91311 from robscott/endpointslice-garbage-collection
Fixing race condition with EndpointSlice controller garbage collection
2 parents da54185 + 3f59371 commit 07586f6

File tree

3 files changed

+193
-11
lines changed

3 files changed

+193
-11
lines changed

pkg/controller/endpointslice/endpointslice_controller_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,25 @@ func TestSyncServiceNoSelector(t *testing.T) {
102102
assert.Len(t, client.Actions(), 0)
103103
}
104104

105+
// Ensure SyncService for service with pending deletion results in no action
106+
func TestSyncServicePendingDeletion(t *testing.T) {
107+
ns := metav1.NamespaceDefault
108+
serviceName := "testing-1"
109+
deletionTimestamp := metav1.Now()
110+
client, esController := newController([]string{"node-1"}, time.Duration(0))
111+
esController.serviceStore.Add(&v1.Service{
112+
ObjectMeta: metav1.ObjectMeta{Name: serviceName, Namespace: ns, DeletionTimestamp: &deletionTimestamp},
113+
Spec: v1.ServiceSpec{
114+
Selector: map[string]string{"foo": "bar"},
115+
Ports: []v1.ServicePort{{TargetPort: intstr.FromInt(80)}},
116+
},
117+
})
118+
119+
err := esController.syncService(fmt.Sprintf("%s/%s", ns, serviceName))
120+
assert.Nil(t, err)
121+
assert.Len(t, client.Actions(), 0)
122+
}
123+
105124
// Ensure SyncService for service with selector but no pods results in placeholder EndpointSlice
106125
func TestSyncServiceWithSelector(t *testing.T) {
107126
ns := metav1.NamespaceDefault

pkg/controller/endpointslice/reconciler.go

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -206,18 +206,23 @@ func (r *reconciler) finalize(
206206
}
207207
}
208208

209-
for _, endpointSlice := range slicesToCreate {
210-
addTriggerTimeAnnotation(endpointSlice, triggerTime)
211-
createdSlice, err := r.client.DiscoveryV1beta1().EndpointSlices(service.Namespace).Create(context.TODO(), endpointSlice, metav1.CreateOptions{})
212-
if err != nil {
213-
// If the namespace is terminating, creates will continue to fail. Simply drop the item.
214-
if errors.HasStatusCause(err, corev1.NamespaceTerminatingCause) {
215-
return nil
209+
// Don't create new EndpointSlices if the Service is pending deletion. This
210+
// is to avoid a potential race condition with the garbage collector where
211+
// it tries to delete EndpointSlices as this controller replaces them.
212+
if service.DeletionTimestamp == nil {
213+
for _, endpointSlice := range slicesToCreate {
214+
addTriggerTimeAnnotation(endpointSlice, triggerTime)
215+
createdSlice, err := r.client.DiscoveryV1beta1().EndpointSlices(service.Namespace).Create(context.TODO(), endpointSlice, metav1.CreateOptions{})
216+
if err != nil {
217+
// If the namespace is terminating, creates will continue to fail. Simply drop the item.
218+
if errors.HasStatusCause(err, corev1.NamespaceTerminatingCause) {
219+
return nil
220+
}
221+
errs = append(errs, fmt.Errorf("Error creating EndpointSlice for Service %s/%s: %v", service.Namespace, service.Name, err))
222+
} else {
223+
r.endpointSliceTracker.Update(createdSlice)
224+
metrics.EndpointSliceChanges.WithLabelValues("create").Inc()
216225
}
217-
errs = append(errs, fmt.Errorf("Error creating EndpointSlice for Service %s/%s: %v", service.Namespace, service.Name, err))
218-
} else {
219-
r.endpointSliceTracker.Update(createdSlice)
220-
metrics.EndpointSliceChanges.WithLabelValues("create").Inc()
221226
}
222227
}
223228

pkg/controller/endpointslice/reconciler_test.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,164 @@ func TestReconcileEndpointSlicesMetrics(t *testing.T) {
742742
expectMetrics(t, expectedMetrics{desiredSlices: 1, actualSlices: 1, desiredEndpoints: 10, addedPerSync: 20, removedPerSync: 10, numCreated: 1, numUpdated: 1, numDeleted: 0})
743743
}
744744

745+
// When a Service has a non-nil deletionTimestamp we want to avoid creating any
746+
// new EndpointSlices but continue to allow updates and deletes through. This
747+
// test uses 3 EndpointSlices, 1 "to-create", 1 "to-update", and 1 "to-delete".
748+
// Each test case exercises different combinations of calls to finalize with
749+
// those resources.
750+
func TestReconcilerFinalizeSvcDeletionTimestamp(t *testing.T) {
751+
now := metav1.Now()
752+
753+
testCases := []struct {
754+
name string
755+
deletionTimestamp *metav1.Time
756+
attemptCreate bool
757+
attemptUpdate bool
758+
attemptDelete bool
759+
expectCreatedSlice bool
760+
expectUpdatedSlice bool
761+
expectDeletedSlice bool
762+
}{{
763+
name: "Attempt create and update, nil deletion timestamp",
764+
deletionTimestamp: nil,
765+
attemptCreate: true,
766+
attemptUpdate: true,
767+
expectCreatedSlice: true,
768+
expectUpdatedSlice: true,
769+
expectDeletedSlice: true,
770+
}, {
771+
name: "Attempt create and update, deletion timestamp set",
772+
deletionTimestamp: &now,
773+
attemptCreate: true,
774+
attemptUpdate: true,
775+
expectCreatedSlice: false,
776+
expectUpdatedSlice: true,
777+
expectDeletedSlice: true,
778+
}, {
779+
// Slice scheduled for creation is transitioned to update of Slice
780+
// scheduled for deletion.
781+
name: "Attempt create, update, and delete, nil deletion timestamp, recycling in action",
782+
deletionTimestamp: nil,
783+
attemptCreate: true,
784+
attemptUpdate: true,
785+
attemptDelete: true,
786+
expectCreatedSlice: false,
787+
expectUpdatedSlice: true,
788+
expectDeletedSlice: true,
789+
}, {
790+
// Slice scheduled for creation is transitioned to update of Slice
791+
// scheduled for deletion.
792+
name: "Attempt create, update, and delete, deletion timestamp set, recycling in action",
793+
deletionTimestamp: &now,
794+
attemptCreate: true,
795+
attemptUpdate: true,
796+
attemptDelete: true,
797+
expectCreatedSlice: false,
798+
expectUpdatedSlice: true,
799+
expectDeletedSlice: true,
800+
}, {
801+
// Update and delete continue to work when deletionTimestamp is set.
802+
name: "Attempt update delete, deletion timestamp set",
803+
deletionTimestamp: &now,
804+
attemptCreate: false,
805+
attemptUpdate: true,
806+
attemptDelete: true,
807+
expectCreatedSlice: false,
808+
expectUpdatedSlice: true,
809+
expectDeletedSlice: false,
810+
}}
811+
812+
for _, tc := range testCases {
813+
t.Run(tc.name, func(t *testing.T) {
814+
client := newClientset()
815+
setupMetrics()
816+
r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice)
817+
818+
namespace := "test"
819+
svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
820+
svc.DeletionTimestamp = tc.deletionTimestamp
821+
esToCreate := &discovery.EndpointSlice{
822+
ObjectMeta: metav1.ObjectMeta{Name: "to-create"},
823+
AddressType: endpointMeta.AddressType,
824+
Ports: endpointMeta.Ports,
825+
}
826+
827+
// Add EndpointSlice that can be updated.
828+
esToUpdate, err := client.DiscoveryV1beta1().EndpointSlices(namespace).Create(context.TODO(), &discovery.EndpointSlice{
829+
ObjectMeta: metav1.ObjectMeta{Name: "to-update"},
830+
AddressType: endpointMeta.AddressType,
831+
Ports: endpointMeta.Ports,
832+
}, metav1.CreateOptions{})
833+
if err != nil {
834+
t.Fatalf("Expected no error creating EndpointSlice during test setup, got %v", err)
835+
}
836+
// Add an endpoint so we can see if this has actually been updated by
837+
// finalize func.
838+
esToUpdate.Endpoints = []discovery.Endpoint{{Addresses: []string{"10.2.3.4"}}}
839+
840+
// Add EndpointSlice that can be deleted.
841+
esToDelete, err := client.DiscoveryV1beta1().EndpointSlices(namespace).Create(context.TODO(), &discovery.EndpointSlice{
842+
ObjectMeta: metav1.ObjectMeta{Name: "to-delete"},
843+
AddressType: endpointMeta.AddressType,
844+
Ports: endpointMeta.Ports,
845+
}, metav1.CreateOptions{})
846+
if err != nil {
847+
t.Fatalf("Expected no error creating EndpointSlice during test setup, got %v", err)
848+
}
849+
850+
slicesToCreate := []*discovery.EndpointSlice{}
851+
if tc.attemptCreate {
852+
slicesToCreate = append(slicesToCreate, esToCreate.DeepCopy())
853+
}
854+
slicesToUpdate := []*discovery.EndpointSlice{}
855+
if tc.attemptUpdate {
856+
slicesToUpdate = append(slicesToUpdate, esToUpdate.DeepCopy())
857+
}
858+
slicesToDelete := []*discovery.EndpointSlice{}
859+
if tc.attemptDelete {
860+
slicesToDelete = append(slicesToDelete, esToDelete.DeepCopy())
861+
}
862+
863+
err = r.finalize(&svc, slicesToCreate, slicesToUpdate, slicesToDelete, time.Now())
864+
if err != nil {
865+
t.Errorf("Error calling r.finalize(): %v", err)
866+
}
867+
868+
fetchedSlices := fetchEndpointSlices(t, client, namespace)
869+
870+
createdSliceFound := false
871+
updatedSliceFound := false
872+
deletedSliceFound := false
873+
for _, epSlice := range fetchedSlices {
874+
if epSlice.Name == esToCreate.Name {
875+
createdSliceFound = true
876+
}
877+
if epSlice.Name == esToUpdate.Name {
878+
updatedSliceFound = true
879+
if tc.attemptUpdate && len(epSlice.Endpoints) != len(esToUpdate.Endpoints) {
880+
t.Errorf("Expected EndpointSlice to be updated with %d endpoints, got %d endpoints", len(esToUpdate.Endpoints), len(epSlice.Endpoints))
881+
}
882+
}
883+
if epSlice.Name == esToDelete.Name {
884+
deletedSliceFound = true
885+
}
886+
}
887+
888+
if createdSliceFound != tc.expectCreatedSlice {
889+
t.Errorf("Expected created EndpointSlice existence to be %t, got %t", tc.expectCreatedSlice, createdSliceFound)
890+
}
891+
892+
if updatedSliceFound != tc.expectUpdatedSlice {
893+
t.Errorf("Expected updated EndpointSlice existence to be %t, got %t", tc.expectUpdatedSlice, updatedSliceFound)
894+
}
895+
896+
if deletedSliceFound != tc.expectDeletedSlice {
897+
t.Errorf("Expected deleted EndpointSlice existence to be %t, got %t", tc.expectDeletedSlice, deletedSliceFound)
898+
}
899+
})
900+
}
901+
}
902+
745903
// Test Helpers
746904

747905
func newReconciler(client *fake.Clientset, nodes []*corev1.Node, maxEndpointsPerSlice int32) *reconciler {

0 commit comments

Comments
 (0)