Skip to content

Commit 5deff60

Browse files
james-milliganskyerusKavindu-Dodan
authored
test: add retry to async podMutator.BackfillPermissions integration test (#303)
Signed-off-by: James Milligan <[email protected]> Signed-off-by: James Milligan <[email protected]> Co-authored-by: Skye Gill <[email protected]> Co-authored-by: Kavindu Dodanduwa <[email protected]>
1 parent 6a78925 commit 5deff60

File tree

5 files changed

+99
-65
lines changed

5 files changed

+99
-65
lines changed

main.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,11 @@ func main() {
219219

220220
setupLog.Info("restoring flagd-kubernetes-sync cluster role binding subjects from current cluster state")
221221
// backfill can be handled asynchronously, so we do not need to block via the channel
222-
go podMutator.BackfillPermissions(ctx, make(chan struct{}, 1))
222+
go func() {
223+
if err := podMutator.BackfillPermissions(ctx); err != nil {
224+
setupLog.Error(err, "problem restoring flagd-kubernetes-sync cluster role binding ")
225+
}
226+
}()
223227

224228
if err := <-errChan; err != nil {
225229
setupLog.Error(err, "problem running manager")

webhooks/pod_webhook.go

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,14 @@ type PodMutator struct {
4949
}
5050

5151
// BackfillPermissions recovers the state of the flagd-kubernetes-sync role binding in the event of upgrade
52-
func (m *PodMutator) BackfillPermissions(ctx context.Context, backfillComplete chan struct{}) {
53-
defer func() {
54-
backfillComplete <- struct{}{}
55-
}()
56-
52+
func (m *PodMutator) BackfillPermissions(ctx context.Context) error {
5753
for i := 0; i < 5; i++ {
5854
// fetch all pods with the "openfeature.dev/enabled" annotation set to "true"
5955
podList := &corev1.PodList{}
6056
err := m.Client.List(ctx, podList, client.MatchingFields{OpenFeatureEnabledAnnotationPath: "true"})
6157
if err != nil {
6258
if !goErr.Is(err, &cache.ErrCacheNotStarted{}) {
63-
m.Log.Error(err, "unable to list annotated pods", "webhook", OpenFeatureEnabledAnnotationPath)
64-
return
59+
return err
6560
}
6661
time.Sleep(1 * time.Second)
6762
continue
@@ -79,14 +74,9 @@ func (m *PodMutator) BackfillPermissions(ctx context.Context, backfillComplete c
7974
)
8075
}
8176
}
82-
return
83-
}
84-
err := goErr.New("unable to backfill permissions for the flagd-kubernetes-sync role binding: timeout")
85-
m.Log.Error(
86-
err,
87-
"webhook",
88-
OpenFeatureEnabledAnnotationPath,
89-
)
77+
return nil
78+
}
79+
return goErr.New("unable to backfill permissions for the flagd-kubernetes-sync role binding: timeout")
9080
}
9181

9282
// Handle injects the flagd sidecar (if the prerequisites are all met)

webhooks/pod_webhook_test.go

Lines changed: 71 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package webhooks
22

33
import (
4+
"errors"
45
"fmt"
6+
"reflect"
7+
"time"
58

69
. "github.com/onsi/ginkgo"
710
. "github.com/onsi/gomega"
@@ -164,24 +167,78 @@ func podMutationWebhookCleanup() {
164167
var _ = Describe("pod mutation webhook", func() {
165168

166169
It("should backfill role binding subjects when annotated pods already exist in the cluster", func() {
167-
pod1 := getPod(existingPod1Name)
168-
pod2 := getPod(existingPod2Name)
169-
// Pod 1 and 2 must not have been mutated by the webhook (we want the rolebinding to be updated via BackfillPermissions)
170-
Expect(len(pod1.Spec.Containers)).To(Equal(1))
171-
Expect(len(pod2.Spec.Containers)).To(Equal(1))
172-
rb := getRoleBinding(clusterRoleBindingName)
173-
Expect(rb.Subjects).To(ContainElement(v1.Subject{
174-
Kind: "ServiceAccount",
175-
APIGroup: "",
176-
Name: existingPod1ServiceAccountName,
177-
Namespace: mutatePodNamespace,
178-
}))
179-
Expect(rb.Subjects).To(ContainElement(v1.Subject{
170+
// this integration test confirms the proper execution of the podMutator.BackfillPermissions method
171+
// this method is responsible for backfilling the subjects of the open-feature-operator-flagd-kubernetes-sync
172+
// cluster role binding, for previously existing pods on startup
173+
// a retry is required on this test as the backfilling occurs asynchronously
174+
var finalError error
175+
for i := 0; i < 3; i++ {
176+
pod1 := getPod(existingPod1Name)
177+
pod2 := getPod(existingPod2Name)
178+
// Pod 1 and 2 must not have been mutated by the webhook (we want the rolebinding to be updated via BackfillPermissions)
179+
180+
if len(pod1.Spec.Containers) != 1 {
181+
finalError = errors.New("pod1 has had a container injected, it should not be mutated by the webhook")
182+
time.Sleep(1 * time.Second)
183+
continue
184+
}
185+
if len(pod2.Spec.Containers) != 1 {
186+
finalError = errors.New("pod2 has had a container injected, it should not be mutated by the webhook")
187+
time.Sleep(1 * time.Second)
188+
continue
189+
}
190+
191+
rb := getRoleBinding(clusterRoleBindingName)
192+
193+
unexpectedServiceAccount := ""
194+
for _, subject := range rb.Subjects {
195+
if !reflect.DeepEqual(subject, v1.Subject{
196+
Kind: "ServiceAccount",
197+
APIGroup: "",
198+
Name: existingPod1ServiceAccountName,
199+
Namespace: mutatePodNamespace,
200+
}) &&
201+
!reflect.DeepEqual(subject, v1.Subject{
202+
Kind: "ServiceAccount",
203+
APIGroup: "",
204+
Name: existingPod2ServiceAccountName,
205+
Namespace: mutatePodNamespace,
206+
}) {
207+
unexpectedServiceAccount = subject.Name
208+
}
209+
}
210+
if unexpectedServiceAccount != "" {
211+
finalError = fmt.Errorf("unexpected subject found in role binding, name: %s", unexpectedServiceAccount)
212+
time.Sleep(1 * time.Second)
213+
continue
214+
}
215+
finalError = nil
216+
break
217+
}
218+
Expect(finalError).ShouldNot(HaveOccurred())
219+
})
220+
221+
It("should update cluster role binding's subjects", func() {
222+
pod := testPod(defaultPodName, defaultPodServiceAccountName, map[string]string{
223+
"openfeature.dev": "enabled",
224+
"openfeature.dev/featureflagconfiguration": fmt.Sprintf("%s/%s", mutatePodNamespace, featureFlagConfigurationName),
225+
})
226+
err := k8sClient.Create(testCtx, pod)
227+
Expect(err).ShouldNot(HaveOccurred())
228+
229+
crb := &v1.ClusterRoleBinding{}
230+
err = k8sClient.Get(testCtx, client.ObjectKey{Name: clusterRoleBindingName}, crb)
231+
Expect(err).ShouldNot(HaveOccurred())
232+
233+
Expect(len(crb.Subjects)).Should(Equal(3))
234+
Expect(crb.Subjects).To(ContainElement(v1.Subject{
180235
Kind: "ServiceAccount",
181236
APIGroup: "",
182-
Name: existingPod2ServiceAccountName,
237+
Name: defaultPodServiceAccountName,
183238
Namespace: mutatePodNamespace,
184239
}))
240+
241+
podMutationWebhookCleanup()
185242
})
186243

187244
It("should create flagd sidecar", func() {
@@ -264,29 +321,6 @@ var _ = Describe("pod mutation webhook", func() {
264321
Expect(err).Should(HaveOccurred())
265322
})
266323

267-
It("should update cluster role binding's subjects", func() {
268-
pod := testPod(defaultPodName, defaultPodServiceAccountName, map[string]string{
269-
"openfeature.dev": "enabled",
270-
"openfeature.dev/featureflagconfiguration": fmt.Sprintf("%s/%s", mutatePodNamespace, featureFlagConfigurationName),
271-
})
272-
err := k8sClient.Create(testCtx, pod)
273-
Expect(err).ShouldNot(HaveOccurred())
274-
275-
crb := &v1.ClusterRoleBinding{}
276-
err = k8sClient.Get(testCtx, client.ObjectKey{Name: clusterRoleBindingName}, crb)
277-
Expect(err).ShouldNot(HaveOccurred())
278-
279-
Expect(len(crb.Subjects)).Should(Equal(3))
280-
Expect(crb.Subjects).To(ContainElement(v1.Subject{
281-
Kind: "ServiceAccount",
282-
APIGroup: "",
283-
Name: defaultPodServiceAccountName,
284-
Namespace: mutatePodNamespace,
285-
}))
286-
287-
podMutationWebhookCleanup()
288-
})
289-
290324
It("should create config map if sync provider isn't kubernetes", func() {
291325
ffConfig := &corev1alpha1.FeatureFlagConfiguration{}
292326
err := k8sClient.Get(

webhooks/run_test.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
// +kubebuilder:scaffold:imports
1616
)
1717

18-
func run(ctx context.Context, cfg *rest.Config, scheme *runtime.Scheme, opts *envtest.WebhookInstallOptions, backfillComplete chan struct{}) error {
18+
func run(ctx context.Context, cfg *rest.Config, scheme *runtime.Scheme, opts *envtest.WebhookInstallOptions) error {
1919
ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
2020

2121
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
@@ -61,13 +61,19 @@ func run(ctx context.Context, cfg *rest.Config, scheme *runtime.Scheme, opts *en
6161
},
6262
})
6363

64-
go func(ctx context.Context, backfillComplete chan struct{}) {
65-
podMutator.BackfillPermissions(ctx, backfillComplete)
66-
}(ctx, backfillComplete)
64+
// podMutator.BackfillPermissions is dependent upon mgr.Start executing correctly
65+
// due to its time.Sleep within the retry loop, mgr.Start will always fail before podMutator.BackfillPermissions
66+
// times out, resulting in the most relevant error being passed into the errChan
67+
// mgr.Start will also only output an error value of nil once the context is closed (this occurs when the test suite is terminating)
68+
errChan := make(chan error, 1)
69+
go func() {
70+
if err := podMutator.BackfillPermissions(ctx); err != nil {
71+
errChan <- err
72+
}
73+
}()
74+
go func() {
75+
errChan <- mgr.Start(ctx)
76+
}()
6777

68-
if err := mgr.Start(ctx); err != nil {
69-
return err
70-
}
71-
72-
return nil
78+
return <-errChan
7379
}

webhooks/suite_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,12 +158,12 @@ var _ = BeforeSuite(func() {
158158
setupPreviouslyExistingPods()
159159

160160
By("running webhook server")
161-
backfillComplete := make(chan struct{}, 1)
162161
go func() {
163-
if err := run(testCtx, cfg, scheme, &testEnv.WebhookInstallOptions, backfillComplete); err != nil {
162+
if err := run(testCtx, cfg, scheme, &testEnv.WebhookInstallOptions); err != nil {
164163
logf.Log.Error(err, "run webhook server")
165164
}
166165
}()
166+
167167
d := &net.Dialer{Timeout: time.Second}
168168
Eventually(func() error {
169169
serverURL := fmt.Sprintf("%s:%d", testEnv.WebhookInstallOptions.LocalServingHost, testEnv.WebhookInstallOptions.LocalServingPort)
@@ -178,7 +178,7 @@ var _ = BeforeSuite(func() {
178178
}
179179
return nil
180180
}).Should(Succeed())
181-
<-backfillComplete
181+
182182
By("setting up resources")
183183
setupMutatePodResources()
184184
setupValidateFeatureFlagConfigurationResources()

0 commit comments

Comments
 (0)