Skip to content

Commit 3495738

Browse files
Merge pull request #745 from dmage/precheck
Bug 2048186: Fix panic in finalizer handler
2 parents 7023e84 + ebef866 commit 3495738

File tree

10 files changed

+181
-41
lines changed

10 files changed

+181
-41
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
tmp/_output
55
tmp/_test
66
deploy/test
7-
cluster-image-registry-operator
87

98
#idea dev tools
109
.idea/

pkg/operator/controller.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ func (c *Controller) sync() error {
278278
}
279279
c.syncStatus(cr, deploy, routes, applyError)
280280

281-
metadataChanged := strategy.Metadata(&prevCR.ObjectMeta, &cr.ObjectMeta)
281+
metadataChanged := strategy.Metadata(prevCR.ObjectMeta.DeepCopy(), &cr.ObjectMeta)
282282
specChanged := !reflect.DeepEqual(prevCR.Spec, cr.Spec)
283283
if metadataChanged || specChanged {
284284
difference, err := object.DiffString(prevCR, cr)
@@ -299,7 +299,12 @@ func (c *Controller) sync() error {
299299
updatedCR.ObjectMeta = cr.ObjectMeta
300300
}
301301
if specChanged {
302+
// FIXME: Here be dragons. The operator can
303+
// accidentally lose user-provided
304+
// configuration.
305+
managementState := updatedCR.Spec.ManagementState
302306
updatedCR.Spec = cr.Spec
307+
updatedCR.Spec.ManagementState = managementState
303308
}
304309

305310
updatedCR, err = c.clients.RegOp.ImageregistryV1().Configs().Update(
@@ -313,6 +318,10 @@ func (c *Controller) sync() error {
313318
// If we updated the Status field too, we'll make one more call and we
314319
// want it to succeed.
315320
cr.ResourceVersion = updatedCR.ResourceVersion
321+
322+
// Update prevCR to make diff accurate.
323+
prevCR.ObjectMeta = updatedCR.ObjectMeta
324+
prevCR.Spec = updatedCR.Spec
316325
}
317326

318327
cr.Status.ObservedGeneration = cr.Generation

pkg/operator/finalizer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (c *Controller) finalizeResources(o *imageregistryv1.Config) error {
5757
if cr == nil {
5858
// Skip using the cache here so we don't have as many
5959
// retries due to slow cache updates
60-
cr, err := client.Configs().Get(
60+
cr, err = client.Configs().Get(
6161
context.TODO(), o.Name, metav1.GetOptions{},
6262
)
6363
if err != nil {

test/e2e/aws_test.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -760,12 +760,6 @@ func TestAWSFinalizerDeleteS3Bucket(t *testing.T) {
760760
if exists {
761761
t.Errorf("s3 bucket should have been deleted, but it wasn't: %s", err)
762762
}
763-
764-
// Once the config object is deleted, the operator will create a new one.
765-
// The testing framework does not expect creation of a new config object
766-
// when it is in the teardown stage, so let's wait until the operator
767-
// stabilizes again.
768-
framework.WaitUntilImageRegistryIsAvailable(te)
769763
}
770764

771765
// createAWSConfigFile creates an AWS credentials config based on the contents of the Secret

test/framework/clusteroperators.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func AreClusterOperatorsHealthy(cos []configv1.ClusterOperator) error {
3434
return utilerrors.NewAggregate(errs)
3535
}
3636

37-
func EnsureClusterOperatorsAreHealthy(te TestEnv, interval, timeout time.Duration) {
37+
func WaitUntilClusterOperatorsAreHealthy(te TestEnv, interval, timeout time.Duration) {
3838
ctx := context.Background()
3939
start := time.Now()
4040
var lastErr error

test/framework/imageregistry.go

Lines changed: 79 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
appsv1 "k8s.io/api/apps/v1"
1111
"k8s.io/apimachinery/pkg/api/errors"
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/apimachinery/pkg/labels"
1314
"k8s.io/apimachinery/pkg/types"
1415
"k8s.io/apimachinery/pkg/util/wait"
1516
"k8s.io/client-go/util/retry"
@@ -88,36 +89,84 @@ func (c ImageRegistryConditions) String() string {
8889
)
8990
}
9091

91-
func ensureImageRegistryToBeRemoved(te TestEnv) {
92+
func removeImageRegistry(te TestEnv) {
93+
te.Logf("uninstalling the image registry...")
94+
95+
operatorDeployment, err := te.Client().Deployments(OperatorDeploymentNamespace).Get(
96+
context.Background(), OperatorDeploymentName, metav1.GetOptions{},
97+
)
98+
if err != nil {
99+
te.Fatalf("unable to get the operator deployment: %s", err)
100+
}
101+
102+
if !isDeploymentRolledOut(operatorDeployment) {
103+
te.Errorf("unexepected state: the operator is not rolled out before removing the image registry")
104+
}
105+
106+
if operatorDeployment.Spec.Replicas != nil && *operatorDeployment.Spec.Replicas == 0 {
107+
config, err := te.Client().Configs().Get(
108+
context.Background(), defaults.ImageRegistryResourceName, metav1.GetOptions{},
109+
)
110+
if errors.IsNotFound(err) {
111+
return
112+
} else if err != nil {
113+
te.Fatalf("unable to get the image registry config: %s", err)
114+
}
115+
conds := GetImageRegistryConditions(config)
116+
if !conds.Removed.IsTrue() {
117+
te.Fatalf("unable to uninstall the image registry: the operator is shutted down, but the image registry is not removed: %s", config.Spec.ManagementState, conds)
118+
}
119+
return
120+
}
121+
122+
err = wait.PollImmediate(2*time.Second, 30*time.Second, func() (stop bool, err error) {
123+
cr, err := te.Client().Configs().Get(
124+
context.Background(), defaults.ImageRegistryResourceName, metav1.GetOptions{},
125+
)
126+
if err != nil {
127+
te.Logf("the image registry config is not found: %s", err)
128+
return false, nil
129+
}
130+
if cr.DeletionTimestamp != nil {
131+
te.Logf("the image registry config is being deleted: %s", cr.DeletionTimestamp)
132+
return false, nil
133+
}
134+
return true, nil
135+
})
136+
if err != nil {
137+
te.Fatalf("failed to wait until the operator creates the config object: %s", err)
138+
}
139+
92140
if _, err := te.Client().Configs().Patch(
93141
context.Background(),
94142
defaults.ImageRegistryResourceName,
95143
types.MergePatchType,
96144
[]byte(`{"spec": {"managementState": "Removed"}}`),
97145
metav1.PatchOptions{},
98146
); err != nil {
99-
if errors.IsNotFound(err) {
100-
// That's not exactly what we are asked for. And few seconds later
101-
// the operator may bootstrap it. However, if the operator is
102-
// disabled, it means the registry is not installed and we're
103-
// already in the desired state.
104-
return
105-
}
106147
te.Fatalf("unable to uninstall the image registry: %s", err)
107148
}
108149

109150
var cr *imageregistryapiv1.Config
110-
err := wait.Poll(5*time.Second, AsyncOperationTimeout, func() (stop bool, err error) {
151+
err = wait.Poll(5*time.Second, AsyncOperationTimeout, func() (stop bool, err error) {
111152
cr, err = te.Client().Configs().Get(
112153
context.Background(), defaults.ImageRegistryResourceName, metav1.GetOptions{},
113154
)
114155
if errors.IsNotFound(err) {
156+
te.Logf("waiting for the registry to be removed: the config object does not exist?!")
115157
cr = nil
116158
return true, nil
117159
} else if err != nil {
160+
te.Logf("waiting for the registry to be removed: %s", err)
118161
return false, err
119162
}
120163

164+
if cr.Spec.ManagementState != "Removed" {
165+
DumpYAML(te, "unexpected management state in the config object", cr)
166+
DumpOperatorLogs(te)
167+
te.Fatalf("unexpected management state: got %s, want Removed", cr.Spec.ManagementState)
168+
}
169+
121170
conds := GetImageRegistryConditions(cr)
122171
te.Logf("waiting for the registry to be removed: %s", conds)
123172
return conds.Progressing.IsFalse() && conds.Removed.IsTrue(), nil
@@ -130,6 +179,7 @@ func ensureImageRegistryToBeRemoved(te TestEnv) {
130179
}
131180

132181
func deleteImageRegistryConfig(te TestEnv) {
182+
te.Logf("deleting the image registry config...")
133183
// TODO(dmage): the finalizer should be removed by the operator
134184
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
135185
cr, err := te.Client().Configs().Get(
@@ -165,6 +215,19 @@ func deleteImageRegistryConfig(te TestEnv) {
165215
}
166216
}
167217

218+
func deleteLeaderElectionConfigMap(te TestEnv, name string) {
219+
err := te.Client().ConfigMaps(OperatorDeploymentNamespace).Delete(
220+
context.Background(),
221+
name,
222+
metav1.DeleteOptions{},
223+
)
224+
if err == nil {
225+
te.Logf("leader election configmap %s deleted", name)
226+
} else if !errors.IsNotFound(err) {
227+
te.Errorf("unable to delete leader election configmap %s: %s", name, err)
228+
}
229+
}
230+
168231
func deleteNodeCADaemonSet(te TestEnv) {
169232
ds, err := te.Client().DaemonSets(defaults.ImageRegistryOperatorNamespace).Get(
170233
context.Background(),
@@ -224,18 +287,16 @@ func deleteImageRegistryCertificates(te TestEnv) {
224287
}
225288

226289
func deleteImageRegistryAlwaysPresentResources(te TestEnv) {
290+
te.Logf("deleting always-present resources...")
227291
defer deleteImageRegistryCertificates(te)
228292
defer deleteNodeCADaemonSet(te)
293+
defer deleteLeaderElectionConfigMap(te, "openshift-master-controllers")
229294
}
230295

231296
func RemoveImageRegistry(te TestEnv) {
232-
te.Logf("uninstalling the image registry...")
233-
ensureImageRegistryToBeRemoved(te)
234-
te.Logf("stopping the operator...")
297+
removeImageRegistry(te)
235298
StopDeployment(te, OperatorDeploymentNamespace, OperatorDeploymentName)
236-
te.Logf("deleting the image registry config...")
237299
deleteImageRegistryConfig(te)
238-
te.Logf("deleting always-present resources...")
239300
deleteImageRegistryAlwaysPresentResources(te)
240301
}
241302

@@ -255,7 +316,6 @@ func DeployImageRegistry(te TestEnv, spec *imageregistryapiv1.ImageRegistrySpec)
255316
}
256317
}
257318

258-
te.Logf("starting the operator...")
259319
startOperator(te)
260320
}
261321

@@ -505,10 +565,10 @@ func SetupAvailableImageRegistry(t *testing.T, spec *imageregistryapiv1.ImageReg
505565
}
506566

507567
func TeardownImageRegistry(te TestEnv) {
508-
defer func() {
509-
RemoveImageRegistry(te)
510-
EnsureClusterOperatorsAreHealthy(te, 10*time.Second, AsyncOperationTimeout)
511-
}()
568+
defer WaitUntilClusterOperatorsAreHealthy(te, 10*time.Second, AsyncOperationTimeout)
569+
defer CheckAbsenceOfOperatorPods(te)
570+
defer RemoveImageRegistry(te)
571+
defer CheckPodsAreNotRestarted(te, labels.Everything())
512572
if te.Failed() {
513573
DumpImageRegistryResource(te)
514574
DumpOperatorDeployment(te)

test/framework/logs.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"io"
88
"regexp"
9+
"strings"
910

1011
corev1 "k8s.io/api/core/v1"
1112
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -44,7 +45,7 @@ func (psl PodSetLogs) Contains(re *regexp.Regexp) bool {
4445
return false
4546
}
4647

47-
func GetLogsByLabelSelector(client *Clientset, namespace string, labelSelector *metav1.LabelSelector) (PodSetLogs, error) {
48+
func GetLogsByLabelSelector(client *Clientset, namespace string, labelSelector *metav1.LabelSelector, previous bool) (PodSetLogs, error) {
4849
selector, err := metav1.LabelSelectorAsSelector(labelSelector)
4950
if err != nil {
5051
return nil, err
@@ -61,7 +62,7 @@ func GetLogsByLabelSelector(client *Clientset, namespace string, labelSelector *
6162

6263
podLogs := make(PodSetLogs)
6364
for _, pod := range podList.Items {
64-
podLog, err := readPodLogs(client, &pod)
65+
podLog, err := readPodLogs(client, &pod, previous)
6566
if err != nil {
6667
return nil, err
6768
}
@@ -76,7 +77,7 @@ func GetLogsForPod(client *Clientset, namespace string, podName string) (PodSetL
7677
if err != nil {
7778
return nil, err
7879
}
79-
podLogs, err := readPodLogs(client, pod)
80+
podLogs, err := readPodLogs(client, pod, false)
8081
if err != nil {
8182
return nil, err
8283
}
@@ -85,12 +86,13 @@ func GetLogsForPod(client *Clientset, namespace string, podName string) (PodSetL
8586
return podSetLogs, nil
8687
}
8788

88-
func readPodLogs(client *Clientset, pod *corev1.Pod) (PodLog, error) {
89+
func readPodLogs(client *Clientset, pod *corev1.Pod, previous bool) (PodLog, error) {
8990
podLog := make(PodLog)
9091
for _, container := range pod.Spec.Containers {
9192
var containerLog ContainerLog
9293
log, err := client.Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{
9394
Container: container.Name,
95+
Previous: previous,
9496
}).Stream(context.Background())
9597
if err != nil {
9698
return nil, fmt.Errorf("failed to get logs for pod %s: %s", pod.Name, err)
@@ -114,13 +116,16 @@ func readPodLogs(client *Clientset, pod *corev1.Pod) (PodLog, error) {
114116

115117
func DumpPodLogs(logger Logger, podLogs PodSetLogs) {
116118
if len(podLogs) > 0 {
117-
for pod, logs := range podLogs {
118-
logger.Logf("=== logs for pod/%s", pod)
119-
for _, line := range logs {
120-
logger.Logf("%s", line)
119+
for pod, containers := range podLogs {
120+
for container, logs := range containers {
121+
var buf strings.Builder
122+
fmt.Fprintf(&buf, "logs for pod/%s (container %s)\n", pod, container)
123+
for _, line := range logs {
124+
fmt.Fprintf(&buf, "%s", line)
125+
}
126+
logger.Logf("%s", buf.String())
121127
}
122128
}
123-
logger.Logf("=== end of logs")
124129
}
125130
}
126131

test/framework/operator.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package framework
22

33
import (
44
"context"
5+
"strings"
56
"time"
67

78
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -10,6 +11,7 @@ import (
1011
)
1112

1213
func startOperator(te TestEnv) {
14+
te.Logf("starting the operator...")
1315
if _, err := te.Client().Deployments(OperatorDeploymentNamespace).Patch(
1416
context.Background(),
1517
OperatorDeploymentName,
@@ -32,7 +34,25 @@ func DumpOperatorDeployment(te TestEnv) {
3234
DumpYAML(te, "the operator deployment", deployment)
3335
}
3436

37+
func CheckAbsenceOfOperatorPods(te TestEnv) {
38+
pods, err := te.Client().Pods(OperatorDeploymentNamespace).List(context.Background(), metav1.ListOptions{})
39+
if err != nil {
40+
te.Fatalf("failed to list the pods: %s", err)
41+
}
42+
i := 0
43+
for _, pod := range pods.Items {
44+
if strings.HasPrefix(pod.Name, OperatorDeploymentName+"-") {
45+
te.Errorf("unexpected operator pod %s (%s old)", pod.Name, time.Since(pod.CreationTimestamp.Time))
46+
i++
47+
}
48+
}
49+
if i > 0 {
50+
te.Fatalf("found %d unexpected operator pod(s)", i)
51+
}
52+
}
53+
3554
func StopDeployment(te TestEnv, namespace, name string) {
55+
te.Logf("scaling down the deployment %s/%s...", namespace, name)
3656
var realErr error
3757
err := wait.Poll(1*time.Second, 30*time.Second, func() (bool, error) {
3858
if _, realErr = te.Client().Deployments(namespace).Patch(
@@ -59,7 +79,7 @@ func GetOperatorLogs(client *Clientset) (PodSetLogs, error) {
5979
MatchLabels: map[string]string{
6080
"name": "cluster-image-registry-operator",
6181
},
62-
})
82+
}, false)
6383
}
6484

6585
func DumpOperatorLogs(te TestEnv) {

0 commit comments

Comments
 (0)