Skip to content

Commit e7bc442

Browse files
authored
Re-work how we wait for API/Ingress to update (#82)
* Re-work how we wait for API/Ingress to update * check for error * move ResetRoutes out of verifyDomain
1 parent 55338a5 commit e7bc442

File tree

5 files changed

+104
-62
lines changed

5 files changed

+104
-62
lines changed

config/rbac/role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,14 @@ rules:
129129
- get
130130
- list
131131
- watch
132+
- apiGroups:
133+
- config.openshift.io
134+
resources:
135+
- dnses
136+
verbs:
137+
- get
138+
- list
139+
- watch
132140
- apiGroups:
133141
- config.openshift.io
134142
resources:

controllers/clusterrelocation_controller.go

Lines changed: 88 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ package controllers
1818

1919
import (
2020
"context"
21+
"crypto/tls"
2122
"fmt"
2223
"net"
24+
"time"
2325

2426
rhsysenggithubiov1beta1 "github.com/RHsyseng/cluster-relocation-operator/api/v1beta1"
2527
reconcileACM "github.com/RHsyseng/cluster-relocation-operator/internal/acm"
@@ -31,6 +33,7 @@ import (
3133
reconcilePullSecret "github.com/RHsyseng/cluster-relocation-operator/internal/pullSecret"
3234
registryCert "github.com/RHsyseng/cluster-relocation-operator/internal/registryCert"
3335
reconcileSSH "github.com/RHsyseng/cluster-relocation-operator/internal/ssh"
36+
"github.com/RHsyseng/cluster-relocation-operator/internal/util"
3437
agentv1 "github.com/stolostron/klusterlet-addon-controller/pkg/apis/agent/v1"
3538
clusterv1 "open-cluster-management.io/api/cluster/v1"
3639
operatorapiv1 "open-cluster-management.io/api/operator/v1"
@@ -75,6 +78,7 @@ const relocationFinalizer = "relocationfinalizer"
7578
//+kubebuilder:rbac:groups="",resources=secrets,verbs=watch;list
7679
//+kubebuilder:rbac:groups="",resources=configmaps,verbs=watch;list
7780
//+kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions,verbs=get;watch;list
81+
//+kubebuilder:rbac:groups=config.openshift.io,resources=dnses,verbs=get;watch;list
7882
//+kubebuilder:rbac:groups=config.openshift.io,resources=imagedigestmirrorsets,verbs=watch;list
7983
//+kubebuilder:rbac:groups=operators.coreos.com,resources=catalogsources,verbs=watch;list
8084
//+kubebuilder:rbac:groups=machineconfiguration.openshift.io,resources=machineconfigs,verbs=watch;list
@@ -181,24 +185,6 @@ func (r *ClusterRelocationReconciler) Reconcile(ctx context.Context, req ctrl.Re
181185
return ctrl.Result{}, err
182186
}
183187

184-
// Applies a new certificate and domain alias to the API server
185-
if err := reconcileAPI.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
186-
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.APIReconciliationFailedReason, err.Error())
187-
return ctrl.Result{}, err
188-
}
189-
190-
// Applies a new certificate and domain alias to the Apps ingressesed
191-
if err := reconcileIngress.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
192-
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.IngressReconciliationFailedReason, err.Error())
193-
return ctrl.Result{}, err
194-
}
195-
196-
// Apply a new cluster-wide pull secret
197-
if err := reconcilePullSecret.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
198-
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.PullSecretReconciliationFailedReason, err.Error())
199-
return ctrl.Result{}, err
200-
}
201-
202188
// Applies a SSH key for the 'core' user
203189
if err := reconcileSSH.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
204190
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.SSHReconciliationFailedReason, err.Error())
@@ -217,12 +203,40 @@ func (r *ClusterRelocationReconciler) Reconcile(ctx context.Context, req ctrl.Re
217203
return ctrl.Result{}, err
218204
}
219205

206+
// Apply a new cluster-wide pull secret
207+
if err := reconcilePullSecret.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
208+
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.PullSecretReconciliationFailedReason, err.Error())
209+
return ctrl.Result{}, err
210+
}
211+
220212
// Applies new catalog sources
221213
if err := reconcileCatalog.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
222214
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.CatalogReconciliationFailedReason, err.Error())
223215
return ctrl.Result{}, err
224216
}
225217

218+
// Applies a new certificate and domain alias to the Ingress
219+
if err := reconcileIngress.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
220+
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.IngressReconciliationFailedReason, err.Error())
221+
return ctrl.Result{}, err
222+
}
223+
224+
// Applies a new certificate and domain alias to the API server
225+
if err := reconcileAPI.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
226+
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.APIReconciliationFailedReason, err.Error())
227+
return ctrl.Result{}, err
228+
}
229+
230+
if err := r.verifyDomain(ctx, relocation.Spec.Domain, logger); err != nil {
231+
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.InProgressReconciliationFailedReason, err.Error())
232+
return ctrl.Result{}, err
233+
}
234+
235+
if err := reconcileIngress.ResetRoutes(ctx, r.Client, fmt.Sprintf("apps.%s", relocation.Spec.Domain), logger); err != nil {
236+
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.InProgressReconciliationFailedReason, err.Error())
237+
return ctrl.Result{}, err
238+
}
239+
226240
// Registers to ACM
227241
if err := reconcileACM.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
228242
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.ACMReconciliationFailedReason, err.Error())
@@ -328,12 +342,68 @@ func (r *ClusterRelocationReconciler) finalizeRelocation(ctx context.Context, lo
328342
if err := reconcileAPI.Cleanup(ctx, r.Client, logger); err != nil {
329343
return err
330344
}
345+
346+
clusterDNS := &configv1.DNS{}
347+
if err := r.Client.Get(ctx, types.NamespacedName{Name: "cluster"}, clusterDNS); err != nil {
348+
return err
349+
}
350+
if err := r.verifyDomain(ctx, clusterDNS.Spec.BaseDomain, logger); err != nil {
351+
return err
352+
}
353+
354+
if err := reconcileIngress.ResetRoutes(ctx, r.Client, fmt.Sprintf("apps.%s", clusterDNS.Spec.BaseDomain), logger); err != nil {
355+
return err
356+
}
331357
}
332358

333359
logger.Info("Successfully finalized ClusterRelocation")
334360
return nil
335361
}
336362

363+
func (r *ClusterRelocationReconciler) verifyDomain(ctx context.Context, domainName string, logger logr.Logger) error {
364+
urls := []map[string]string{
365+
{
366+
"type": "ingress",
367+
"url": fmt.Sprintf("test.apps.%s:443", domainName),
368+
"commonName": fmt.Sprintf("*.apps.%s", domainName),
369+
},
370+
{
371+
"type": "kube-apiserver",
372+
"url": fmt.Sprintf("api.%s:6443", domainName),
373+
"commonName": fmt.Sprintf("api.%s", domainName),
374+
},
375+
}
376+
377+
for _, v := range urls {
378+
updated := false
379+
for {
380+
conn, err := tls.Dial("tcp", v["url"], &tls.Config{InsecureSkipVerify: true})
381+
if err != nil {
382+
return err
383+
}
384+
certs := conn.ConnectionState().PeerCertificates
385+
conn.Close()
386+
for _, cert := range certs {
387+
if cert.Subject.CommonName == v["commonName"] {
388+
updated = true
389+
}
390+
}
391+
if updated {
392+
// ensure that ClusterOperator has settled
393+
if err := util.WaitForCO(ctx, r.Client, logger, v["type"]); err != nil {
394+
return err
395+
}
396+
break
397+
} else {
398+
logger.Info(fmt.Sprintf("Waiting for %s to update", v["type"]))
399+
time.Sleep(time.Second * 10)
400+
}
401+
}
402+
}
403+
404+
return nil
405+
}
406+
337407
func (r *ClusterRelocationReconciler) installSchemes() error {
338408
if err := configv1.Install(r.Scheme); err != nil { // Add config.openshift.io/v1 to the scheme
339409
return err

internal/api/reconcile.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66

77
rhsysenggithubiov1beta1 "github.com/RHsyseng/cluster-relocation-operator/api/v1beta1"
88
secrets "github.com/RHsyseng/cluster-relocation-operator/internal/secrets"
9-
"github.com/RHsyseng/cluster-relocation-operator/internal/util"
109
"github.com/go-logr/logr"
1110

1211
configv1 "github.com/openshift/api/config/v1"
@@ -117,9 +116,6 @@ func Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme, rel
117116
return err
118117
}
119118
if op != controllerutil.OperationResultNone {
120-
if err := util.WaitForCO(ctx, c, logger, "kube-apiserver", true); err != nil {
121-
return err
122-
}
123119
logger.Info("APIServer modified", "OperationResult", op)
124120
}
125121
return nil
@@ -137,11 +133,6 @@ func Cleanup(ctx context.Context, c client.Client, logger logr.Logger) error {
137133
return err
138134
}
139135
if op != controllerutil.OperationResultNone {
140-
// if we let the finalizer finish before the API server has updated, it will delete a MachineConfig and cause a reboot
141-
// if the node reboots before the API server has updated, it can cause the API server to lock up on the next boot
142-
if err := util.WaitForCO(ctx, c, logger, "kube-apiserver", true); err != nil {
143-
return err
144-
}
145136
logger.Info("APIServer reverted to original state", "OperationResult", op)
146137
}
147138
return nil

internal/ingress/reconcile.go

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,6 @@ func Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme, rel
144144
}
145145

146146
if op != controllerutil.OperationResultNone {
147-
if err := util.WaitForCO(ctx, c, logger, "ingress", true); err != nil {
148-
return err
149-
}
150147
logger.Info("IngressController modified", "OperationResult", op)
151148
}
152149

@@ -186,16 +183,9 @@ func Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme, rel
186183
}
187184

188185
if op != controllerutil.OperationResultNone {
189-
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver", true); err != nil {
190-
return err
191-
}
192186
logger.Info("Ingress domain aliases modified", "OperationResult", op)
193187
}
194188

195-
if err := resetRoutes(ctx, c, fmt.Sprintf("apps.%s", relocation.Spec.Domain), logger); err != nil {
196-
return err
197-
}
198-
199189
return nil
200190
}
201191

@@ -213,9 +203,6 @@ func Cleanup(ctx context.Context, c client.Client, logger logr.Logger) error {
213203
return err
214204
}
215205
if op != controllerutil.OperationResultNone {
216-
if err := util.WaitForCO(ctx, c, logger, "ingress", true); err != nil {
217-
return err
218-
}
219206
logger.Info("Ingress Controller reverted to original state", "OperationResult", op)
220207
}
221208
ingress := &configv1.Ingress{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}}
@@ -228,34 +215,27 @@ func Cleanup(ctx context.Context, c client.Client, logger logr.Logger) error {
228215
return err
229216
}
230217
if op != controllerutil.OperationResultNone {
231-
// let the openshift-apiserver operator settle before deleting the Routes
232-
// this ensures that the Routes get the proper domain when they are re-created
233-
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver", true); err != nil {
234-
return err
235-
}
236218
logger.Info("Cluster Ingress reverted to original state", "OperationResult", op)
237-
238-
}
239-
240-
if err := resetRoutes(ctx, c, ingress.Spec.Domain, logger); err != nil { // reset routes to their original domain if needed
241-
return err
242219
}
243220

244221
return nil
245222
}
246223

247-
func resetRoutes(ctx context.Context, c client.Client, domainName string, logger logr.Logger) error {
248-
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver", false); err != nil {
224+
func ResetRoutes(ctx context.Context, c client.Client, domainName string, logger logr.Logger) error {
225+
routes := &routev1.RouteList{}
226+
if err := c.List(ctx, routes); err != nil {
249227
return err
250228
}
251229

252-
routes := &routev1.RouteList{}
253-
if err := c.List(ctx, routes); err != nil {
230+
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver"); err != nil {
254231
return err
255232
}
256233

257234
for _, v := range routes.Items {
258235
if v.Namespace == "openshift-console" || v.Namespace == "openshift-authentication" || v.Namespace == "open-cluster-management-agent-addon" {
236+
// open-cluster-management-agent-addon is ignored because right now the Klusterlet Add-on ignores the "appsDomain" setting
237+
// A PR has been opened to correct this: https://github.com/stolostron/multicloud-operators-foundation/pull/642
238+
// without this fix, the Route created by the Klusterlet is always re-created with the original domain
259239
continue
260240
}
261241
for _, w := range v.Status.Ingress {

internal/util/util.go

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,7 @@ import (
1414
//+kubebuilder:rbac:groups=config.openshift.io,resources=clusteroperators,verbs=get;list;watch
1515

1616
// Waits for the operator to update before returning
17-
func WaitForCO(ctx context.Context, c client.Client, logger logr.Logger, operator string, waitProgressingTrue bool) error {
18-
if waitProgressingTrue {
19-
logger.Info(fmt.Sprintf("Waiting for %s Progressing to be %s", operator, configv1.ConditionTrue))
20-
if err := waitStatus(ctx, c, logger, operator, configv1.OperatorProgressing, configv1.ConditionTrue); err != nil {
21-
return err
22-
}
23-
}
24-
17+
func WaitForCO(ctx context.Context, c client.Client, logger logr.Logger, operator string) error {
2518
logger.Info(fmt.Sprintf("Waiting for %s Progressing to be %s", operator, configv1.ConditionFalse))
2619
if err := waitStatus(ctx, c, logger, operator, configv1.OperatorProgressing, configv1.ConditionFalse); err != nil {
2720
return err

0 commit comments

Comments
 (0)