Skip to content

Commit bf998e8

Browse files
authored
NETOBSERV-2494: refresh cluster info periodically (#2152)
* NETOBSERV-2494: refresh cluster info periodically - Periodic refresh of cluster info; it requires a mutex - Status update after refresh - Fix issue with static plugin showing "Could not determine if static plugin is supported" because cluster info isn't ready (this error still happens but now it will be retried instead of ignoring the error) * Fix errors at static controller startup - Refactor the controllers Start methods to return an optional post-create hook. This hook is passed to the manager and is going to be called only after the cache was initialized - Use exponential backoff for static controller retries - Fail when all attempts failed - In tests, set up openshift version to 4.20 (anything that supports static plugins would work as well)
1 parent 8997e75 commit bf998e8

File tree

11 files changed

+202
-87
lines changed

11 files changed

+202
-87
lines changed

internal/controller/consoleplugin/consoleplugin_static_reconciler.go

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,9 @@ func (r *CPReconciler) ReconcileStaticPlugin(ctx context.Context, enable bool) e
3838

3939
// Reconcile is the reconciler entry point to reconcile the static plugin state with the desired configuration
4040
func (r *CPReconciler) reconcileStatic(ctx context.Context, desired *flowslatest.FlowCollector) error {
41-
l := log.FromContext(ctx).WithName("console-plugin")
41+
l := log.FromContext(ctx).WithName("static-console-plugin")
4242
ctx = log.IntoContext(ctx, l)
4343

44-
// Skip static reconciler on older OpenShift (feature not implemented)
45-
if less415, _, err := r.ClusterInfo.IsOpenShiftVersionLessThan("4.15.0"); less415 {
46-
l.Info("Static plugin not supported for this version of OpenShift; skipping")
47-
r.Managed.TryDeleteAll(ctx)
48-
return nil
49-
} else if err != nil {
50-
l.Error(err, "Could not determine if static plugin is supported; proceed with deploying")
51-
}
52-
5344
// Retrieve current owned objects
5445
err := r.Managed.FetchAll(ctx)
5546
if err != nil {

internal/controller/flowcollector_controller.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ type FlowCollectorReconciler struct {
3737
watcher *watchers.Watcher
3838
}
3939

40-
func Start(ctx context.Context, mgr *manager.Manager) error {
40+
func Start(ctx context.Context, mgr *manager.Manager) (manager.PostCreateHook, error) {
4141
log := log.FromContext(ctx)
4242
log.Info("Starting FlowCollector controller")
4343
r := FlowCollectorReconciler{
@@ -70,11 +70,11 @@ func Start(ctx context.Context, mgr *manager.Manager) error {
7070

7171
ctrl, err := builder.Build(&r)
7272
if err != nil {
73-
return err
73+
return nil, err
7474
}
7575
r.watcher = watchers.NewWatcher(ctrl)
7676

77-
return nil
77+
return nil, nil
7878
}
7979

8080
// Reconcile is part of the main kubernetes reconciliation loop which aims to

internal/controller/flp/flp_controller.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ type Reconciler struct {
3535
currentNamespace string
3636
}
3737

38-
func Start(ctx context.Context, mgr *manager.Manager) error {
38+
func Start(ctx context.Context, mgr *manager.Manager) (manager.PostCreateHook, error) {
3939
log := log.FromContext(ctx)
4040
log.Info("Starting Flowlogs Pipeline parent controller")
4141

@@ -66,11 +66,11 @@ func Start(ctx context.Context, mgr *manager.Manager) error {
6666

6767
ctrl, err := builder.Build(&r)
6868
if err != nil {
69-
return err
69+
return nil, err
7070
}
7171
r.watcher = watchers.NewWatcher(ctrl)
7272

73-
return nil
73+
return nil, nil
7474
}
7575

7676
type subReconciler interface {

internal/controller/monitoring/monitoring_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ type Reconciler struct {
3333
currentNamespace string
3434
}
3535

36-
func Start(ctx context.Context, mgr *manager.Manager) error {
36+
func Start(ctx context.Context, mgr *manager.Manager) (manager.PostCreateHook, error) {
3737
log := log.FromContext(ctx)
3838
log.Info("Starting Monitoring controller")
3939
r := Reconciler{
4040
Client: mgr.Client,
4141
mgr: mgr,
4242
status: mgr.Status.ForComponent(status.Monitoring),
4343
}
44-
return ctrl.NewControllerManagedBy(mgr).
44+
return nil, ctrl.NewControllerManagedBy(mgr).
4545
For(&flowslatest.FlowCollector{}, reconcilers.IgnoreStatusChange).
4646
Named("monitoring").
4747
Owns(&corev1.Namespace{}, reconcilers.UpdateOrDeleteOnlyPred).

internal/controller/networkpolicy/np_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@ type Reconciler struct {
2222
status status.Instance
2323
}
2424

25-
func Start(ctx context.Context, mgr *manager.Manager) error {
25+
func Start(ctx context.Context, mgr *manager.Manager) (manager.PostCreateHook, error) {
2626
log := log.FromContext(ctx)
2727
log.Info("Starting Network Policy controller")
2828
r := Reconciler{
2929
Client: mgr.Client,
3030
mgr: mgr,
3131
status: mgr.Status.ForComponent(status.NetworkPolicy),
3232
}
33-
return ctrl.NewControllerManagedBy(mgr).
33+
return nil, ctrl.NewControllerManagedBy(mgr).
3434
For(&flowslatest.FlowCollector{}, reconcilers.IgnoreStatusChange).
3535
Named("networkPolicy").
3636
Owns(&networkingv1.NetworkPolicy{}, reconcilers.UpdateOrDeleteOnlyPred).

internal/controller/static/static_controller.go

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"fmt"
66
"time"
77

8+
"k8s.io/apimachinery/pkg/util/wait"
9+
"k8s.io/client-go/util/retry"
810
ctrl "sigs.k8s.io/controller-runtime"
911
"sigs.k8s.io/controller-runtime/pkg/client"
1012
"sigs.k8s.io/controller-runtime/pkg/log"
@@ -17,8 +19,14 @@ import (
1719
"github.com/netobserv/network-observability-operator/internal/pkg/manager/status"
1820
)
1921

20-
const (
21-
initReconcileAttempts = 5
22+
var (
23+
retryBackoff = wait.Backoff{
24+
Steps: 6,
25+
Duration: 2 * time.Second,
26+
Factor: 2,
27+
Jitter: 0.1,
28+
}
29+
clog = log.Log.WithName("static-controller")
2230
)
2331

2432
type Reconciler struct {
@@ -27,62 +35,66 @@ type Reconciler struct {
2735
status status.Instance
2836
}
2937

30-
func Start(ctx context.Context, mgr *manager.Manager) error {
38+
func Start(ctx context.Context, mgr *manager.Manager) (manager.PostCreateHook, error) {
3139
log := log.FromContext(ctx)
3240
log.Info("Starting Static controller")
3341
r := Reconciler{
3442
Client: mgr.Client,
3543
mgr: mgr,
36-
status: mgr.Status.ForComponent(status.StaticPlugin),
44+
status: mgr.Status.ForComponent(status.StaticController),
3745
}
3846

39-
// force reconcile at startup
40-
go r.InitReconcile(ctx)
41-
42-
return ctrl.NewControllerManagedBy(mgr).
47+
// Return initReconcile as a post-create hook
48+
return r.initReconcile, ctrl.NewControllerManagedBy(mgr).
4349
For(&flowslatest.FlowCollector{}, reconcilers.IgnoreStatusChange).
4450
Named("staticPlugin").
4551
Complete(&r)
4652
}
4753

48-
func (r *Reconciler) InitReconcile(ctx context.Context) {
49-
log := log.FromContext(ctx)
50-
log.Info("Initializing resources...")
51-
52-
for attempt := range initReconcileAttempts {
53-
// delay the reconcile calls to let some time to the cache to load
54-
time.Sleep(5 * time.Second)
55-
_, err := r.Reconcile(ctx, ctrl.Request{})
56-
if err != nil {
57-
log.Error(err, "Error while doing initial reconcile", "attempt", attempt)
58-
} else {
59-
return
54+
func (r *Reconciler) initReconcile(ctx context.Context) error {
55+
attempt := 0
56+
err := retry.OnError(retryBackoff, func(error) bool { return true }, func() error {
57+
attempt++
58+
if _, err := r.Reconcile(ctx, ctrl.Request{}); err != nil {
59+
clog.WithValues("attempt", attempt, "error", err).Info("Initial reconcile: attempt failed")
60+
return err
6061
}
62+
return nil
63+
})
64+
if err != nil {
65+
return fmt.Errorf("failed initial reconcile, all attempts failed: %w", err)
6166
}
67+
return nil
6268
}
6369

6470
// Reconcile is the controller entry point for reconciling current state with desired state.
6571
// It manages the controller status at a high level. Business logic is delegated into `reconcile`.
6672
func (r *Reconciler) Reconcile(ctx context.Context, _ ctrl.Request) (ctrl.Result, error) {
67-
l := log.Log.WithName("staticPlugin") // clear context (too noisy)
68-
ctx = log.IntoContext(ctx, l)
73+
ctx = log.IntoContext(ctx, clog)
6974

7075
r.status.SetUnknown()
7176
defer r.status.Commit(ctx, r.Client)
7277

73-
// always reconcile static console plugin
74-
scp, err := helper.NewControllerClientHelper(ctx, r.mgr.Config.Namespace, r.Client)
75-
if err != nil {
76-
return ctrl.Result{}, fmt.Errorf("failed to get controller deployment: %w", err)
77-
}
78-
staticPluginReconciler := consoleplugin.NewStaticReconciler(r.newDefaultReconcilerInstance(scp))
79-
if err := staticPluginReconciler.ReconcileStaticPlugin(ctx, true); err != nil {
80-
l.Error(err, "Static plugin reconcile failure")
81-
// Set status failure unless it was already set
82-
if !r.status.HasFailure() {
83-
r.status.SetFailure("StaticPluginError", err.Error())
78+
if r.mgr.ClusterInfo.HasConsolePlugin() {
79+
if supported, _, err := r.mgr.ClusterInfo.IsOpenShiftVersionAtLeast("4.15.0"); err != nil {
80+
return ctrl.Result{}, err
81+
} else if !supported {
82+
clog.Info("Skipping static plugin reconciler (no console detected)")
83+
} else {
84+
scp, err := helper.NewControllerClientHelper(ctx, r.mgr.Config.Namespace, r.Client)
85+
if err != nil {
86+
return ctrl.Result{}, fmt.Errorf("failed to get controller deployment: %w", err)
87+
}
88+
staticPluginReconciler := consoleplugin.NewStaticReconciler(r.newDefaultReconcilerInstance(scp))
89+
if err := staticPluginReconciler.ReconcileStaticPlugin(ctx, true); err != nil {
90+
clog.Error(err, "Static plugin reconcile failure")
91+
// Set status failure unless it was already set
92+
if !r.status.HasFailure() {
93+
r.status.SetFailure("StaticPluginError", err.Error())
94+
}
95+
return ctrl.Result{}, err
96+
}
8497
}
85-
return ctrl.Result{}, err
8698
}
8799

88100
r.status.SetReady()

0 commit comments

Comments
 (0)