Skip to content

Commit ad9f130

Browse files
committed
gracefully shutdown reconcilers and catalogd FBC
Signed-off-by: Joe Lanford <[email protected]>
1 parent 9b08aea commit ad9f130

File tree

8 files changed

+233
-9
lines changed

8 files changed

+233
-9
lines changed

.golangci.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ linters-settings:
6666
alias: ctrl
6767
- pkg: github.com/blang/semver/v4
6868
alias: bsemver
69+
- pkg: "^github.com/operator-framework/operator-controller/internal/util/([^/]+)$"
70+
alias: "${1}util"
6971

7072
output:
7173
formats:

catalogd/config/base/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ spec:
8585
imagePullPolicy: IfNotPresent
8686
terminationMessagePolicy: FallbackToLogsOnError
8787
serviceAccountName: controller-manager
88-
terminationGracePeriodSeconds: 10
88+
terminationGracePeriodSeconds: 60
8989
volumes:
9090
- name: cache
9191
emptyDir: {}

catalogd/internal/controllers/core/clustercatalog_controller.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
catalogdv1 "github.com/operator-framework/operator-controller/catalogd/api/v1"
4141
"github.com/operator-framework/operator-controller/catalogd/internal/source"
4242
"github.com/operator-framework/operator-controller/catalogd/internal/storage"
43+
contextutil "github.com/operator-framework/operator-controller/internal/util/context"
4344
)
4445

4546
const (
@@ -91,8 +92,11 @@ func (r *ClusterCatalogReconciler) Reconcile(ctx context.Context, req ctrl.Reque
9192
return ctrl.Result{}, client.IgnoreNotFound(err)
9293
}
9394

95+
delayedCtx, delayedCancel := contextutil.WithDelay(ctx, time.Minute)
96+
defer delayedCancel()
97+
9498
reconciledCatsrc := existingCatsrc.DeepCopy()
95-
res, reconcileErr := r.reconcile(ctx, reconciledCatsrc)
99+
res, reconcileErr := r.reconcile(delayedCtx, reconciledCatsrc)
96100

97101
// If we encounter an error, we should delete the stored catalog metadata
98102
// which represents the state of a successfully unpacked catalog. Deleting
@@ -118,15 +122,15 @@ func (r *ClusterCatalogReconciler) Reconcile(ctx context.Context, req ctrl.Reque
118122
finalizers := reconciledCatsrc.Finalizers
119123

120124
if updateStatus {
121-
if err := r.Client.Status().Update(ctx, reconciledCatsrc); err != nil {
125+
if err := r.Client.Status().Update(delayedCtx, reconciledCatsrc); err != nil {
122126
reconcileErr = errors.Join(reconcileErr, fmt.Errorf("error updating status: %v", err))
123127
}
124128
}
125129

126130
reconciledCatsrc.Finalizers = finalizers
127131

128132
if updateFinalizers {
129-
if err := r.Client.Update(ctx, reconciledCatsrc); err != nil {
133+
if err := r.Client.Update(delayedCtx, reconciledCatsrc); err != nil {
130134
reconcileErr = errors.Join(reconcileErr, fmt.Errorf("error updating finalizers: %v", err))
131135
}
132136
}

catalogd/internal/serverutil/serverutil.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func AddCatalogServerToManager(mgr ctrl.Manager, cfg CatalogServerConfig, tlsFil
4141
listener = tls.NewListener(listener, config)
4242
}
4343

44-
shutdownTimeout := 30 * time.Second
44+
shutdownTimeout := 60 * time.Second
4545

4646
l := mgr.GetLogger().WithName("catalogd-http-server")
4747
handler := catalogdmetrics.AddMetricsToHandler(cfg.LocalStorage.StorageServerHandler())

config/base/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ spec:
8585
memory: 64Mi
8686
terminationMessagePolicy: FallbackToLogsOnError
8787
serviceAccountName: operator-controller-controller-manager
88-
terminationGracePeriodSeconds: 10
88+
terminationGracePeriodSeconds: 60
8989
volumes:
9090
- name: cache
9191
emptyDir: {}

internal/controllers/clusterextension_controller.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ import (
5656
"github.com/operator-framework/operator-controller/internal/labels"
5757
"github.com/operator-framework/operator-controller/internal/resolve"
5858
rukpaksource "github.com/operator-framework/operator-controller/internal/rukpak/source"
59+
contextutil "github.com/operator-framework/operator-controller/internal/util/context"
5960
)
6061

6162
const (
@@ -110,8 +111,11 @@ func (r *ClusterExtensionReconciler) Reconcile(ctx context.Context, req ctrl.Req
110111
return ctrl.Result{}, client.IgnoreNotFound(err)
111112
}
112113

114+
delayedCtx, delayedCancel := contextutil.WithDelay(ctx, time.Minute)
115+
defer delayedCancel()
116+
113117
reconciledExt := existingExt.DeepCopy()
114-
res, reconcileErr := r.reconcile(ctx, reconciledExt)
118+
res, reconcileErr := r.reconcile(delayedCtx, reconciledExt)
115119

116120
// Do checks before any Update()s, as Update() may modify the resource structure!
117121
updateStatus := !equality.Semantic.DeepEqual(existingExt.Status, reconciledExt.Status)
@@ -129,14 +133,14 @@ func (r *ClusterExtensionReconciler) Reconcile(ctx context.Context, req ctrl.Req
129133
// reconciledExt before updating the object.
130134
finalizers := reconciledExt.Finalizers
131135
if updateStatus {
132-
if err := r.Client.Status().Update(ctx, reconciledExt); err != nil {
136+
if err := r.Client.Status().Update(delayedCtx, reconciledExt); err != nil {
133137
reconcileErr = errors.Join(reconcileErr, fmt.Errorf("error updating status: %v", err))
134138
}
135139
}
136140
reconciledExt.Finalizers = finalizers
137141

138142
if updateFinalizers {
139-
if err := r.Client.Update(ctx, reconciledExt); err != nil {
143+
if err := r.Client.Update(delayedCtx, reconciledExt); err != nil {
140144
reconcileErr = errors.Join(reconcileErr, fmt.Errorf("error updating finalizers: %v", err))
141145
}
142146
}

internal/util/context/context.go

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package context
2+
3+
import (
4+
"context"
5+
"time"
6+
)
7+
8+
func (d *delayContext) Deadline() (time.Time, bool) {
9+
select {
10+
case <-d.parentCtx.Done():
11+
// if the parent context is done, wait
12+
// for our timeout setup to complete, then
13+
// return the timeout context's deadline.
14+
<-d.setupDone
15+
return d.timeoutCtx.Deadline()
16+
default:
17+
// if the parent context has a deadline, simply add
18+
// our delay.
19+
if parentDeadline, ok := d.parentCtx.Deadline(); ok {
20+
return parentDeadline.Add(d.delay), true
21+
}
22+
// if the parent context does not have a deadline
23+
// then we don't know ours either because it depends
24+
// on when the parent is done.
25+
return time.Time{}, false
26+
}
27+
}
28+
29+
func (d *delayContext) Done() <-chan struct{} {
30+
return d.done
31+
}
32+
33+
func (d *delayContext) Err() error {
34+
// If the parent context is done, wait until setup
35+
// is done, then return the timeout context's error.
36+
select {
37+
case <-d.parentCtx.Done():
38+
<-d.setupDone
39+
return d.timeoutCtx.Err()
40+
default:
41+
}
42+
43+
// If done is closed, that means we were
44+
// directly cancelled. Otherwise (if neither
45+
// parent context is done or done is closed)
46+
// the context is still active, hence no error
47+
select {
48+
case <-d.done:
49+
return context.Canceled
50+
default:
51+
return nil
52+
}
53+
}
54+
55+
func (d *delayContext) Value(key interface{}) interface{} {
56+
return d.parentCtx.Value(key)
57+
}
58+
59+
type delayContext struct {
60+
parentCtx context.Context
61+
delay time.Duration
62+
63+
done chan struct{}
64+
setupDone chan struct{}
65+
66+
timeoutCtx context.Context
67+
timeoutCancel context.CancelFunc
68+
}
69+
70+
func WithDelay(parentCtx context.Context, delay time.Duration) (context.Context, context.CancelFunc) {
71+
delayedCtx := &delayContext{
72+
parentCtx: parentCtx,
73+
delay: delay,
74+
done: make(chan struct{}),
75+
setupDone: make(chan struct{}),
76+
}
77+
78+
setupDelay := func() {
79+
timeoutCtx, timeoutCancel := context.WithTimeout(context.Background(), delay)
80+
context.AfterFunc(timeoutCtx, func() { close(delayedCtx.done) })
81+
delayedCtx.timeoutCtx = timeoutCtx
82+
delayedCtx.timeoutCancel = timeoutCancel
83+
close(delayedCtx.setupDone)
84+
}
85+
86+
unregisterDelay := context.AfterFunc(parentCtx, setupDelay)
87+
88+
cancelFunc := func() {
89+
setupNeverHappened := unregisterDelay()
90+
if setupNeverHappened {
91+
// if setup never happened, then the delay context was
92+
// cancelled prior to the parent context being done.
93+
//
94+
// all we need to do here is close the done chan.
95+
close(delayedCtx.done)
96+
} else {
97+
// if we're here, the setup function was called
98+
99+
// wait until setup is done to ensure there is a
100+
// timeoutContext/timeoutCancel
101+
<-delayedCtx.setupDone
102+
103+
// cancel the timeout context (which includes
104+
// an AfterFunc to also close our doneChan, so
105+
// we'll wait for that to be closed before
106+
// returning)
107+
delayedCtx.timeoutCancel()
108+
<-delayedCtx.done
109+
}
110+
}
111+
112+
return delayedCtx, cancelFunc
113+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package context_test
2+
3+
import (
4+
"context"
5+
"testing"
6+
"time"
7+
8+
"github.com/stretchr/testify/assert"
9+
10+
contextutil "github.com/operator-framework/operator-controller/internal/util/context"
11+
)
12+
13+
func TestWithDelay_Delays(t *testing.T) {
14+
for _, delay := range []time.Duration{
15+
0,
16+
time.Millisecond * 10,
17+
time.Millisecond * 100,
18+
time.Millisecond * 200,
19+
} {
20+
t.Run(delay.String(), func(t *testing.T) {
21+
parentCtx, parentCancel := context.WithCancel(context.Background())
22+
delayCtx, _ := contextutil.WithDelay(parentCtx, delay)
23+
24+
parentCancel()
25+
26+
// verify deadline is within 1m ms of what we expect
27+
expectDeadline := time.Now().Add(delay)
28+
actualDeadline, ok := delayCtx.Deadline()
29+
assert.True(t, ok, "expected delay context to have a deadline after parent was cancelled")
30+
assert.WithinDurationf(t, expectDeadline, actualDeadline, time.Millisecond, "expected the context's deadline (%v) to be within 1 ms of %v; diff was %v", expectDeadline, actualDeadline, expectDeadline.Sub(actualDeadline))
31+
32+
// verify context is done due to deadline exceeded and that it happens
33+
// within 3ms of our expectation
34+
select {
35+
case <-delayCtx.Done():
36+
case <-time.After(time.Until(expectDeadline.Add(3 * time.Millisecond))):
37+
diff := time.Since(expectDeadline)
38+
t.Fatalf("delay context should have been canceled quickly after %s, but it took %s", delay, diff)
39+
}
40+
assert.ErrorIs(t, delayCtx.Err(), context.DeadlineExceeded)
41+
})
42+
}
43+
}
44+
45+
func TestWithDelay_Deadline(t *testing.T) {
46+
t.Run("parent has deadline", func(t *testing.T) {
47+
parentDeadline := time.Now().Add(200 * time.Millisecond)
48+
parentCtx, parentCancel := context.WithDeadline(context.Background(), parentDeadline)
49+
defer parentCancel()
50+
51+
delay := 250 * time.Millisecond
52+
delayCtx, _ := contextutil.WithDelay(parentCtx, delay)
53+
54+
expectDeadline := parentDeadline.Add(delay)
55+
actualDeadline, ok := delayCtx.Deadline()
56+
57+
assert.True(t, ok, "expected delay context to have a deadline before parent was cancelled")
58+
assert.Equal(t, expectDeadline, actualDeadline)
59+
})
60+
t.Run("parent has no deadline", func(t *testing.T) {
61+
parentCtx, parentCancel := context.WithCancel(context.Background())
62+
defer parentCancel()
63+
64+
delayCtx, _ := contextutil.WithDelay(parentCtx, 200*time.Millisecond)
65+
actualDeadline, ok := delayCtx.Deadline()
66+
assert.False(t, ok, "expected delay context to have an unknown deadline before parent was cancelled")
67+
assert.Equal(t, time.Time{}, actualDeadline, "expected delay context deadline to be unset")
68+
})
69+
}
70+
71+
func TestWithDelay_Err(t *testing.T) {
72+
t.Run("nil", func(t *testing.T) {
73+
delayCtx, _ := contextutil.WithDelay(context.Background(), 0)
74+
assert.NoError(t, delayCtx.Err())
75+
})
76+
t.Run("canceled before parent done", func(t *testing.T) {
77+
delayCtx, delayCancel := contextutil.WithDelay(context.Background(), 0)
78+
delayCancel()
79+
assert.ErrorIs(t, delayCtx.Err(), context.Canceled)
80+
})
81+
t.Run("canceled after parent done", func(t *testing.T) {
82+
parentCtx, parentCancel := context.WithCancel(context.Background())
83+
delayCtx, delayCancel := contextutil.WithDelay(parentCtx, 200*time.Millisecond)
84+
parentCancel()
85+
delayCancel()
86+
assert.ErrorIs(t, delayCtx.Err(), context.Canceled)
87+
})
88+
t.Run("deadline exceeded", func(t *testing.T) {
89+
parentCtx, parentCancel := context.WithCancel(context.Background())
90+
delayCtx, _ := contextutil.WithDelay(parentCtx, 0)
91+
parentCancel()
92+
assert.ErrorIs(t, delayCtx.Err(), context.DeadlineExceeded)
93+
})
94+
}
95+
96+
func TestWithDelay_Value(t *testing.T) {
97+
type valueKey string
98+
parentCtx := context.WithValue(context.Background(), valueKey("foo"), "bar")
99+
delayCtx, _ := contextutil.WithDelay(parentCtx, 0)
100+
assert.Equal(t, "bar", delayCtx.Value(valueKey("foo")))
101+
}

0 commit comments

Comments
 (0)