Skip to content

Commit 16052be

Browse files
Merge pull request #250 from slintes/fix-unit-test
Fix watchdog usage in controller unit tests
2 parents 8a2445e + c674511 commit 16052be

File tree

7 files changed

+189
-58
lines changed

7 files changed

+189
-58
lines changed

controllers/tests/controller/selfnoderemediation_controller_test.go

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/medik8s/self-node-remediation/controllers"
2626
"github.com/medik8s/self-node-remediation/controllers/tests/shared"
2727
"github.com/medik8s/self-node-remediation/pkg/utils"
28+
"github.com/medik8s/self-node-remediation/pkg/watchdog"
2829
)
2930

3031
const (
@@ -44,6 +45,9 @@ var _ = Describe("SNR Controller", func() {
4445
snr.Namespace = snrNamespace
4546
snrConfig = shared.GenerateTestConfig()
4647
time.Sleep(time.Second * 2)
48+
49+
// reset watchdog for each test!
50+
dummyDog.Reset()
4751
})
4852

4953
JustBeforeEach(func() {
@@ -183,7 +187,7 @@ var _ = Describe("SNR Controller", func() {
183187

184188
verifyTimeHasBeenRebootedExists(snr)
185189

186-
verifyNoWatchdogFood()
190+
verifyWatchdogTriggered()
187191

188192
verifyEvent("Normal", "NodeReboot", "Remediation process - about to attempt fencing the unhealthy node by rebooting it")
189193

@@ -233,7 +237,7 @@ var _ = Describe("SNR Controller", func() {
233237

234238
verifyTimeHasBeenRebootedExists(snr)
235239

236-
verifyNoWatchdogFood()
240+
verifyWatchdogTriggered()
237241

238242
// The kube-api calls for VA fail intentionally. In this case, we expect the snr agent to try
239243
// to delete node resources again. So LastError is set to the error every time Reconcile()
@@ -315,7 +319,7 @@ var _ = Describe("SNR Controller", func() {
315319

316320
verifyTimeHasBeenRebootedExists(snr)
317321

318-
verifyNoWatchdogFood()
322+
verifyWatchdogTriggered()
319323

320324
verifyFinalizerExists(snr)
321325

@@ -436,30 +440,16 @@ var _ = Describe("SNR Controller", func() {
436440
})
437441

438442
Context("Unhealthy node without api-server access", func() {
439-
440-
// this is not a controller test anymore... it's testing peers. But keep it here for now...
441-
442443
BeforeEach(func() {
443444
By("Simulate api-server failure")
444445
k8sClient.ShouldSimulateFailure = true
445446
remediationStrategy = v1alpha1.ResourceDeletionRemediationStrategy
446447
})
447448

448-
It("Verify that watchdog is not receiving food after some time", func() {
449-
lastFoodTime := dummyDog.LastFoodTime()
450-
timeout := dummyDog.GetTimeout()
451-
Eventually(func() bool {
452-
newTime := dummyDog.LastFoodTime()
453-
// ensure the timeout passed
454-
timeoutPassed := time.Now().After(lastFoodTime.Add(3 * timeout))
455-
// ensure wd wasn't fed
456-
missedFeed := newTime.Before(lastFoodTime.Add(timeout))
457-
if timeoutPassed && missedFeed {
458-
return true
459-
}
460-
lastFoodTime = newTime
461-
return false
462-
}, 10*shared.PeerUpdateInterval, timeout).Should(BeTrue())
449+
Context("no peer found", func() {
450+
It("Verify that watchdog is not triggered", func() {
451+
verifyWatchdogNotTriggered()
452+
})
463453
})
464454
})
465455

@@ -532,12 +522,16 @@ func verifyNodeIsSchedulable() {
532522
}, 95*time.Second, 250*time.Millisecond).Should(BeFalse())
533523
}
534524

535-
func verifyNoWatchdogFood() {
536-
By("Verify that watchdog is not receiving food")
537-
currentLastFoodTime := dummyDog.LastFoodTime()
538-
ConsistentlyWithOffset(1, func() time.Time {
539-
return dummyDog.LastFoodTime()
540-
}, 5*dummyDog.GetTimeout(), 1*time.Second).Should(Equal(currentLastFoodTime), "watchdog should not receive food anymore")
525+
func verifyWatchdogTriggered() {
526+
EventuallyWithOffset(1, func(g Gomega) {
527+
g.Expect(dummyDog.Status()).To(Equal(watchdog.Triggered))
528+
}, 5*dummyDog.GetTimeout(), 1*time.Second).Should(Succeed(), "watchdog should be triggered")
529+
}
530+
531+
func verifyWatchdogNotTriggered() {
532+
ConsistentlyWithOffset(1, func(g Gomega) {
533+
g.Expect(dummyDog.Status()).To(Equal(watchdog.Armed))
534+
}, 5*dummyDog.GetTimeout(), 1*time.Second).Should(Succeed(), "watchdog should not be triggered")
541535
}
542536

543537
func verifyFinalizerExists(snr *v1alpha1.SelfNodeRemediation) {

controllers/tests/controller/suite_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ import (
5454

5555
var (
5656
testEnv *envtest.Environment
57-
dummyDog watchdog.Watchdog
57+
dummyDog watchdog.FakeWatchdog
5858
unhealthyNode, peerNode = &v1.Node{}, &v1.Node{}
5959
cancelFunc context.CancelFunc
6060
k8sClient *shared.K8sClientWrapper

pkg/peers/peers.go

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ type Role int8
2727
const (
2828
Worker Role = iota
2929
ControlPlane
30+
31+
// this is used instead of "peerUpdateInterval" when peer update fails
32+
quickPeerUpdateInterval = 2 * time.Minute
3033
)
3134

3235
type Peers struct {
@@ -77,22 +80,24 @@ func (p *Peers) Start(ctx context.Context) error {
7780
p.controlPlanePeerSelector = createSelector(hostname, getControlPlaneLabel(myNode))
7881
}
7982

80-
var updatePeersError error
81-
cancellableCtx, cancel := context.WithCancel(ctx)
82-
8383
p.log.Info("peer starting", "name", p.myNodeName)
84-
wait.UntilWithContext(cancellableCtx, func(ctx context.Context) {
85-
updatePeersError = p.updateWorkerPeers(ctx)
86-
if updatePeersError != nil {
87-
cancel()
88-
}
89-
updatePeersError = p.updateControlPlanePeers(ctx)
90-
if updatePeersError != nil {
91-
cancel()
84+
wait.UntilWithContext(ctx, func(ctx context.Context) {
85+
updateWorkerPeersError := p.updateWorkerPeers(ctx)
86+
updateControlPlanePeersError := p.updateControlPlanePeers(ctx)
87+
if updateWorkerPeersError != nil || updateControlPlanePeersError != nil {
88+
// the default update interval is quite long, in case of an error we want to retry quicker
89+
quickCtx, quickCancel := context.WithCancel(ctx)
90+
wait.UntilWithContext(quickCtx, func(ctx context.Context) {
91+
quickUpdateWorkerPeersError := p.updateWorkerPeers(ctx)
92+
quickUpdateControlPlanePeersError := p.updateControlPlanePeers(ctx)
93+
if quickUpdateWorkerPeersError == nil && quickUpdateControlPlanePeersError == nil {
94+
quickCancel()
95+
}
96+
}, quickPeerUpdateInterval)
9297
}
9398
}, p.peerUpdateInterval)
9499

95-
return updatePeersError
100+
return nil
96101
}
97102

98103
func (p *Peers) updateWorkerPeers(ctx context.Context) error {

pkg/watchdog/fake.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,22 @@ const (
1111
fakeTimeout = 1 * time.Second
1212
)
1313

14+
type FakeWatchdog interface {
15+
Watchdog
16+
Reset()
17+
}
18+
1419
// fakeWatchdogImpl provides the fake implementation of the watchdogImpl interface for tests
1520
type fakeWatchdogImpl struct {
1621
IsStartSuccessful bool
22+
*synchronizedWatchdog
1723
}
1824

19-
func NewFake(isStartSuccessful bool) Watchdog {
25+
func NewFake(isStartSuccessful bool) FakeWatchdog {
2026
fakeWDImpl := &fakeWatchdogImpl{IsStartSuccessful: isStartSuccessful}
21-
return newSynced(ctrl.Log.WithName("fake watchdog"), fakeWDImpl)
27+
syncedWD := newSynced(ctrl.Log.WithName("fake watchdog"), fakeWDImpl)
28+
fakeWDImpl.synchronizedWatchdog = syncedWD
29+
return fakeWDImpl
2230
}
2331

2432
func (f *fakeWatchdogImpl) start() (*time.Duration, error) {
@@ -36,3 +44,14 @@ func (f *fakeWatchdogImpl) feed() error {
3644
func (f *fakeWatchdogImpl) disarm() error {
3745
return nil
3846
}
47+
48+
func (f *fakeWatchdogImpl) Reset() {
49+
swd := f.synchronizedWatchdog
50+
swd.mutex.Lock()
51+
defer swd.mutex.Unlock()
52+
if swd.status != Armed {
53+
swd.stop()
54+
swd.status = Armed
55+
swd.startFeeding()
56+
}
57+
}

pkg/watchdog/synchronized.go

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -65,22 +65,7 @@ func (swd *synchronizedWatchdog) Start(ctx context.Context) error {
6565
swd.log.Info("watchdog started")
6666
swd.mutex.Unlock()
6767

68-
feedCtx, cancel := context.WithCancel(context.Background())
69-
swd.stop = cancel
70-
// feed until stopped
71-
go wait.NonSlidingUntilWithContext(feedCtx, func(feedCtx context.Context) {
72-
swd.mutex.Lock()
73-
defer swd.mutex.Unlock()
74-
// prevent feeding of a disarmed watchdog in case the context isn't cancelled yet
75-
if swd.status != Armed {
76-
return
77-
}
78-
if err := swd.impl.feed(); err != nil {
79-
swd.log.Error(err, "failed to feed watchdog!")
80-
} else {
81-
swd.lastFoodTime = time.Now()
82-
}
83-
}, swd.timeout/3)
68+
swd.startFeeding()
8469

8570
<-ctx.Done()
8671

@@ -100,6 +85,25 @@ func (swd *synchronizedWatchdog) Start(ctx context.Context) error {
10085
return nil
10186
}
10287

88+
func (swd *synchronizedWatchdog) startFeeding() {
89+
feedCtx, cancel := context.WithCancel(context.Background())
90+
swd.stop = cancel
91+
// feed until stopped
92+
go wait.NonSlidingUntilWithContext(feedCtx, func(feedCtx context.Context) {
93+
swd.mutex.Lock()
94+
defer swd.mutex.Unlock()
95+
// prevent feeding of a disarmed watchdog in case the context isn't cancelled yet
96+
if swd.status != Armed {
97+
return
98+
}
99+
if err := swd.impl.feed(); err != nil {
100+
swd.log.Error(err, "failed to feed watchdog!")
101+
} else {
102+
swd.lastFoodTime = time.Now()
103+
}
104+
}, swd.timeout/3)
105+
}
106+
103107
func (swd *synchronizedWatchdog) Stop() {
104108
swd.mutex.Lock()
105109
defer swd.mutex.Unlock()
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package watchdog_test
2+
3+
import (
4+
"testing"
5+
6+
. "github.com/onsi/ginkgo/v2"
7+
. "github.com/onsi/gomega"
8+
)
9+
10+
func TestWatchdog(t *testing.T) {
11+
RegisterFailHandler(Fail)
12+
RunSpecs(t, "Watchdog Suite")
13+
}

pkg/watchdog/watchdog_test.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package watchdog_test
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
. "github.com/onsi/ginkgo/v2"
8+
. "github.com/onsi/gomega"
9+
10+
"github.com/medik8s/self-node-remediation/pkg/watchdog"
11+
)
12+
13+
var _ = Describe("Watchdog", func() {
14+
15+
var wd watchdog.FakeWatchdog
16+
var cancel context.CancelFunc
17+
18+
BeforeEach(func() {
19+
var ctx context.Context
20+
ctx, cancel = context.WithCancel(context.Background())
21+
22+
wd = watchdog.NewFake(true)
23+
Expect(wd).NotTo(BeNil())
24+
go func() {
25+
err := wd.Start(ctx)
26+
Expect(err).NotTo(HaveOccurred())
27+
}()
28+
Eventually(func(g Gomega) {
29+
g.Expect(wd.Status()).To(Equal(watchdog.Armed))
30+
}, 1*time.Second, 100*time.Millisecond).Should(Succeed(), "watchdog should be armed")
31+
})
32+
33+
AfterEach(func() {
34+
cancel()
35+
})
36+
37+
Context("Watchdog started", func() {
38+
It("should be fed", func() {
39+
verifyWatchdogFood(wd)
40+
})
41+
})
42+
43+
Context("Watchdog triggered", func() {
44+
BeforeEach(func() {
45+
wd.Stop()
46+
})
47+
48+
It("should be triggered and not be fed anymore", func() {
49+
Eventually(func(g Gomega) {
50+
g.Expect(wd.Status()).To(Equal(watchdog.Triggered))
51+
}, 1*time.Second, 100*time.Millisecond).Should(Succeed(), "watchdog should be triggered")
52+
verifyNoWatchdogFood(wd)
53+
})
54+
})
55+
56+
Context("Watchdog cancelled", func() {
57+
BeforeEach(func() {
58+
cancel()
59+
})
60+
61+
It("should be disarmed and not be fed anymore", func() {
62+
Eventually(func(g Gomega) {
63+
g.Expect(wd.Status()).To(Equal(watchdog.Disarmed))
64+
}, 1*time.Second, 100*time.Millisecond).Should(Succeed(), "watchdog should be disarmed")
65+
verifyNoWatchdogFood(wd)
66+
})
67+
})
68+
69+
Context("Triggered watchdog reset", func() {
70+
BeforeEach(func() {
71+
wd.Stop()
72+
wd.Reset()
73+
})
74+
75+
It("should be armed and fed", func() {
76+
Eventually(func(g Gomega) {
77+
g.Expect(wd.Status()).To(Equal(watchdog.Armed))
78+
}, 1*time.Second, 100*time.Millisecond).Should(Succeed(), "watchdog should be armed")
79+
verifyWatchdogFood(wd)
80+
})
81+
})
82+
})
83+
84+
func verifyWatchdogFood(wd watchdog.Watchdog) {
85+
currentLastFoodTime := wd.LastFoodTime()
86+
EventuallyWithOffset(1, func() time.Time {
87+
return wd.LastFoodTime()
88+
}, wd.GetTimeout(), 100*time.Millisecond).Should(BeTemporally(">", currentLastFoodTime), "watchdog should receive food")
89+
}
90+
91+
func verifyNoWatchdogFood(wd watchdog.Watchdog) {
92+
currentLastFoodTime := wd.LastFoodTime()
93+
ConsistentlyWithOffset(1, func() time.Time {
94+
return wd.LastFoodTime()
95+
}, 5*wd.GetTimeout(), 1*time.Second).Should(Equal(currentLastFoodTime), "watchdog should not receive food anymore")
96+
}

0 commit comments

Comments (0)