Skip to content

Commit c674511

Browse files
committed
Fix general watchdog usage in unit test, and 1 specific wrong test
- Do not test watchdog feeding (is in new watchdog unit tests now) - Test watchdog status instead - Reset watchdog before each test! Otherwise we get false positives, because earlier tests triggered the watchdog already! - Fix "Unhealthy node without api-server access" test: the watchdog should NOT be triggered! Signed-off-by: Marc Sluiter <[email protected]>
1 parent 0c83839 commit c674511

File tree

5 files changed

+78
-47
lines changed

5 files changed

+78
-47
lines changed

controllers/tests/controller/selfnoderemediation_controller_test.go

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/medik8s/self-node-remediation/controllers"
2626
"github.com/medik8s/self-node-remediation/controllers/tests/shared"
2727
"github.com/medik8s/self-node-remediation/pkg/utils"
28+
"github.com/medik8s/self-node-remediation/pkg/watchdog"
2829
)
2930

3031
const (
@@ -44,6 +45,9 @@ var _ = Describe("SNR Controller", func() {
4445
snr.Namespace = snrNamespace
4546
snrConfig = shared.GenerateTestConfig()
4647
time.Sleep(time.Second * 2)
48+
49+
// reset watchdog for each test!
50+
dummyDog.Reset()
4751
})
4852

4953
JustBeforeEach(func() {
@@ -183,7 +187,7 @@ var _ = Describe("SNR Controller", func() {
183187

184188
verifyTimeHasBeenRebootedExists(snr)
185189

186-
verifyNoWatchdogFood()
190+
verifyWatchdogTriggered()
187191

188192
verifyEvent("Normal", "NodeReboot", "Remediation process - about to attempt fencing the unhealthy node by rebooting it")
189193

@@ -233,7 +237,7 @@ var _ = Describe("SNR Controller", func() {
233237

234238
verifyTimeHasBeenRebootedExists(snr)
235239

236-
verifyNoWatchdogFood()
240+
verifyWatchdogTriggered()
237241

238242
// The kube-api calls for VA fail intentionally. In this case, we expect the snr agent to try
239243
// to delete node resources again. So LastError is set to the error every time Reconcile()
@@ -315,7 +319,7 @@ var _ = Describe("SNR Controller", func() {
315319

316320
verifyTimeHasBeenRebootedExists(snr)
317321

318-
verifyNoWatchdogFood()
322+
verifyWatchdogTriggered()
319323

320324
verifyFinalizerExists(snr)
321325

@@ -436,30 +440,16 @@ var _ = Describe("SNR Controller", func() {
436440
})
437441

438442
Context("Unhealthy node without api-server access", func() {
439-
440-
// this is not a controller test anymore... it's testing peers. But keep it here for now...
441-
442443
BeforeEach(func() {
443444
By("Simulate api-server failure")
444445
k8sClient.ShouldSimulateFailure = true
445446
remediationStrategy = v1alpha1.ResourceDeletionRemediationStrategy
446447
})
447448

448-
It("Verify that watchdog is not receiving food after some time", func() {
449-
lastFoodTime := dummyDog.LastFoodTime()
450-
timeout := dummyDog.GetTimeout()
451-
Eventually(func() bool {
452-
newTime := dummyDog.LastFoodTime()
453-
// ensure the timeout passed
454-
timeoutPassed := time.Now().After(lastFoodTime.Add(3 * timeout))
455-
// ensure wd wasn't feeded
456-
missedFeed := newTime.Before(lastFoodTime.Add(timeout))
457-
if timeoutPassed && missedFeed {
458-
return true
459-
}
460-
lastFoodTime = newTime
461-
return false
462-
}, 10*shared.PeerUpdateInterval, timeout).Should(BeTrue())
449+
Context("no peer found", func() {
450+
It("Verify that watchdog is not triggered", func() {
451+
verifyWatchdogNotTriggered()
452+
})
463453
})
464454
})
465455

@@ -532,12 +522,16 @@ func verifyNodeIsSchedulable() {
532522
}, 95*time.Second, 250*time.Millisecond).Should(BeFalse())
533523
}
534524

535-
func verifyNoWatchdogFood() {
536-
By("Verify that watchdog is not receiving food")
537-
currentLastFoodTime := dummyDog.LastFoodTime()
538-
ConsistentlyWithOffset(1, func() time.Time {
539-
return dummyDog.LastFoodTime()
540-
}, 5*dummyDog.GetTimeout(), 1*time.Second).Should(Equal(currentLastFoodTime), "watchdog should not receive food anymore")
525+
func verifyWatchdogTriggered() {
526+
EventuallyWithOffset(1, func(g Gomega) {
527+
g.Expect(dummyDog.Status()).To(Equal(watchdog.Triggered))
528+
}, 5*dummyDog.GetTimeout(), 1*time.Second).Should(Succeed(), "watchdog should be triggered")
529+
}
530+
531+
func verifyWatchdogNotTriggered() {
532+
ConsistentlyWithOffset(1, func(g Gomega) {
533+
g.Expect(dummyDog.Status()).To(Equal(watchdog.Armed))
534+
}, 5*dummyDog.GetTimeout(), 1*time.Second).Should(Succeed(), "watchdog should not be triggered")
541535
}
542536

543537
func verifyFinalizerExists(snr *v1alpha1.SelfNodeRemediation) {

controllers/tests/controller/suite_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ import (
5454

5555
var (
5656
testEnv *envtest.Environment
57-
dummyDog watchdog.Watchdog
57+
dummyDog watchdog.FakeWatchdog
5858
unhealthyNode, peerNode = &v1.Node{}, &v1.Node{}
5959
cancelFunc context.CancelFunc
6060
k8sClient *shared.K8sClientWrapper

pkg/watchdog/fake.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,22 @@ const (
1111
fakeTimeout = 1 * time.Second
1212
)
1313

14+
type FakeWatchdog interface {
15+
Watchdog
16+
Reset()
17+
}
18+
1419
// fakeWatchdogImpl provides the fake implementation of the watchdogImpl interface for tests
1520
type fakeWatchdogImpl struct {
1621
IsStartSuccessful bool
22+
*synchronizedWatchdog
1723
}
1824

19-
func NewFake(isStartSuccessful bool) Watchdog {
25+
func NewFake(isStartSuccessful bool) FakeWatchdog {
2026
fakeWDImpl := &fakeWatchdogImpl{IsStartSuccessful: isStartSuccessful}
21-
return newSynced(ctrl.Log.WithName("fake watchdog"), fakeWDImpl)
27+
syncedWD := newSynced(ctrl.Log.WithName("fake watchdog"), fakeWDImpl)
28+
fakeWDImpl.synchronizedWatchdog = syncedWD
29+
return fakeWDImpl
2230
}
2331

2432
func (f *fakeWatchdogImpl) start() (*time.Duration, error) {
@@ -36,3 +44,14 @@ func (f *fakeWatchdogImpl) feed() error {
3644
func (f *fakeWatchdogImpl) disarm() error {
3745
return nil
3846
}
47+
48+
func (f *fakeWatchdogImpl) Reset() {
49+
swd := f.synchronizedWatchdog
50+
swd.mutex.Lock()
51+
defer swd.mutex.Unlock()
52+
if swd.status != Armed {
53+
swd.stop()
54+
swd.status = Armed
55+
swd.startFeeding()
56+
}
57+
}

pkg/watchdog/synchronized.go

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -65,22 +65,7 @@ func (swd *synchronizedWatchdog) Start(ctx context.Context) error {
6565
swd.log.Info("watchdog started")
6666
swd.mutex.Unlock()
6767

68-
feedCtx, cancel := context.WithCancel(context.Background())
69-
swd.stop = cancel
70-
// feed until stopped
71-
go wait.NonSlidingUntilWithContext(feedCtx, func(feedCtx context.Context) {
72-
swd.mutex.Lock()
73-
defer swd.mutex.Unlock()
74-
// prevent feeding of a disarmed watchdog in case the context isn't cancelled yet
75-
if swd.status != Armed {
76-
return
77-
}
78-
if err := swd.impl.feed(); err != nil {
79-
swd.log.Error(err, "failed to feed watchdog!")
80-
} else {
81-
swd.lastFoodTime = time.Now()
82-
}
83-
}, swd.timeout/3)
68+
swd.startFeeding()
8469

8570
<-ctx.Done()
8671

@@ -100,6 +85,25 @@ func (swd *synchronizedWatchdog) Start(ctx context.Context) error {
10085
return nil
10186
}
10287

88+
func (swd *synchronizedWatchdog) startFeeding() {
89+
feedCtx, cancel := context.WithCancel(context.Background())
90+
swd.stop = cancel
91+
// feed until stopped
92+
go wait.NonSlidingUntilWithContext(feedCtx, func(feedCtx context.Context) {
93+
swd.mutex.Lock()
94+
defer swd.mutex.Unlock()
95+
// prevent feeding of a disarmed watchdog in case the context isn't cancelled yet
96+
if swd.status != Armed {
97+
return
98+
}
99+
if err := swd.impl.feed(); err != nil {
100+
swd.log.Error(err, "failed to feed watchdog!")
101+
} else {
102+
swd.lastFoodTime = time.Now()
103+
}
104+
}, swd.timeout/3)
105+
}
106+
103107
func (swd *synchronizedWatchdog) Stop() {
104108
swd.mutex.Lock()
105109
defer swd.mutex.Unlock()

pkg/watchdog/watchdog_test.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212

1313
var _ = Describe("Watchdog", func() {
1414

15-
var wd watchdog.Watchdog
15+
var wd watchdog.FakeWatchdog
1616
var cancel context.CancelFunc
1717

1818
BeforeEach(func() {
@@ -65,6 +65,20 @@ var _ = Describe("Watchdog", func() {
6565
verifyNoWatchdogFood(wd)
6666
})
6767
})
68+
69+
Context("Triggered watchdog reset", func() {
70+
BeforeEach(func() {
71+
wd.Stop()
72+
wd.Reset()
73+
})
74+
75+
It("should be armed and fed", func() {
76+
Eventually(func(g Gomega) {
77+
g.Expect(wd.Status()).To(Equal(watchdog.Armed))
78+
}, 1*time.Second, 100*time.Millisecond).Should(Succeed(), "watchdog should be armed")
79+
verifyWatchdogFood(wd)
80+
})
81+
})
6882
})
6983

7084
func verifyWatchdogFood(wd watchdog.Watchdog) {

0 commit comments

Comments
 (0)