Skip to content

Commit b65313a

Browse files
yevgeny-shnaidman and k8s-ci-robot
authored and committed
Fixing Module CR deletion during node reboot
If a node is rebooted (for any reason) while the Module CR is being deleted, the following sequence happens: 1) the node becomes NotReady; 2) the module-nmc controller removes the Spec from the NMC; 3) the nmc controller does not schedule an unloader pod, since the node is not ready; 4) the node becomes ready; 5) the nmc controller still does not schedule an unloader pod, since after a node reboot the kernel module is not loaded anyway; 6) no unloader pod runs, so the Status in the NMC is never deleted, and the Module CR cannot be finalized while that Status is present. Solution: when the nmc controller processes a module that has no Spec and the node has been rebooted, it deletes the module's Status from the NMC.
1 parent efdbe15 commit b65313a

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

internal/controllers/nmc_reconciler.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -408,8 +408,10 @@ func (h *nmcReconcilerHelperImpl) ProcessUnconfiguredModuleStatus(
408408
it also fixes the scenario when node's kernel was upgraded, so unload pod will fail anyway
409409
*/
410410
if h.nodeAPI.NodeBecomeReadyAfter(node, status.LastTransitionTime) {
411-
logger.Info("node was rebooted, no need to unload kernel module that is not present in kernel, will wait until NMC spec is updated")
412-
return nil
411+
logger.Info("node was rebooted and spec is missing: delete the status to allow Module CR unload, if needed")
412+
patchFrom := client.MergeFrom(nmcObj.DeepCopy())
413+
nmc.RemoveModuleStatus(&nmcObj.Status.Modules, status.Namespace, status.Name)
414+
return h.client.Status().Patch(ctx, nmcObj, patchFrom)
413415
}
414416

415417
pod, err := h.pm.GetWorkerPod(ctx, podName, status.Namespace)

internal/controllers/nmc_reconciler_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,8 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessUnconfiguredModuleStatus", func
793793
podName = workerPodName(nmcName, name)
794794

795795
client *testclient.MockClient
796+
sw *testclient.MockStatusWriter
797+
796798
pm *MockpodManager
797799
nm *node.MockNode
798800
helper nmcReconcilerHelper
@@ -801,6 +803,7 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessUnconfiguredModuleStatus", func
801803
BeforeEach(func() {
802804
ctrl := gomock.NewController(GinkgoT())
803805
client = testclient.NewMockClient(ctrl)
806+
sw = testclient.NewMockStatusWriter(ctrl)
804807
pm = NewMockpodManager(ctrl)
805808
nm = node.NewMockNode(ctrl)
806809
helper = newNMCReconcilerHelper(client, pm, nil, nm)
@@ -820,7 +823,11 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessUnconfiguredModuleStatus", func
820823
node := v1.Node{}
821824

822825
It("should do nothing , if the node has been rebooted/ready lately", func() {
823-
nm.EXPECT().NodeBecomeReadyAfter(&node, status.LastTransitionTime).Return(true)
826+
gomock.InOrder(
827+
nm.EXPECT().NodeBecomeReadyAfter(&node, status.LastTransitionTime).Return(true),
828+
client.EXPECT().Status().Return(sw),
829+
sw.EXPECT().Patch(ctx, nmc, gomock.Any()),
830+
)
824831

825832
Expect(
826833
helper.ProcessUnconfiguredModuleStatus(ctx, nmc, status, &node),

0 commit comments

Comments
 (0)