Skip to content

Commit fb18eb2

Browse files
committed
2 parents eea2981 + 45e7389 commit fb18eb2

File tree

12 files changed

+190
-31
lines changed

12 files changed

+190
-31
lines changed

controllers/sriovnetworknodepolicy_controller.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
317317
return err
318318
}
319319
}
320+
320321
logger.V(1).Info("Remove SriovNetworkNodeState custom resource for unselected node")
321322
nsList := &sriovnetworkv1.SriovNetworkNodeStateList{}
322323
err := r.List(ctx, nsList, &client.ListOptions{})
@@ -335,10 +336,10 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
335336
}
336337
}
337338
if !found {
338-
// remove device plugin labels
339+
// remove device plugin labels; if the node no longer exists, ignore the not-found error and continue so the stale nodeState is still handled
339340
logger.Info("removing device plugin label from node as SriovNetworkNodeState doesn't exist", "nodeStateName", ns.Name)
340341
err = utils.RemoveLabelFromNode(ctx, ns.Name, constants.SriovDevicePluginLabel, r.Client)
341-
if err != nil {
342+
if err != nil && !errors.IsNotFound(err) {
342343
logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name)
343344
return err
344345
}

controllers/sriovnetworknodepolicy_controller_test.go

Lines changed: 159 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,13 +181,13 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() {
181181
})
182182
})
183183
AfterEach(func() {
184-
err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{})
184+
err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{}, k8sclient.GracePeriodSeconds(0))
185185
Expect(err).ToNot(HaveOccurred())
186186

187-
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, k8sclient.InNamespace(vars.Namespace))
187+
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, k8sclient.InNamespace(vars.Namespace), k8sclient.GracePeriodSeconds(0))
188188
Expect(err).ToNot(HaveOccurred())
189189

190-
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, k8sclient.InNamespace(vars.Namespace))
190+
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, k8sclient.InNamespace(vars.Namespace), k8sclient.GracePeriodSeconds(0))
191191
Expect(err).ToNot(HaveOccurred())
192192
})
193193
Context("device plugin labels", func() {
@@ -263,6 +263,162 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() {
263263
Expect(errors.IsNotFound(err)).To(BeTrue())
264264
}, time.Minute, time.Second).Should(Succeed())
265265
})
266+
267+
It("should skip label removal for nodes that doesn't exist with no stale timer", func() {
268+
node0 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
269+
Name: "node0",
270+
Labels: map[string]string{"kubernetes.io/os": "linux",
271+
"node-role.kubernetes.io/worker": ""},
272+
}}
273+
Expect(k8sClient.Create(ctx, node0)).To(Succeed())
274+
275+
node1 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
276+
Name: "node1",
277+
Labels: map[string]string{"kubernetes.io/os": "linux",
278+
"node-role.kubernetes.io/worker": ""},
279+
}}
280+
Expect(k8sClient.Create(ctx, node1)).To(Succeed())
281+
282+
nodeState := &sriovnetworkv1.SriovNetworkNodeState{}
283+
node := &corev1.Node{}
284+
for _, nodeName := range []string{"node0", "node1"} {
285+
Eventually(func(g Gomega) {
286+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: nodeName, Namespace: testNamespace}, nodeState)
287+
g.Expect(err).ToNot(HaveOccurred())
288+
}, time.Minute, time.Second).Should(Succeed())
289+
290+
Eventually(func(g Gomega) {
291+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: nodeName}, node)
292+
g.Expect(err).ToNot(HaveOccurred())
293+
value, exist := node.Labels[consts.SriovDevicePluginLabel]
294+
g.Expect(exist).To(BeTrue())
295+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelDisabled))
296+
}, time.Minute, time.Second).Should(Succeed())
297+
298+
nodeState.Status.Interfaces = sriovnetworkv1.InterfaceExts{
299+
sriovnetworkv1.InterfaceExt{
300+
Vendor: "8086",
301+
Driver: "i40e",
302+
Mtu: 1500,
303+
Name: "ens803f0",
304+
PciAddress: "0000:86:00.0",
305+
NumVfs: 0,
306+
TotalVfs: 64,
307+
},
308+
}
309+
err := k8sClient.Status().Update(context.Background(), nodeState)
310+
Expect(err).ToNot(HaveOccurred())
311+
}
312+
313+
err := k8sClient.Delete(context.Background(), node1, k8sclient.GracePeriodSeconds(0))
314+
Expect(err).ToNot(HaveOccurred())
315+
316+
Eventually(func(g Gomega) {
317+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: "node1", Namespace: testNamespace}, nodeState)
318+
g.Expect(err).To(HaveOccurred())
319+
g.Expect(errors.IsNotFound(err)).To(BeTrue())
320+
}, 30*time.Second, time.Second).Should(Succeed())
321+
322+
somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{}
323+
somePolicy.SetNamespace(testNamespace)
324+
somePolicy.SetName("some-policy")
325+
somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{
326+
NumVfs: 5,
327+
NodeSelector: map[string]string{"node-role.kubernetes.io/worker": ""},
328+
NicSelector: sriovnetworkv1.SriovNetworkNicSelector{Vendor: "8086"},
329+
Priority: 20,
330+
}
331+
Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred())
332+
333+
Eventually(func(g Gomega) {
334+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node0.Name}, node0)
335+
g.Expect(err).ToNot(HaveOccurred())
336+
value, exist := node0.Labels[consts.SriovDevicePluginLabel]
337+
g.Expect(exist).To(BeTrue())
338+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelEnabled))
339+
}, time.Minute, time.Second).Should(Succeed())
340+
})
341+
342+
It("should skip label removal for nodes that doesn't exist with stale timer", func() {
343+
err := os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "5")
344+
Expect(err).ToNot(HaveOccurred())
345+
defer func() {
346+
err = os.Unsetenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")
347+
Expect(err).ToNot(HaveOccurred())
348+
}()
349+
350+
node0 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
351+
Name: "node0",
352+
Labels: map[string]string{"kubernetes.io/os": "linux",
353+
"node-role.kubernetes.io/worker": ""},
354+
}}
355+
Expect(k8sClient.Create(ctx, node0)).To(Succeed())
356+
357+
node1 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
358+
Name: "node1",
359+
Labels: map[string]string{"kubernetes.io/os": "linux",
360+
"node-role.kubernetes.io/worker": ""},
361+
}}
362+
Expect(k8sClient.Create(ctx, node1)).To(Succeed())
363+
364+
nodeState := &sriovnetworkv1.SriovNetworkNodeState{}
365+
node := &corev1.Node{}
366+
for _, nodeName := range []string{"node0", "node1"} {
367+
Eventually(func(g Gomega) {
368+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: nodeName, Namespace: testNamespace}, nodeState)
369+
g.Expect(err).ToNot(HaveOccurred())
370+
}, time.Minute, time.Second).Should(Succeed())
371+
372+
Eventually(func(g Gomega) {
373+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: nodeName}, node)
374+
g.Expect(err).ToNot(HaveOccurred())
375+
value, exist := node.Labels[consts.SriovDevicePluginLabel]
376+
g.Expect(exist).To(BeTrue())
377+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelDisabled))
378+
}, time.Minute, time.Second).Should(Succeed())
379+
380+
nodeState.Status.Interfaces = sriovnetworkv1.InterfaceExts{
381+
sriovnetworkv1.InterfaceExt{
382+
Vendor: "8086",
383+
Driver: "i40e",
384+
Mtu: 1500,
385+
Name: "ens803f0",
386+
PciAddress: "0000:86:00.0",
387+
NumVfs: 0,
388+
TotalVfs: 64,
389+
},
390+
}
391+
err := k8sClient.Status().Update(context.Background(), nodeState)
392+
Expect(err).ToNot(HaveOccurred())
393+
}
394+
395+
err = k8sClient.Delete(context.Background(), node1, k8sclient.GracePeriodSeconds(0))
396+
Expect(err).ToNot(HaveOccurred())
397+
398+
Consistently(func(g Gomega) {
399+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: "node1", Namespace: testNamespace}, nodeState)
400+
g.Expect(err).ToNot(HaveOccurred())
401+
}, 10*time.Second, time.Second).Should(Succeed())
402+
403+
somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{}
404+
somePolicy.SetNamespace(testNamespace)
405+
somePolicy.SetName("some-policy")
406+
somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{
407+
NumVfs: 5,
408+
NodeSelector: map[string]string{"node-role.kubernetes.io/worker": ""},
409+
NicSelector: sriovnetworkv1.SriovNetworkNicSelector{Vendor: "8086"},
410+
Priority: 20,
411+
}
412+
Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred())
413+
414+
Eventually(func(g Gomega) {
415+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node0.Name}, node0)
416+
g.Expect(err).ToNot(HaveOccurred())
417+
value, exist := node0.Labels[consts.SriovDevicePluginLabel]
418+
g.Expect(exist).To(BeTrue())
419+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelEnabled))
420+
}, time.Minute, time.Second).Should(Succeed())
421+
})
266422
})
267423

268424
Context("RdmaMode", func() {

controllers/suite_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646

4747
//+kubebuilder:scaffold:imports
4848
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
49+
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
4950
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
5051
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util"
5152
)
@@ -96,6 +97,7 @@ var _ = BeforeSuite(func() {
9697
func(o *zap.Options) {
9798
o.TimeEncoder = zapcore.RFC3339NanoTimeEncoder
9899
}))
100+
snolog.InitLog()
99101

100102
// Go to project root directory
101103
err = os.Chdir("..")

deploy/configmap.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ data:
2121
Intel_ice_Columbiapark_E823C: "8086 188a 1889"
2222
Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889"
2323
Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889"
24+
Intel_ice_Columbiapark_E825C_BACKPLANE: "8086 579c 1889"
25+
Intel_ice_Columbiapark_E825C_QSFP: "8086 579d 1889"
26+
Intel_ice_Columbiapark_E825C_SFP: "8086 579e 1889"
2427
Nvidia_mlx5_ConnectX-4: "15b3 1013 1014"
2528
Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016"
2629
Nvidia_mlx5_ConnectX-5: "15b3 1017 1018"

deployment/sriov-network-operator-chart/templates/configmap.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ data:
2121
Intel_ice_Columbiapark_E823C: "8086 188a 1889"
2222
Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889"
2323
Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889"
24+
Intel_ice_Columbiapark_E825C_BACKPLANE: "8086 579c 1889"
25+
Intel_ice_Columbiapark_E825C_QSFP: "8086 579d 1889"
26+
Intel_ice_Columbiapark_E825C_SFP: "8086 579e 1889"
2427
Nvidia_mlx5_ConnectX-4: "15b3 1013 1014"
2528
Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016"
2629
Nvidia_mlx5_ConnectX-5: "15b3 1017 1018"

doc/supported-hardware.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ The following SR-IOV capable hardware is supported with sriov-network-operator:
1717
| Intel E823-C Family | 8086 | 188a |
1818
| Intel E823-L SFP Family | 8086 | 124d |
1919
| Intel E823-L Backplane Family | 8086 | 124c |
20+
| Intel E825-C Backplane Family | 8086 | 579c |
21+
| Intel E825-C QSFP Family | 8086 | 579d |
22+
| Intel E825-C SFP Family | 8086 | 579e |
2023
| Mellanox MT27700 Family [ConnectX-4] | 15b3 | 1013 |
2124
| Mellanox MT27710 Family [ConnectX-4 Lx] | 15b3 | 1015 |
2225
| Mellanox MT27800 Family [ConnectX-5] | 15b3 | 1017 |
@@ -61,6 +64,9 @@ The following table depicts the supported SR-IOV hardware features of each suppo
6164
| Intel E823-C Family | V | V | X |
6265
| Intel E823-L SFP Family | V | V | X |
6366
| Intel E823-L Backplane Family | V | V | X |
67+
| Intel E825-C Backplane Family | V | V | X |
68+
| Intel E825-C QSFP Family | V | V | X |
69+
| Intel E825-C SFP Family | V | V | X |
6470
| Mellanox MT27700 Family [ConnectX-4] | V | V | V |
6571
| Mellanox MT27710 Family [ConnectX-4 Lx] | V | V | V |
6672
| Mellanox MT27800 Family [ConnectX-5] | V | V | V |

pkg/consts/constants.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,11 @@ const (
139139
`IMPORT{program}="/etc/udev/switchdev-vf-link-name.sh $attr{phys_port_name}", ` +
140140
`NAME="%s_$env{NUMBER}"`
141141

142-
KernelArgPciRealloc = "pci=realloc"
143-
KernelArgIntelIommu = "intel_iommu=on"
144-
KernelArgIommuPt = "iommu=pt"
145-
KernelArgIommuPassthrough = "iommu.passthrough=1"
146-
KernelArgRdmaShared = "ib_core.netns_mode=1"
147-
KernelArgRdmaExclusive = "ib_core.netns_mode=0"
142+
KernelArgPciRealloc = "pci=realloc"
143+
KernelArgIntelIommu = "intel_iommu=on"
144+
KernelArgIommuPt = "iommu=pt"
145+
KernelArgRdmaShared = "ib_core.netns_mode=1"
146+
KernelArgRdmaExclusive = "ib_core.netns_mode=0"
148147

149148
// Systemd consts
150149
SriovSystemdConfigPath = SriovConfBasePath + "/sriov-interface-config.yaml"

pkg/daemon/daemon_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ var _ = Describe("Daemon Controller", Ordered, func() {
185185
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgPciRealloc).Return(true).AnyTimes()
186186
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgIntelIommu).Return(true).AnyTimes()
187187
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgIommuPt).Return(true).AnyTimes()
188-
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgIommuPassthrough).Return(true).AnyTimes()
189188
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgRdmaExclusive).Return(false).AnyTimes()
190189
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgRdmaShared).Return(false).AnyTimes()
191190
hostHelper.EXPECT().SetRDMASubsystem("").Return(nil).AnyTimes()

pkg/daemon/plugin_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ var _ = Describe("config daemon plugin loading tests", func() {
4747
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false).AnyTimes()
4848
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false).AnyTimes()
4949
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false).AnyTimes()
50-
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPassthrough).Return(false).AnyTimes()
5150

5251
// k8s plugin is ATM the only plugin which require mocking/faking, as its New method performs additional logic
5352
// other than simple plugin struct initialization

pkg/daemon/status.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ func (dn *NodeReconciler) updateSyncState(ctx context.Context, desiredNodeState
3131
"LastSyncError", failedMessage)
3232
return err
3333
}
34+
// update the object meta; otherwise the patch can fail if the object changed in the meantime
35+
desiredNodeState.ObjectMeta = currentNodeState.ObjectMeta
3436

3537
funcLog.V(2).Info("update nodeState status",
3638
"CurrentSyncStatus", currentNodeState.Status.SyncStatus,
3739
"CurrentLastSyncError", currentNodeState.Status.LastSyncError,
38-
"NewSyncStatus", status,
39-
"NewFailedMessage", failedMessage)
40+
"NewSyncStatus", desiredNodeState.Status.SyncStatus,
41+
"NewFailedMessage", desiredNodeState.Status.LastSyncError)
4042

4143
err := dn.client.Status().Patch(ctx, desiredNodeState, client.MergeFrom(currentNodeState))
4244
if err != nil {

0 commit comments

Comments
 (0)