Skip to content

Commit ca29a6e

Browse files
Merge pull request openshift#1073 from SchSeba/merge-bot-master
OCPBUGS-52853,OCPBUGS-54380,OCPBUGS-54401: Merge https://github.com/k8snetworkplumbingwg/sriov-network-operator:master into main
2 parents eea2981 + 86bdb37 commit ca29a6e

File tree

14 files changed

+192
-33
lines changed

14 files changed

+192
-33
lines changed

bundle/manifests/sriov-network-operator.clusterserviceversion.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ metadata:
100100
categories: Networking
101101
certified: "false"
102102
containerImage: quay.io/openshift/origin-sriov-network-operator:4.19
103-
createdAt: "2025-03-27T23:49:55Z"
103+
createdAt: "2025-04-01T23:50:01Z"
104104
description: An operator for configuring SR-IOV components and initializing SRIOV
105105
network devices in Openshift cluster.
106106
features.operators.openshift.io/cnf: "false"

controllers/sriovnetworknodepolicy_controller.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
317317
return err
318318
}
319319
}
320+
320321
logger.V(1).Info("Remove SriovNetworkNodeState custom resource for unselected node")
321322
nsList := &sriovnetworkv1.SriovNetworkNodeStateList{}
322323
err := r.List(ctx, nsList, &client.ListOptions{})
@@ -335,10 +336,10 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
335336
}
336337
}
337338
if !found {
338-
// remove device plugin labels
339+
// Remove device plugin labels. If the node no longer exists, ignore the NotFound error and continue so the stale nodeState is still handled.
339340
logger.Info("removing device plugin label from node as SriovNetworkNodeState doesn't exist", "nodeStateName", ns.Name)
340341
err = utils.RemoveLabelFromNode(ctx, ns.Name, constants.SriovDevicePluginLabel, r.Client)
341-
if err != nil {
342+
if err != nil && !errors.IsNotFound(err) {
342343
logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name)
343344
return err
344345
}

controllers/sriovnetworknodepolicy_controller_test.go

Lines changed: 159 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,13 +181,13 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() {
181181
})
182182
})
183183
AfterEach(func() {
184-
err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{})
184+
err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{}, k8sclient.GracePeriodSeconds(0))
185185
Expect(err).ToNot(HaveOccurred())
186186

187-
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, k8sclient.InNamespace(vars.Namespace))
187+
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, k8sclient.InNamespace(vars.Namespace), k8sclient.GracePeriodSeconds(0))
188188
Expect(err).ToNot(HaveOccurred())
189189

190-
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, k8sclient.InNamespace(vars.Namespace))
190+
err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, k8sclient.InNamespace(vars.Namespace), k8sclient.GracePeriodSeconds(0))
191191
Expect(err).ToNot(HaveOccurred())
192192
})
193193
Context("device plugin labels", func() {
@@ -263,6 +263,162 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() {
263263
Expect(errors.IsNotFound(err)).To(BeTrue())
264264
}, time.Minute, time.Second).Should(Succeed())
265265
})
266+
267+
It("should skip label removal for nodes that doesn't exist with no stale timer", func() {
268+
node0 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
269+
Name: "node0",
270+
Labels: map[string]string{"kubernetes.io/os": "linux",
271+
"node-role.kubernetes.io/worker": ""},
272+
}}
273+
Expect(k8sClient.Create(ctx, node0)).To(Succeed())
274+
275+
node1 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
276+
Name: "node1",
277+
Labels: map[string]string{"kubernetes.io/os": "linux",
278+
"node-role.kubernetes.io/worker": ""},
279+
}}
280+
Expect(k8sClient.Create(ctx, node1)).To(Succeed())
281+
282+
nodeState := &sriovnetworkv1.SriovNetworkNodeState{}
283+
node := &corev1.Node{}
284+
for _, nodeName := range []string{"node0", "node1"} {
285+
Eventually(func(g Gomega) {
286+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: nodeName, Namespace: testNamespace}, nodeState)
287+
g.Expect(err).ToNot(HaveOccurred())
288+
}, time.Minute, time.Second).Should(Succeed())
289+
290+
Eventually(func(g Gomega) {
291+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: nodeName}, node)
292+
g.Expect(err).ToNot(HaveOccurred())
293+
value, exist := node.Labels[consts.SriovDevicePluginLabel]
294+
g.Expect(exist).To(BeTrue())
295+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelDisabled))
296+
}, time.Minute, time.Second).Should(Succeed())
297+
298+
nodeState.Status.Interfaces = sriovnetworkv1.InterfaceExts{
299+
sriovnetworkv1.InterfaceExt{
300+
Vendor: "8086",
301+
Driver: "i40e",
302+
Mtu: 1500,
303+
Name: "ens803f0",
304+
PciAddress: "0000:86:00.0",
305+
NumVfs: 0,
306+
TotalVfs: 64,
307+
},
308+
}
309+
err := k8sClient.Status().Update(context.Background(), nodeState)
310+
Expect(err).ToNot(HaveOccurred())
311+
}
312+
313+
err := k8sClient.Delete(context.Background(), node1, k8sclient.GracePeriodSeconds(0))
314+
Expect(err).ToNot(HaveOccurred())
315+
316+
Eventually(func(g Gomega) {
317+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: "node1", Namespace: testNamespace}, nodeState)
318+
g.Expect(err).To(HaveOccurred())
319+
g.Expect(errors.IsNotFound(err)).To(BeTrue())
320+
}, 30*time.Second, time.Second).Should(Succeed())
321+
322+
somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{}
323+
somePolicy.SetNamespace(testNamespace)
324+
somePolicy.SetName("some-policy")
325+
somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{
326+
NumVfs: 5,
327+
NodeSelector: map[string]string{"node-role.kubernetes.io/worker": ""},
328+
NicSelector: sriovnetworkv1.SriovNetworkNicSelector{Vendor: "8086"},
329+
Priority: 20,
330+
}
331+
Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred())
332+
333+
Eventually(func(g Gomega) {
334+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node0.Name}, node0)
335+
g.Expect(err).ToNot(HaveOccurred())
336+
value, exist := node0.Labels[consts.SriovDevicePluginLabel]
337+
g.Expect(exist).To(BeTrue())
338+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelEnabled))
339+
}, time.Minute, time.Second).Should(Succeed())
340+
})
341+
342+
It("should skip label removal for nodes that doesn't exist with stale timer", func() {
343+
err := os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "5")
344+
Expect(err).ToNot(HaveOccurred())
345+
defer func() {
346+
err = os.Unsetenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")
347+
Expect(err).ToNot(HaveOccurred())
348+
}()
349+
350+
node0 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
351+
Name: "node0",
352+
Labels: map[string]string{"kubernetes.io/os": "linux",
353+
"node-role.kubernetes.io/worker": ""},
354+
}}
355+
Expect(k8sClient.Create(ctx, node0)).To(Succeed())
356+
357+
node1 := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
358+
Name: "node1",
359+
Labels: map[string]string{"kubernetes.io/os": "linux",
360+
"node-role.kubernetes.io/worker": ""},
361+
}}
362+
Expect(k8sClient.Create(ctx, node1)).To(Succeed())
363+
364+
nodeState := &sriovnetworkv1.SriovNetworkNodeState{}
365+
node := &corev1.Node{}
366+
for _, nodeName := range []string{"node0", "node1"} {
367+
Eventually(func(g Gomega) {
368+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: nodeName, Namespace: testNamespace}, nodeState)
369+
g.Expect(err).ToNot(HaveOccurred())
370+
}, time.Minute, time.Second).Should(Succeed())
371+
372+
Eventually(func(g Gomega) {
373+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: nodeName}, node)
374+
g.Expect(err).ToNot(HaveOccurred())
375+
value, exist := node.Labels[consts.SriovDevicePluginLabel]
376+
g.Expect(exist).To(BeTrue())
377+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelDisabled))
378+
}, time.Minute, time.Second).Should(Succeed())
379+
380+
nodeState.Status.Interfaces = sriovnetworkv1.InterfaceExts{
381+
sriovnetworkv1.InterfaceExt{
382+
Vendor: "8086",
383+
Driver: "i40e",
384+
Mtu: 1500,
385+
Name: "ens803f0",
386+
PciAddress: "0000:86:00.0",
387+
NumVfs: 0,
388+
TotalVfs: 64,
389+
},
390+
}
391+
err := k8sClient.Status().Update(context.Background(), nodeState)
392+
Expect(err).ToNot(HaveOccurred())
393+
}
394+
395+
err = k8sClient.Delete(context.Background(), node1, k8sclient.GracePeriodSeconds(0))
396+
Expect(err).ToNot(HaveOccurred())
397+
398+
Consistently(func(g Gomega) {
399+
err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: "node1", Namespace: testNamespace}, nodeState)
400+
g.Expect(err).ToNot(HaveOccurred())
401+
}, 10*time.Second, time.Second).Should(Succeed())
402+
403+
somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{}
404+
somePolicy.SetNamespace(testNamespace)
405+
somePolicy.SetName("some-policy")
406+
somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{
407+
NumVfs: 5,
408+
NodeSelector: map[string]string{"node-role.kubernetes.io/worker": ""},
409+
NicSelector: sriovnetworkv1.SriovNetworkNicSelector{Vendor: "8086"},
410+
Priority: 20,
411+
}
412+
Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred())
413+
414+
Eventually(func(g Gomega) {
415+
err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node0.Name}, node0)
416+
g.Expect(err).ToNot(HaveOccurred())
417+
value, exist := node0.Labels[consts.SriovDevicePluginLabel]
418+
g.Expect(exist).To(BeTrue())
419+
g.Expect(value).To(Equal(consts.SriovDevicePluginLabelEnabled))
420+
}, time.Minute, time.Second).Should(Succeed())
421+
})
266422
})
267423

268424
Context("RdmaMode", func() {

controllers/suite_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646

4747
//+kubebuilder:scaffold:imports
4848
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
49+
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
4950
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
5051
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util"
5152
)
@@ -96,6 +97,7 @@ var _ = BeforeSuite(func() {
9697
func(o *zap.Options) {
9798
o.TimeEncoder = zapcore.RFC3339NanoTimeEncoder
9899
}))
100+
snolog.InitLog()
99101

100102
// Go to project root directory
101103
err = os.Chdir("..")

deploy/configmap.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ data:
2121
Intel_ice_Columbiapark_E823C: "8086 188a 1889"
2222
Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889"
2323
Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889"
24+
Intel_ice_Columbiapark_E825C_BACKPLANE: "8086 579c 1889"
25+
Intel_ice_Columbiapark_E825C_QSFP: "8086 579d 1889"
26+
Intel_ice_Columbiapark_E825C_SFP: "8086 579e 1889"
2427
Nvidia_mlx5_ConnectX-4: "15b3 1013 1014"
2528
Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016"
2629
Nvidia_mlx5_ConnectX-5: "15b3 1017 1018"

deployment/sriov-network-operator-chart/templates/configmap.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ data:
2121
Intel_ice_Columbiapark_E823C: "8086 188a 1889"
2222
Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889"
2323
Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889"
24+
Intel_ice_Columbiapark_E825C_BACKPLANE: "8086 579c 1889"
25+
Intel_ice_Columbiapark_E825C_QSFP: "8086 579d 1889"
26+
Intel_ice_Columbiapark_E825C_SFP: "8086 579e 1889"
2427
Nvidia_mlx5_ConnectX-4: "15b3 1013 1014"
2528
Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016"
2629
Nvidia_mlx5_ConnectX-5: "15b3 1017 1018"

doc/supported-hardware.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ The following SR-IOV capable hardware is supported with sriov-network-operator:
1717
| Intel E823-C Family | 8086 | 188a |
1818
| Intel E823-L SFP Family | 8086 | 124d |
1919
| Intel E823-L Backplane Family | 8086 | 124c |
20+
| Intel E825-C Backplane Family | 8086 | 579c |
21+
| Intel E825-C QSFP Family | 8086 | 579d |
22+
| Intel E825-C SFP Family | 8086 | 579e |
2023
| Mellanox MT27700 Family [ConnectX-4] | 15b3 | 1013 |
2124
| Mellanox MT27710 Family [ConnectX-4 Lx] | 15b3 | 1015 |
2225
| Mellanox MT27800 Family [ConnectX-5] | 15b3 | 1017 |
@@ -61,6 +64,9 @@ The following table depicts the supported SR-IOV hardware features of each suppo
6164
| Intel E823-C Family | V | V | X |
6265
| Intel E823-L SFP Family | V | V | X |
6366
| Intel E823-L Backplane Family | V | V | X |
67+
| Intel E825-C Backplane Family | V | V | X |
68+
| Intel E825-C QSFP Family | V | V | X |
69+
| Intel E825-C SFP Family | V | V | X |
6470
| Mellanox MT27700 Family [ConnectX-4] | V | V | V |
6571
| Mellanox MT27710 Family [ConnectX-4 Lx] | V | V | V |
6672
| Mellanox MT27800 Family [ConnectX-5] | V | V | V |

manifests/stable/sriov-network-operator.clusterserviceversion.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ metadata:
100100
categories: Networking
101101
certified: "false"
102102
containerImage: quay.io/openshift/origin-sriov-network-operator:4.19
103-
createdAt: "2025-03-27T23:49:55Z"
103+
createdAt: "2025-04-01T23:50:01Z"
104104
description: An operator for configuring SR-IOV components and initializing SRIOV
105105
network devices in Openshift cluster.
106106
features.operators.openshift.io/cnf: "false"

pkg/consts/constants.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,11 @@ const (
139139
`IMPORT{program}="/etc/udev/switchdev-vf-link-name.sh $attr{phys_port_name}", ` +
140140
`NAME="%s_$env{NUMBER}"`
141141

142-
KernelArgPciRealloc = "pci=realloc"
143-
KernelArgIntelIommu = "intel_iommu=on"
144-
KernelArgIommuPt = "iommu=pt"
145-
KernelArgIommuPassthrough = "iommu.passthrough=1"
146-
KernelArgRdmaShared = "ib_core.netns_mode=1"
147-
KernelArgRdmaExclusive = "ib_core.netns_mode=0"
142+
KernelArgPciRealloc = "pci=realloc"
143+
KernelArgIntelIommu = "intel_iommu=on"
144+
KernelArgIommuPt = "iommu=pt"
145+
KernelArgRdmaShared = "ib_core.netns_mode=1"
146+
KernelArgRdmaExclusive = "ib_core.netns_mode=0"
148147

149148
// Systemd consts
150149
SriovSystemdConfigPath = SriovConfBasePath + "/sriov-interface-config.yaml"

pkg/daemon/daemon_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ var _ = Describe("Daemon Controller", Ordered, func() {
185185
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgPciRealloc).Return(true).AnyTimes()
186186
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgIntelIommu).Return(true).AnyTimes()
187187
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgIommuPt).Return(true).AnyTimes()
188-
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgIommuPassthrough).Return(true).AnyTimes()
189188
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgRdmaExclusive).Return(false).AnyTimes()
190189
hostHelper.EXPECT().IsKernelArgsSet("", constants.KernelArgRdmaShared).Return(false).AnyTimes()
191190
hostHelper.EXPECT().SetRDMASubsystem("").Return(nil).AnyTimes()

0 commit comments

Comments
 (0)