Skip to content

Commit b48f1ab

Browse files
Merge pull request openshift#1078 from SchSeba/merge-bot-master
OCPBUGS-53346: Merge https://github.com/k8snetworkplumbingwg/sriov-network-operator:master into main
2 parents 1c4cd6e + 7513e6f commit b48f1ab

File tree

68 files changed

+344
-3004
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+344
-3004
lines changed

Makefile

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,6 @@ clean:
6868
@rm -rf $(TARGET_DIR)
6969
@rm -rf $(BIN_DIR)
7070

71-
update-codegen:
72-
hack/update-codegen.sh
73-
7471
image: ; $(info Building images...)
7572
$(IMAGE_BUILDER) build -f $(DOCKERFILE) -t $(IMAGE_TAG) $(CURPATH) $(IMAGE_BUILD_OPTS)
7673
$(IMAGE_BUILDER) build -f $(DOCKERFILE_CONFIG_DAEMON) -t $(CONFIG_DAEMON_IMAGE_TAG) $(CURPATH) $(IMAGE_BUILD_OPTS)

bindata/manifests/daemon/daemonset.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,15 @@ spec:
117117
command:
118118
- /bin/bash
119119
- -c
120-
- mkdir -p /host/var/lib/sriov/ && cp /usr/bin/sriov-network-config-daemon /host/var/lib/sriov/sriov-network-config-daemon && chcon -t bin_t /host/var/lib/sriov/sriov-network-config-daemon | true # Allow systemd to run the file, use pipe true to not failed if the system doesn't have selinux or apparmor enabled
120+
- |
121+
set -e
122+
if [ ! -f /host/usr/share/hwdata/pci.ids ]; then # If pci.ids file is missing on the host, config daemon won't be able to discover PCI devices
123+
mkdir -p /host/usr/share/hwdata/
124+
cp /usr/share/hwdata/pci.ids /host/usr/share/hwdata/pci.ids
125+
fi
126+
mkdir -p /host/var/lib/sriov/
127+
cp /usr/bin/sriov-network-config-daemon /host/var/lib/sriov/sriov-network-config-daemon
128+
chcon -t bin_t /host/var/lib/sriov/sriov-network-config-daemon || true # Allow systemd to run the file; "|| true" keeps the script from failing if the system doesn't have selinux or apparmor enabled
121129
securityContext:
122130
privileged: true
123131
resources:

bundle/manifests/sriov-network-operator.clusterserviceversion.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ metadata:
100100
categories: Networking
101101
certified: "false"
102102
containerImage: quay.io/openshift/origin-sriov-network-operator:4.19
103-
createdAt: "2025-04-06T23:49:43Z"
103+
createdAt: "2025-04-11T23:49:10Z"
104104
description: An operator for configuring SR-IOV components and initializing SRIOV
105105
network devices in Openshift cluster.
106106
features.operators.openshift.io/cnf: "false"

cmd/sriov-network-config-daemon/start.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ import (
4040
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
4141

4242
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
43-
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
4443
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
4544
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/daemon"
4645
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
@@ -243,7 +242,6 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
243242
}
244243

245244
// create clients
246-
snclient := snclientset.NewForConfigOrDie(config)
247245
kubeclient := kubernetes.NewForConfigOrDie(config)
248246
kClient, err := runtimeclient.New(
249247
config,
@@ -254,7 +252,7 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
254252
os.Exit(1)
255253
}
256254

257-
eventRecorder := daemon.NewEventRecorder(snclient, kubeclient, scheme)
255+
eventRecorder := daemon.NewEventRecorder(kClient, kubeclient, scheme)
258256
defer eventRecorder.Shutdown()
259257

260258
nodeInfo, err := kubeclient.CoreV1().Nodes().Get(context.Background(), vars.NodeName, v1.GetOptions{})

cmd/sriov-network-operator-config-cleanup/cleanup.go

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,19 @@ import (
44
"context"
55
"time"
66

7+
ocpconfigapi "github.com/openshift/api/config/v1"
78
"github.com/spf13/cobra"
9+
"k8s.io/apimachinery/pkg/api/errors"
10+
"k8s.io/apimachinery/pkg/runtime"
11+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
12+
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
813
ctrl "sigs.k8s.io/controller-runtime"
14+
"sigs.k8s.io/controller-runtime/pkg/client"
915
"sigs.k8s.io/controller-runtime/pkg/log"
1016

17+
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
1118
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
12-
13-
"k8s.io/apimachinery/pkg/api/errors"
14-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15-
"k8s.io/apimachinery/pkg/watch"
16-
17-
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/typed/sriovnetwork/v1"
19+
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
1820
)
1921

2022
var (
@@ -25,6 +27,14 @@ var (
2527
func init() {
2628
rootCmd.Flags().StringVarP(&namespace, "namespace", "n", "", "designated SriovOperatorConfig namespace")
2729
rootCmd.Flags().IntVarP(&watchTO, "watch-timeout", "w", 10, "sriov-operator config post-delete watch timeout ")
30+
31+
// Init Scheme
32+
newScheme := runtime.NewScheme()
33+
utilruntime.Must(clientgoscheme.AddToScheme(newScheme))
34+
utilruntime.Must(sriovnetworkv1.AddToScheme(newScheme))
35+
utilruntime.Must(ocpconfigapi.AddToScheme(newScheme))
36+
37+
vars.Scheme = newScheme
2838
}
2939

3040
func runCleanupCmd(cmd *cobra.Command, args []string) error {
@@ -38,46 +48,42 @@ func runCleanupCmd(cmd *cobra.Command, args []string) error {
3848
defer timeoutFunc()
3949

4050
restConfig := ctrl.GetConfigOrDie()
41-
sriovcs, err := sriovnetworkv1.NewForConfig(restConfig)
51+
c, err := client.New(restConfig, client.Options{Scheme: vars.Scheme})
4252
if err != nil {
4353
setupLog.Error(err, "failed to create 'sriovnetworkv1' clientset")
4454
}
4555

46-
err = sriovcs.SriovOperatorConfigs(namespace).Delete(context.Background(), "default", metav1.DeleteOptions{})
56+
operatorConfig := &sriovnetworkv1.SriovOperatorConfig{}
57+
err = c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: "default"}, operatorConfig)
4758
if err != nil {
4859
if errors.IsNotFound(err) {
4960
return nil
5061
}
51-
setupLog.Error(err, "failed to delete SriovOperatorConfig")
62+
setupLog.Error(err, "failed to get SriovOperatorConfig")
5263
return err
5364
}
5465

55-
// watching 'default' config deletion with context timeout, in case sriov-operator fails to delete 'default' config
56-
watcher, err := sriovcs.SriovOperatorConfigs(namespace).Watch(ctx, metav1.ListOptions{Watch: true})
66+
err = c.Delete(ctx, operatorConfig)
5767
if err != nil {
58-
setupLog.Error(err, "failed creating 'default' SriovOperatorConfig object watcher")
68+
if errors.IsNotFound(err) {
69+
return nil
70+
}
71+
setupLog.Error(err, "failed to delete SriovOperatorConfig")
5972
return err
6073
}
61-
defer watcher.Stop()
62-
for {
63-
select {
64-
case event := <-watcher.ResultChan():
65-
if event.Type == watch.Deleted {
66-
setupLog.Info("'default' SriovOperatorConfig is deleted")
67-
return nil
68-
}
6974

70-
case <-ctx.Done():
71-
// check whether object might has been deleted before watch event triggered
72-
_, err := sriovcs.SriovOperatorConfigs(namespace).Get(context.Background(), "default", metav1.GetOptions{})
73-
if err != nil {
74-
if errors.IsNotFound(err) {
75-
return nil
76-
}
75+
for {
76+
err = c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: "default"}, operatorConfig)
77+
if err != nil {
78+
if errors.IsNotFound(err) {
79+
break
7780
}
78-
err = ctx.Err()
79-
setupLog.Error(err, "timeout has occurred for 'default' SriovOperatorConfig deletion")
81+
setupLog.Error(err, "failed to check sriovOperatorConfig exist")
8082
return err
8183
}
84+
time.Sleep(100 * time.Millisecond)
8285
}
86+
87+
setupLog.Info("'default' SriovOperatorConfig is deleted")
88+
return nil
8389
}

controllers/drain_controller.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"sync"
2323

24+
"github.com/go-logr/logr"
2425
corev1 "k8s.io/api/core/v1"
2526
"k8s.io/apimachinery/pkg/api/errors"
2627
"k8s.io/apimachinery/pkg/runtime"
@@ -76,8 +77,8 @@ func NewDrainReconcileController(client client.Client, Scheme *runtime.Scheme, r
7677
// For more details, check Reconcile and its Result here:
7778
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
7879
func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
79-
reqLogger := log.FromContext(ctx)
80-
reqLogger.Info("Reconciling Drain")
80+
ctx = context.WithValue(ctx, "logger", log.FromContext(ctx))
81+
reqLogger := log.FromContext(ctx).WithName("Drain Reconcile")
8182

8283
req.Namespace = vars.Namespace
8384

@@ -149,14 +150,14 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
149150
// doesn't need to drain anymore, so we can stop the drain
150151
if nodeStateDrainAnnotationCurrent == constants.DrainComplete ||
151152
nodeStateDrainAnnotationCurrent == constants.Draining {
152-
return dr.handleNodeIdleNodeStateDrainingOrCompleted(ctx, &reqLogger, node, nodeNetworkState)
153+
return dr.handleNodeIdleNodeStateDrainingOrCompleted(ctx, node, nodeNetworkState)
153154
}
154155
}
155156

156157
// this covers the case where a node requests a drain or a reboot
157158
if nodeDrainAnnotation == constants.DrainRequired ||
158159
nodeDrainAnnotation == constants.RebootRequired {
159-
return dr.handleNodeDrainOrReboot(ctx, &reqLogger, node, nodeNetworkState, nodeDrainAnnotation, nodeStateDrainAnnotationCurrent)
160+
return dr.handleNodeDrainOrReboot(ctx, node, nodeNetworkState, nodeDrainAnnotation, nodeStateDrainAnnotationCurrent)
160161
}
161162

162163
reqLogger.Error(nil, "unexpected node drain annotation")
@@ -209,7 +210,17 @@ func (dr *DrainReconcile) SetupWithManager(mgr ctrl.Manager) error {
209210
nodeStatePredicates := builder.WithPredicates(DrainStateAnnotationPredicate{})
210211

211212
return ctrl.NewControllerManagedBy(mgr).
212-
WithOptions(controller.Options{MaxConcurrentReconciles: 50}).
213+
WithOptions(controller.Options{
214+
MaxConcurrentReconciles: 50,
215+
LogConstructor: func(request *reconcile.Request) logr.Logger {
216+
logger := mgr.GetLogger().WithValues("Function", "Drain")
217+
// Inspired by https://github.com/kubernetes-sigs/controller-runtime/blob/52b17917caa97ec546423867d9637f1787830f3e/pkg/builder/controller.go#L447
218+
if req, ok := any(request).(*reconcile.Request); ok && req != nil {
219+
logger = logger.WithValues("node", request.Name)
220+
}
221+
return logger
222+
},
223+
}).
213224
For(&corev1.Node{}, nodePredicates).
214225
Watches(&sriovnetworkv1.SriovNetworkNodeState{}, createUpdateEnqueue, nodeStatePredicates).
215226
Complete(dr)

controllers/drain_controller_helper.go

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"k8s.io/apimachinery/pkg/labels"
1212
ctrl "sigs.k8s.io/controller-runtime"
1313
"sigs.k8s.io/controller-runtime/pkg/client"
14-
"sigs.k8s.io/controller-runtime/pkg/log"
1514
"sigs.k8s.io/controller-runtime/pkg/reconcile"
1615

1716
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
@@ -21,9 +20,9 @@ import (
2120
)
2221

2322
func (dr *DrainReconcile) handleNodeIdleNodeStateDrainingOrCompleted(ctx context.Context,
24-
reqLogger *logr.Logger,
2523
node *corev1.Node,
2624
nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState) (ctrl.Result, error) {
25+
reqLogger := ctx.Value("logger").(logr.Logger).WithName("handleNodeIdleNodeStateDrainingOrCompleted")
2726
completed, err := dr.drainer.CompleteDrainNode(ctx, node)
2827
if err != nil {
2928
reqLogger.Error(err, "failed to complete drain on node")
@@ -61,11 +60,11 @@ func (dr *DrainReconcile) handleNodeIdleNodeStateDrainingOrCompleted(ctx context
6160
}
6261

6362
func (dr *DrainReconcile) handleNodeDrainOrReboot(ctx context.Context,
64-
reqLogger *logr.Logger,
6563
node *corev1.Node,
6664
nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState,
6765
nodeDrainAnnotation,
6866
nodeStateDrainAnnotationCurrent string) (ctrl.Result, error) {
67+
reqLogger := ctx.Value("logger").(logr.Logger).WithName("handleNodeDrainOrReboot")
6968
// nothing to do here; we need to wait for the node to move back to idle
7069
if nodeStateDrainAnnotationCurrent == constants.DrainComplete {
7170
reqLogger.Info("node requested a drain and nodeState is on drain completed nothing todo")
@@ -139,9 +138,7 @@ func (dr *DrainReconcile) handleNodeDrainOrReboot(ctx context.Context,
139138
}
140139

141140
func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (*reconcile.Result, error) {
142-
// configure logs
143-
reqLogger := log.FromContext(ctx)
144-
reqLogger.Info("checkForNodeDrain():")
141+
reqLogger := ctx.Value("logger").(logr.Logger).WithName("tryDrainNode")
145142

146143
// critical section: we need to check whether we can start the draining
147144
dr.drainCheckMutex.Lock()
@@ -169,7 +166,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (
169166
err = dr.Get(ctx, client.ObjectKey{Name: nodeObj.GetName(), Namespace: vars.Namespace}, snns)
170167
if err != nil {
171168
if errors.IsNotFound(err) {
172-
reqLogger.V(2).Info("node doesn't have a sriovNetworkNodePolicy")
169+
reqLogger.V(2).Info("node doesn't have a sriovNetworkNodeState, skipping")
173170
continue
174171
}
175172
return nil, err
@@ -211,8 +208,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (
211208
}
212209

213210
func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
214-
logger := log.FromContext(ctx)
215-
logger.Info("findNodePoolConfig():")
211+
logger := ctx.Value("logger").(logr.Logger).WithName("findNodePoolConfig")
216212
// get all the sriov network pool configs
217213
npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
218214
err := dr.List(ctx, npcl)

controllers/sriovoperatorconfig_controller.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
8585
if err != nil {
8686
if apierrors.IsNotFound(err) {
8787
logger.Info("default SriovOperatorConfig object not found. waiting for creation.")
88-
return reconcile.Result{}, err
88+
return reconcile.Result{}, nil
8989
}
9090
// Error reading the object - requeue the request.
9191
logger.Error(err, "Failed to get default SriovOperatorConfig object")
@@ -99,12 +99,9 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
9999
// The object is being deleted
100100
return r.handleSriovOperatorConfigDeletion(ctx, defaultConfig, logger)
101101
}
102-
// add finalizer if needed
103-
if !sriovnetworkv1.StringInArray(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) {
104-
defaultConfig.ObjectMeta.Finalizers = append(defaultConfig.ObjectMeta.Finalizers, sriovnetworkv1.OPERATORCONFIGFINALIZERNAME)
105-
if err := r.Update(ctx, defaultConfig); err != nil {
106-
return reconcile.Result{}, err
107-
}
102+
103+
if err = r.syncOperatorConfigFinalizers(ctx, defaultConfig, logger); err != nil {
104+
return reconcile.Result{}, err
108105
}
109106

110107
r.FeatureGate.Init(defaultConfig.Spec.FeatureGates)
@@ -448,6 +445,29 @@ func (r *SriovOperatorConfigReconciler) syncOpenShiftSystemdService(ctx context.
448445
return r.setLabelInsideObject(ctx, cr, objs)
449446
}
450447

448+
func (r SriovOperatorConfigReconciler) syncOperatorConfigFinalizers(ctx context.Context, defaultConfig *sriovnetworkv1.SriovOperatorConfig, logger logr.Logger) error {
449+
if sriovnetworkv1.StringInArray(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) {
450+
return nil
451+
}
452+
453+
newObj := defaultConfig.DeepCopyObject().(client.Object)
454+
newObj.SetFinalizers(
455+
append(newObj.GetFinalizers(), sriovnetworkv1.OPERATORCONFIGFINALIZERNAME),
456+
)
457+
458+
logger.WithName("syncOperatorConfigFinalizers").
459+
Info("Adding finalizer", "key", sriovnetworkv1.OPERATORCONFIGFINALIZERNAME)
460+
461+
patch := client.MergeFrom(defaultConfig)
462+
err := r.Patch(ctx, newObj, patch)
463+
if err != nil {
464+
return fmt.Errorf("can't patch SriovOperatorConfig to add finalizer [%s]: %w", sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, err)
465+
}
466+
467+
// Refresh the defaultConfig object with the latest changes
468+
return r.Get(ctx, types.NamespacedName{Namespace: defaultConfig.Namespace, Name: defaultConfig.Name}, defaultConfig)
469+
}
470+
451471
func (r *SriovOperatorConfigReconciler) handleSriovOperatorConfigDeletion(ctx context.Context,
452472
defaultConfig *sriovnetworkv1.SriovOperatorConfig, logger logr.Logger) (ctrl.Result, error) {
453473
var err error

0 commit comments

Comments
 (0)