Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/ccoveille/go-safecast v1.8.0 h1:vx5q4QzC8MJuESZLOKZXp+63gPCt4qsaeVOFMTFlDCk=
github.com/ccoveille/go-safecast v1.8.0/go.mod h1:QqwNjxQ7DAqY0C721OIO9InMk9zCwcsO7tnRuHytad8=
github.com/ccoveille/go-safecast v1.8.2 h1:+d+s5UGQiCVJX9oYc8XvYcB2zCMBlax6lIP7YdxXLHA=
github.com/ccoveille/go-safecast v1.8.2/go.mod h1:M0Ubpl11x63fE7iOfk5MtngQFXsntcRzOoSsFDqQYDY=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
Expand Down
28 changes: 16 additions & 12 deletions pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ func (s *Scheduler) getNodesUsage(nodes *[]string, task *corev1.Pod) (*map[strin
failedNodes := make(map[string]string)
allNodes, err := s.ListNodes()
if err != nil {
return &overallnodeMap, failedNodes, err
return nil, nil, err
}

for _, node := range allNodes {
Expand Down Expand Up @@ -480,37 +480,41 @@ func (s *Scheduler) Bind(args extenderv1.ExtenderBindingArgs) (*extenderv1.Exten
util.BindTimeAnnotations: strconv.FormatInt(time.Now().Unix(), 10),
}

// Function to release node locks in case of binding failure
releaseNodeLocks := func() (*extenderv1.ExtenderBindingResult, error) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yxxhero Would it be possible to use a defer statement here to improve code clarity and maintainability?

klog.InfoS("Release node locks", "node", args.Node)
for _, val := range device.GetDevices() {
if releaseErr := val.ReleaseNodeLock(node, current); releaseErr != nil {
klog.ErrorS(releaseErr, "Failed to release node lock", "node", args.Node, "device", val)
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The error returned by val.ReleaseNodeLock(node, current) is ignored. If releasing the lock fails, it could fail silently and leave the node in a locked state, preventing other pods from being scheduled on it. This error should be logged.

if releaseErr := val.ReleaseNodeLock(node, current); releaseErr != nil {
	klog.ErrorS(releaseErr, "Failed to release node lock", "node", args.Node, "device", val)
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please adopt this suggestion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

s.recordScheduleBindingResultEvent(current, EventReasonBindingFailed, []string{}, err)
return &extenderv1.ExtenderBindingResult{Error: err.Error()}, nil
}

for _, val := range device.GetDevices() {
err = val.LockNode(node, current)
if err != nil {
klog.ErrorS(err, "Failed to lock node", "node", args.Node, "device", val)
goto ReleaseNodeLocks
return releaseNodeLocks()
}
}

err = util.PatchPodAnnotations(current, tmppatch)
if err != nil {
klog.ErrorS(err, "Failed to patch pod annotations", "pod", klog.KObj(current))
goto ReleaseNodeLocks
return releaseNodeLocks()
}

err = s.kubeClient.CoreV1().Pods(args.PodNamespace).Bind(context.Background(), binding, metav1.CreateOptions{})
if err != nil {
klog.ErrorS(err, "Failed to bind pod", "pod", args.PodName, "namespace", args.PodNamespace, "node", args.Node)
goto ReleaseNodeLocks
return releaseNodeLocks()
}

s.recordScheduleBindingResultEvent(current, EventReasonBindingSucceed, []string{args.Node}, nil)
klog.InfoS("Successfully bound pod to node", "pod", args.PodName, "namespace", args.PodNamespace, "node", args.Node)
return &extenderv1.ExtenderBindingResult{Error: ""}, nil

ReleaseNodeLocks:
klog.InfoS("Release node locks", "node", args.Node)
for _, val := range device.GetDevices() {
val.ReleaseNodeLock(node, current)
}
s.recordScheduleBindingResultEvent(current, EventReasonBindingFailed, []string{}, err)
return &extenderv1.ExtenderBindingResult{Error: err.Error()}, nil
}

func (s *Scheduler) Filter(args extenderv1.ExtenderArgs) (*extenderv1.ExtenderFilterResult, error) {
Expand Down