Skip to content

Commit 5b638dd

Browse files
refactor: standardize variable naming for memory and lock management
1 parent d50cb70 commit 5b638dd

File tree

3 files changed

+25
-15
lines changed

pkg/nvidia-plugin/pkg/plugin/register.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,11 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
137137
panic(0)
138138
}
139139

140-
registeredmem := int32(memoryTotal / 1024 / 1024)
140+
registeredMem := int32(memoryTotal / 1024 / 1024)
141141
if plugin.schedulerConfig.DeviceMemoryScaling != 1 {
142-
registeredmem = int32(float64(registeredmem) * plugin.schedulerConfig.DeviceMemoryScaling)
142+
registeredMem = int32(float64(registeredMem) * plugin.schedulerConfig.DeviceMemoryScaling)
143143
}
144-
klog.Infoln("MemoryScaling=", plugin.schedulerConfig.DeviceMemoryScaling, "registeredmem=", registeredmem)
144+
klog.Infoln("MemoryScaling=", plugin.schedulerConfig.DeviceMemoryScaling, "registeredMem=", registeredMem)
145145
health := true
146146
for _, val := range devs {
147147
if strings.Compare(val.ID, UUID) == 0 {
@@ -163,14 +163,14 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
163163
ID: UUID,
164164
Index: uint(idx),
165165
Count: int32(plugin.schedulerConfig.DeviceSplitCount),
166-
Devmem: registeredmem,
166+
Devmem: registeredMem,
167167
Devcore: int32(plugin.schedulerConfig.DeviceCoreScaling * 100),
168168
Type: fmt.Sprintf("%v-%v", "NVIDIA", Model),
169169
Numa: numa,
170170
Mode: plugin.operatingMode,
171171
Health: health,
172172
})
173-
klog.Infof("nvml registered device id=%v, memory=%v, type=%v, numa=%v", idx, registeredmem, Model, numa)
173+
klog.Infof("nvml registered device id=%v, memory=%v, type=%v, numa=%v", idx, registeredMem, Model, numa)
174174
}
175175
return &res
176176
}
@@ -184,9 +184,9 @@ func (plugin *NvidiaDevicePlugin) RegistrInAnnotation() error {
184184
klog.Errorln("get node error", err.Error())
185185
return err
186186
}
187-
encodeddevices := util.EncodeNodeDevices(*devices)
187+
encodedDevices := util.EncodeNodeDevices(*devices)
188188
annos[nvidia.HandshakeAnnos] = "Reported " + time.Now().String()
189-
annos[nvidia.RegisterAnnos] = encodeddevices
189+
annos[nvidia.RegisterAnnos] = encodedDevices
190190
klog.Infof("patch node with the following annos %v", fmt.Sprintf("%v", annos))
191191
err = util.PatchNodeAnnotations(node, annos)
192192

pkg/scheduler/scheduler.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ func (s *Scheduler) Bind(args extenderv1.ExtenderBindingArgs) (*extenderv1.Exten
420420
return res, nil
421421
}
422422

423-
tmppatch := map[string]string{
423+
deviceBindAnnotations := map[string]string{
424424
util.DeviceBindPhase: "allocating",
425425
util.BindTimeAnnotations: strconv.FormatInt(time.Now().Unix(), 10),
426426
}
@@ -433,7 +433,7 @@ func (s *Scheduler) Bind(args extenderv1.ExtenderBindingArgs) (*extenderv1.Exten
433433
}
434434
}
435435

436-
err = util.PatchPodAnnotations(current, tmppatch)
436+
err = util.PatchPodAnnotations(current, deviceBindAnnotations)
437437
if err != nil {
438438
klog.ErrorS(err, "Failed to patch pod annotations", "pod", klog.KObj(current))
439439
return &extenderv1.ExtenderBindingResult{Error: err.Error()}, err

pkg/util/nodelock/nodelock.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const (
4242

4343
var lock sync.Mutex
4444

45-
func SetNodeLock(nodeName string, lockname string, pods *corev1.Pod) error {
45+
func SetNodeLock(nodeName string, lockName string, pods *corev1.Pod) error {
4646
lock.Lock()
4747
defer lock.Unlock()
4848
ctx := context.Background()
@@ -80,7 +80,7 @@ func SetNodeLock(nodeName string, lockname string, pods *corev1.Pod) error {
8080
return nil
8181
}
8282

83-
func ReleaseNodeLock(nodeName string, lockname string, pod *corev1.Pod, timeout bool) error {
83+
func ReleaseNodeLock(nodeName string, lockName string, pod *corev1.Pod, timeout bool) error {
8484
lock.Lock()
8585
defer lock.Unlock()
8686
ctx := context.Background()
@@ -118,27 +118,37 @@ func ReleaseNodeLock(nodeName string, lockname string, pod *corev1.Pod, timeout
118118
return nil
119119
}
120120

121-
func LockNode(nodeName string, lockname string, pods *corev1.Pod) error {
121+
func LockNode(nodeName string, lockName string, pods *corev1.Pod) error {
122122
ctx := context.Background()
123123
node, err := client.GetClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
124124
if err != nil {
125125
return err
126126
}
127127
if _, ok := node.ObjectMeta.Annotations[NodeLockKey]; !ok {
128-
return SetNodeLock(nodeName, lockname, pods)
128+
return SetNodeLock(nodeName, lockName, pods)
129129
}
130130
lockTime, _, _, err := ParseNodeLock(node.ObjectMeta.Annotations[NodeLockKey])
131131
if err != nil {
132132
return err
133133
}
134+
klog.InfoS("Attempting to lock node", "node", nodeName, "pod", pods.Name)
135+
if _, ok := node.ObjectMeta.Annotations[NodeLockKey]; !ok {
136+
klog.InfoS("No existing lock found", "node", nodeName)
137+
} else {
138+
klog.InfoS("Existing lock details",
139+
"node", nodeName,
140+
"lockTime", lockTime,
141+
"currentTime", time.Now(),
142+
"timeSinceLock", time.Since(lockTime))
143+
}
134144
if time.Since(lockTime) > time.Minute*5 {
135145
klog.InfoS("Node lock expired", "node", nodeName, "lockTime", lockTime)
136-
err = ReleaseNodeLock(nodeName, lockname, pods, true)
146+
err = ReleaseNodeLock(nodeName, lockName, pods, true)
137147
if err != nil {
138148
klog.ErrorS(err, "Failed to release node lock", "node", nodeName)
139149
return err
140150
}
141-
return SetNodeLock(nodeName, lockname, pods)
151+
return SetNodeLock(nodeName, lockName, pods)
142152
}
143153
return fmt.Errorf("node %s has been locked within 5 minutes", nodeName)
144154
}

0 commit comments

Comments
 (0)