Skip to content

Commit 12c3dd5

Browse files
authored
fix: parse nvidia.com/gpu to annotation when enabled (#401)
* fix: disable ngpu mode by default
* chore: lint
* fix: parse nvidia.com/gpu to annotation when enabled
* fix: optimize
1 parent d0118cf commit 12c3dd5

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

internal/webhook/v1/pod_webhook.go

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -304,7 +304,7 @@ func (m *TensorFusionPodMutator) patchTFClient(
304304
return nil, fmt.Errorf("unmarshal patched container, invalid container patch: %w", err)
305305
}
306306

307-
removeNativeGPUResourceClaim(container)
307+
removeNativeGPULimitsAndAddCountToAnnotation(pod, container)
308308

309309
if !isLocalGPU {
310310
addConnectionForRemoteFixedReplicaVirtualGPU(pod, container, clientConfig)
@@ -423,13 +423,21 @@ func addConnectionForRemoteFixedReplicaVirtualGPU(pod *corev1.Pod, container *co
423423
})
424424
}
425425

426-
// remove nvidia.com/gpu in resources
427-
func removeNativeGPUResourceClaim(container *corev1.Container) {
426+
// remove nvidia.com/gpu in resources, add the GPU number into annotation
427+
func removeNativeGPULimitsAndAddCountToAnnotation(pod *corev1.Pod, container *corev1.Container) {
428428
if container.Resources.Requests != nil {
429429
delete(container.Resources.Requests, constants.NvidiaGPUKey)
430430
}
431431
if container.Resources.Limits != nil {
432-
delete(container.Resources.Limits, constants.NvidiaGPUKey)
432+
if quantity, ok := container.Resources.Limits[constants.NvidiaGPUKey]; ok {
433+
gpuNumber, err := strconv.Atoi(quantity.String())
434+
if err != nil || gpuNumber <= 0 {
435+
ctrl.Log.Error(err, "unrecognized nvidia.com/gpu in resources, not a valid number", "pod", pod.Name, "container", container.Name)
436+
} else {
437+
pod.Annotations[constants.GpuCountAnnotation] = strconv.Itoa(gpuNumber)
438+
}
439+
delete(container.Resources.Limits, constants.NvidiaGPUKey)
440+
}
433441
}
434442
}
435443

0 commit comments

Comments
 (0)