Skip to content

Commit b311427

Browse files
authored
fix: hypervisor shm issue (#283)
1 parent 5367e0d commit b311427

File tree

2 files changed

+6
-8
lines changed

2 files changed

+6
-8
lines changed

internal/constants/env.go

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,9 @@ const (
9393
LdPreloadEnv = "LD_PRELOAD"
9494
LdPreloadLimiter = "/home/app/libcuda_limiter.so"
9595

96-
SharedMemResName = "tensor-fusion.ai/shm"
96+
SharedMemResName = "tensor-fusion.ai/shm"
97+
SharedMemDeviceName = "/dev/shm"
98+
SharedMemMountSubPath = "shm"
9799

98100
// disable GPU limiter, for emergency use
99101
DisableGpuLimiterEnv = "DISABLE_GPU_LIMITER"

internal/utils/compose.go

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -368,10 +368,12 @@ func composeHypervisorInitContainer(spec *v1.PodSpec, pool *tfv1.GPUPool) {
368368
}
369369

370370
func composeHypervisorContainer(spec *v1.PodSpec, pool *tfv1.GPUPool) {
371+
spec.HostNetwork = true
371372
spec.Containers[0].VolumeMounts = append(spec.Containers[0].VolumeMounts, v1.VolumeMount{
372373
Name: constants.DataVolumeName,
373374
ReadOnly: false,
374-
MountPath: constants.TFDataPath,
375+
MountPath: constants.SharedMemDeviceName,
376+
SubPath: constants.SharedMemMountSubPath,
375377
}, v1.VolumeMount{
376378
Name: constants.LogsVolumeName,
377379
MountPath: constants.TensorFusionLogPath,
@@ -420,12 +422,6 @@ func composeHypervisorContainer(spec *v1.PodSpec, pool *tfv1.GPUPool) {
420422
if pool.Spec.ComponentConfig.Hypervisor.Image != "" {
421423
spec.Containers[0].Image = pool.Spec.ComponentConfig.Hypervisor.Image
422424
}
423-
spec.Containers[0].Ports = append(spec.Containers[0].Ports, v1.ContainerPort{
424-
ContainerPort: port,
425-
HostPort: port,
426-
Name: constants.HypervisorPortName,
427-
Protocol: v1.ProtocolTCP,
428-
})
429425

430426
if len(spec.Containers[0].Resources.Requests) == 0 {
431427
spec.Containers[0].Resources.Requests = hypervisorDefaultRequests

0 commit comments

Comments
 (0)