@@ -36,6 +36,7 @@ import (
3636)
3737
3838const TMP_PATH = "/tmp"
39+ const LAPTOP_GPU_SUFFIX = " Laptop GPU" // device-name suffix that marks a mobile/laptop GPU; it is stripped before the model lookup
3940
4041var Scheme = runtime .NewScheme ()
4142
@@ -144,10 +145,17 @@ func main() {
144145 ctrl .Log .Error (errors .New (nvml .ErrorString (ret )), "unable to get memory info of device" , "index" , i )
145146 os .Exit (1 )
146147 }
148+
149+ // NVIDIA mobile-series GPUs use the same chips as their desktop counterparts but run at lower clock speeds,
150+ // so we look up the desktop model's info for a mobile GPU and scale its available TFlops by a multiplier.
151+ isLaptopGPU := strings .HasSuffix (deviceName , LAPTOP_GPU_SUFFIX )
152+ if isLaptopGPU {
153+ deviceName = strings .ReplaceAll (deviceName , LAPTOP_GPU_SUFFIX , "" )
154+ ctrl .Log .Info ("found mobile/laptop GPU, clock speed is lower, will set lower TFlops" , "deviceName" , deviceName )
155+ }
147156 info , ok := lo .Find (gpuInfo , func (info config.GpuInfo ) bool {
148157 return info .FullModelName == deviceName
149158 })
150- tflops := info .Fp16TFlops
151159 if ! ok {
152160 ctrl .Log .Info (
153161 "[Error] Unknown GPU model, please update `gpu-public-gpu-info` configMap " +
@@ -157,9 +165,13 @@ func main() {
157165 "#pod-stuck-in-starting-status-after-enabling-tensorfusion" ,
158166 "deviceName" , deviceName , "uuid" , uuid )
159167 os .Exit (1 )
160- } else {
161- ctrl .Log .Info ("found GPU info from config" , "deviceName" , deviceName , "FP16 TFlops" , tflops , "uuid" , uuid )
162168 }
169+ tflops := info .Fp16TFlops
170+ if isLaptopGPU {
171+ tflops = resource .MustParse (fmt .Sprintf ("%.2f" ,
172+ tflops .AsApproximateFloat64 ()* constants .MobileGpuClockSpeedMultiplier ))
173+ }
174+ ctrl .Log .Info ("found GPU info from config" , "deviceName" , deviceName , "FP16 TFlops" , tflops , "uuid" , uuid )
163175
164176 gpu , err := createOrUpdateTensorFusionGPU (k8sClient , ctx , k8sNodeName , gpunode , uuid , deviceName , memInfo , tflops )
165177 if err != nil {
0 commit comments