@@ -310,14 +310,16 @@ func (s *GpuAllocator) Dealloc(
310310 log := log .FromContext (s .ctx )
311311
312312 request , exists := s .uniqueAllocation [podUID ]
313- if ! exists {
313+ if ! exists || request == nil {
314314 // should not block finalizer
315315 log .Error (fmt .Errorf ("pod has not allocated GPUs" ), "pod" , podUID )
316+ return
316317 }
317318
318319 if _ , exists := s .uniqueDeallocation [podUID ]; exists {
319320 // should not block finalizer
320321 log .Error (fmt .Errorf ("pod has already deallocated GPUs" ), "pod" , podUID )
322+ return
321323 }
322324
323325 s .storeMutex .Lock ()
@@ -1057,16 +1059,6 @@ func removeRunningApp(ctx context.Context, gpu *tfv1.GPU, workloadNameNamespace
10571059}
10581060
10591061func (s * GpuAllocator ) ComposeAllocationRequest (pod * v1.Pod ) (tfv1.AllocRequest , string , error ) {
1060- var tfWorkload tfv1.TensorFusionWorkload
1061-
1062- err := s .Get (s .ctx , client.ObjectKey {
1063- Name : pod .Labels [constants .WorkloadKey ],
1064- Namespace : pod .Namespace ,
1065- }, & tfWorkload )
1066- if err != nil {
1067- return tfv1.AllocRequest {}, "failed to get tf workload" , err
1068- }
1069-
10701062 gpuRequestResource , err := utils .GetGPUResource (pod , true )
10711063 if err != nil {
10721064 return tfv1.AllocRequest {}, "invalid gpu request annotation" , err
@@ -1091,11 +1083,9 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (tfv1.AllocRequest,
10911083 Count : uint (count ),
10921084 GPUModel : pod .Annotations [constants .GPUModelAnnotation ],
10931085 WorkloadNameNamespace : tfv1.NameNamespace {
1094- Name : tfWorkload . Name ,
1095- Namespace : tfWorkload .Namespace ,
1086+ Name : pod . Labels [ constants . WorkloadKey ] ,
1087+ Namespace : pod .Namespace ,
10961088 },
1097- NodeAffinity : tfWorkload .Spec .NodeAffinity ,
1098-
10991089 PodMeta : pod .ObjectMeta ,
11001090 }
11011091 return allocRequest , "" , nil
0 commit comments