@@ -44,7 +44,9 @@ var mu sync.Mutex
 var GPUCapacityMap = map[string]tfv1.Resource{}
 
 type Strategy interface {
-	Score(gpu *tfv1.GPU) int
+	// When isForNode is true, Score returns the GPU's node-level score;
+	// otherwise it returns the per-GPU score within a single node
+	Score(gpu *tfv1.GPU, isForNode bool) int
 
 	SelectGPUs(gpus []*tfv1.GPU, count uint) ([]*tfv1.GPU, error)
 }
@@ -59,13 +61,14 @@ func (p *SimulateSchedulingFilterDetail) Clone() fwk.StateData {
 }
 
 // NewStrategy creates a strategy based on the placement mode
-func NewStrategy(placementMode tfv1.PlacementMode, cfg *config.GPUFitConfig) Strategy {
+func NewStrategy(placementMode tfv1.PlacementMode, cfg *config.GPUFitConfig, nodeGpuStore map[string]map[string]*tfv1.GPU) Strategy {
 	switch placementMode {
 	case tfv1.PlacementModeLowLoadFirst:
-		return LowLoadFirst{cfg: cfg}
+		return LowLoadFirst{cfg: cfg, nodeGpuStore: nodeGpuStore}
+	case tfv1.PlacementModeCompactFirst:
+		return CompactFirst{cfg: cfg, nodeGpuStore: nodeGpuStore}
 	default:
-		// CompactFirst is the default strategy
-		return CompactFirst{cfg: cfg}
+		return NodeCompactGPULowLoad{cfg: cfg, nodeGpuStore: nodeGpuStore}
 	}
 }
 
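As a point of reference for the widened constructor, here is a minimal usage sketch, assuming it lives in the same package as `NewStrategy` (the `scoreAllGPUs` helper and its arguments are placeholders, not part of this change): the strategy is built once with the shared `nodeGpuStore`, and the new `isForNode` flag selects node-level scoring.

```go
// Illustrative sketch only; not part of this patch.
// scoreAllGPUs shows the intended call pattern for the widened NewStrategy
// signature: construct the strategy once with the shared nodeGpuStore, then
// request node-level scores via Score(gpu, true).
func scoreAllGPUs(
	mode tfv1.PlacementMode,
	cfg *config.GPUFitConfig,
	nodeGpuStore map[string]map[string]*tfv1.GPU,
) map[string]map[string]int {
	strategy := NewStrategy(mode, cfg, nodeGpuStore)

	scores := make(map[string]map[string]int, len(nodeGpuStore))
	for nodeName, gpus := range nodeGpuStore {
		scores[nodeName] = make(map[string]int, len(gpus))
		for gpuName, gpu := range gpus {
			// true => node-level score; false would return the per-GPU score
			// within a single node.
			scores[nodeName][gpuName] = strategy.Score(gpu, true)
		}
	}
	return scores
}
```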
@@ -182,14 +185,16 @@ func (s *GpuAllocator) Filter(
 		filterRegistry = filterRegistry.With(filter.NewGPUModelFilter(req.GPUModel))
 	}
 
-	if req.Count > 1 {
-		filterRegistry = filterRegistry.With(filter.NewSameNodeFilter(req.Count))
-	}
-	// Add NodeAffinityFilter if specified
+	// NOTE: deprecated, prefer the Kubernetes-native spec template affinity instead
 	if req.NodeAffinity != nil {
 		filterRegistry = filterRegistry.With(filter.NewNodeAffinityFilter(s.Client, req.NodeAffinity))
 	}
 
+	// The same-node filter must be applied as the final step
+	if req.Count > 1 {
+		filterRegistry = filterRegistry.With(filter.NewSameNodeFilter(req.Count))
+	}
+
 	// Apply the filters in sequence
 	filteredGPUs, filterDetails, err := filterRegistry.Apply(s.ctx, req.WorkloadNameNamespace, toFilterGPUs, isSimulateSchedule)
 	if err != nil {
@@ -245,7 +250,7 @@ func (s *GpuAllocator) Select(req *tfv1.AllocRequest, filteredGPUs []*tfv1.GPU)
 
 	strategy := NewStrategy(schedulingConfigTemplate.Spec.Placement.Mode, &config.GPUFitConfig{
 		MaxWorkerPerNode: s.maxWorkerPerNode,
-	})
+	}, s.nodeGpuStore)
 	selectedGPUs, err := strategy.SelectGPUs(filteredGPUs, req.Count)
 	if err != nil {
 		return nil, fmt.Errorf("select GPU: %w", err)
@@ -670,18 +675,20 @@ type scoredGPU struct {
 	score int
 }
 
+func (s *GpuAllocator) GetScoringStrategy(cfg *config.GPUFitConfig, req *tfv1.AllocRequest) Strategy {
+	return NewStrategy(s.getPlacementMode(s.ctx, req.PoolName), cfg, s.nodeGpuStore)
+}
+
 // First level is k8s node name, second level is GPU name, value is score
 func (s *GpuAllocator) Score(
-	ctx context.Context, cfg *config.GPUFitConfig, req *tfv1.AllocRequest, nodeGPUs map[string][]*tfv1.GPU,
+	ctx context.Context, strategy Strategy, req *tfv1.AllocRequest, nodeGPUs map[string][]*tfv1.GPU,
 ) map[string]map[string]int {
 	result := make(map[string]map[string]int, len(nodeGPUs))
-	strategy := NewStrategy(s.getPlacementMode(ctx, req.PoolName), cfg)
-
 	allScores := make([]scoredGPU, 0, len(nodeGPUs))
 
 	for nodeName, gpus := range nodeGPUs {
 		for _, gpu := range gpus {
-			res := strategy.Score(gpu)
+			res := strategy.Score(gpu, true)
 
 			// making Pending GPU to lower score, prefer not scheduling to them
 			if gpu.Status.Phase == tfv1.TensorFusionGPUPhasePending {
@@ -1477,18 +1484,18 @@ func (s *GpuAllocator) getPlacementMode(ctx context.Context, poolName string) tf
 	pool := &tfv1.GPUPool{}
 	if err := s.Get(ctx, client.ObjectKey{Name: poolName}, pool); err != nil {
 		// if failed to get pool, default to compact first
-		return tfv1.PlacementModeCompactFirst
+		return tfv1.PlacementModeNodeCompactGPULowLoad
 	}
 
 	if pool.Spec.SchedulingConfigTemplate == nil || *pool.Spec.SchedulingConfigTemplate == "" {
-		return tfv1.PlacementModeCompactFirst
+		return tfv1.PlacementModeNodeCompactGPULowLoad
 	}
 
 	// get scheduling config template
 	schedulingConfigTemplate := &tfv1.SchedulingConfigTemplate{}
 	if err := s.Get(ctx, client.ObjectKey{Name: *pool.Spec.SchedulingConfigTemplate}, schedulingConfigTemplate); err != nil {
 		// if failed to get scheduling config template, default to compact first
-		return tfv1.PlacementModeCompactFirst
+		return tfv1.PlacementModeNodeCompactGPULowLoad
 	}
 	return schedulingConfigTemplate.Spec.Placement.Mode
 }
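For context on how the refactored scoring path fits together, a minimal sketch assuming the caller sits in the same package as `GpuAllocator` (the `scoreForRequest` wrapper and its parameters are placeholders): the strategy is now resolved once from the pool's placement mode via `GetScoringStrategy` and passed into `Score`, instead of being rebuilt inside `Score`.

```go
// Hypothetical wiring of the refactored scoring path; sketch only.
func scoreForRequest(
	ctx context.Context,
	s *GpuAllocator,
	cfg *config.GPUFitConfig,
	req *tfv1.AllocRequest,
	nodeGPUs map[string][]*tfv1.GPU,
) map[string]map[string]int {
	// GetScoringStrategy resolves the pool's placement mode and injects the
	// shared nodeGpuStore; Score no longer constructs the strategy itself.
	strategy := s.GetScoringStrategy(cfg, req)
	return s.Score(ctx, strategy, req, nodeGPUs)
}
```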