@@ -22,18 +22,20 @@ func pickBestFullFitByLibrary(f *ggml.GGML, modelPath string, projectors []strin
2222 for _ , gl := range ml .ByLibrary (gpus ) {
2323 sgl := append (make ([]ml.DeviceInfo , 0 , len (gl )), gl ... )
2424
25- // TODO - potentially sort by performance capability, existing models loaded, etc.
26- // TODO - Eliminate any GPUs that already have envconfig.MaxRunners loaded on them
27- // Respect Vulkan device enumeration order (ID 0, 1, 2...) instead of reordering by VRAM
28- // This allows VkConfig and driver-level device ordering to be honored
29- sort .Slice (sgl , func (i , j int ) bool {
30- return sgl [i ].ID < sgl [j ].ID
31- })
32-
33- // Debug log to verify GPU ordering
34- for idx , gpu := range sgl {
35- slog .Debug ("GPU order after sort" , "index" , idx , "ID" , gpu .ID , "name" , gpu .Name , "free" , format .HumanBytes2 (gpu .FreeMemory ))
36- } if ! envconfig .SchedSpread () {
25+ // TODO - potentially sort by performance capability, existing models loaded, etc.
26+ // TODO - Eliminate any GPUs that already have envconfig.MaxRunners loaded on them
27+ // Respect Vulkan device enumeration order (ID 0, 1, 2...) instead of reordering by VRAM
28+ // This allows VkConfig and driver-level device ordering to be honored
29+ sort .Slice (sgl , func (i , j int ) bool {
30+ return sgl [i ].ID < sgl [j ].ID
31+ })
32+
33+ // Debug log to verify GPU ordering
34+ for idx , gpu := range sgl {
35+ slog .Debug ("GPU order after sort" , "index" , idx , "ID" , gpu .ID , "name" , gpu .Name , "free" , format .HumanBytes2 (gpu .FreeMemory ))
36+ }
37+
38+ if ! envconfig .SchedSpread () {
3739 // Try to pack into as few GPUs as possible, starting from 1 GPU
3840 for numGPUs := 1 ; numGPUs <= len (sgl ); numGPUs ++ {
3941 gpuSubset := sgl [:numGPUs ]
0 commit comments