
Commit 15eab62

Fix: GPU selection respects Vulkan ordering; Intel overflow only when necessary. Smart build is always verbose. Tests and logs updated.

modified: Z_Iosu/.build_state.json
modified: llm/memory.go
modified: llm/server.go

1 parent 8a3856f · commit 15eab62

3 files changed, +50 −6 lines changed


Z_Iosu/.build_state.json

Lines changed: 23 additions & 0 deletions
@@ -1,4 +1,5 @@
 {
+<<<<<<< HEAD
   "LastBuild": "2025-10-21T00:32:22.7635276-05:00",
   "Components": {
     "App": {
@@ -19,6 +20,28 @@
     },
     "Vulkan": {
       "LastModified": "2025-10-20T21:11:18.1539263-05:00",
+=======
+  "LastBuild": "2025-10-20T16:12:39.4148936-05:00",
+  "Components": {
+    "App": {
+      "LastModified": "2025-10-18T10:00:22.5534265-05:00",
+      "FileCount": 57
+    },
+    "CUDA": {
+      "LastModified": "2025-10-18T09:59:44.7143808-05:00",
+      "FileCount": 214
+    },
+    "Ollama": {
+      "LastModified": "2025-10-20T16:10:39.1356998-05:00",
+      "FileCount": 446
+    },
+    "CPU": {
+      "LastModified": "2025-10-20T15:42:39.5692009-05:00",
+      "FileCount": 222
+    },
+    "Vulkan": {
+      "LastModified": "2025-10-20T15:42:39.5692009-05:00",
+>>>>>>> 1f279e404 (Fix: GPU selection respeta orden Vulkan, overflow Intel solo si es necesario. Smart build siempre verbose. Pruebas y logs actualizados.)
       "FileCount": 121
     }
   }
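
Note that this hunk commits the Git conflict markers (<<<<<<< HEAD, =======, >>>>>>> 1f279e404) into Z_Iosu/.build_state.json, so the committed file is no longer valid JSON and anything that parses the build state will fail on it. Below is a minimal, illustrative pre-flight check a build script could run; the file path comes from the diff, everything else (program name, messages) is an assumption, not part of the commit:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// Illustrative sketch only: verify that the build-state file still parses as
// JSON before a smart build trusts it. With the committed conflict markers
// ("<<<<<<< HEAD", "=======", ">>>>>>>") json.Valid reports false.
func main() {
	data, err := os.ReadFile("Z_Iosu/.build_state.json")
	if err != nil {
		fmt.Fprintln(os.Stderr, "read:", err)
		os.Exit(1)
	}
	if !json.Valid(data) {
		fmt.Fprintln(os.Stderr, "build state is not valid JSON (unresolved merge conflict?)")
		os.Exit(1)
	}
	fmt.Println("build state OK")
}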

llm/memory.go

Lines changed: 12 additions & 6 deletions
@@ -22,12 +22,18 @@ func pickBestFullFitByLibrary(f *ggml.GGML, modelPath string, projectors []strin
 	for _, gl := range ml.ByLibrary(gpus) {
 		sgl := append(make([]ml.DeviceInfo, 0, len(gl)), gl...)
 
-		// TODO - potentially sort by performance capability, existing models loaded, etc.
-		// TODO - Eliminate any GPUs that already have envconfig.MaxRunners loaded on them
-		// Note: at present, this will favor most current available VRAM descending and ignoring faster GPU speed in mixed setups
-		sort.Sort(sort.Reverse(ml.ByFreeMemory(sgl)))
-
-		if !envconfig.SchedSpread() {
+		// TODO - potentially sort by performance capability, existing models loaded, etc.
+		// TODO - Eliminate any GPUs that already have envconfig.MaxRunners loaded on them
+		// Respect Vulkan device enumeration order (ID 0, 1, 2...) instead of reordering by VRAM
+		// This allows VkConfig and driver-level device ordering to be honored
+		sort.Slice(sgl, func(i, j int) bool {
+			return sgl[i].ID < sgl[j].ID
+		})
+
+		// Debug log to verify GPU ordering
+		for idx, gpu := range sgl {
+			slog.Debug("GPU order after sort", "index", idx, "ID", gpu.ID, "name", gpu.Name, "free", format.HumanBytes2(gpu.FreeMemory))
+		}
+		if !envconfig.SchedSpread() {
 			// Try to pack into as few GPUs as possible, starting from 1 GPU
 			for numGPUs := 1; numGPUs <= len(sgl); numGPUs++ {
 				gpuSubset := sgl[:numGPUs]
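
The functional change is the sort: the removed sort.Sort(sort.Reverse(ml.ByFreeMemory(sgl))) ordered candidate GPUs by free VRAM, descending, while the added sort.Slice orders them by device ID so the Vulkan enumeration order (and any VkConfig override) is preserved. Below is a self-contained sketch of the difference in ordering; the dev struct and the device values are stand-ins for ollama's ml.DeviceInfo, and it assumes the IDs compare as strings, which is what the comparison in the patch suggests:

package main

import (
	"fmt"
	"sort"
)

// dev is a stand-in for ml.DeviceInfo with only the fields the two sorts use.
type dev struct {
	ID         string
	FreeMemory uint64
}

func main() {
	// Made-up example devices in Vulkan enumeration order 1, 0, 2.
	gpus := []dev{
		{ID: "1", FreeMemory: 12 << 30}, // discrete GPU with the most free VRAM
		{ID: "0", FreeMemory: 8 << 30},  // the GPU Vulkan/VkConfig lists first
		{ID: "2", FreeMemory: 2 << 30},  // e.g. an integrated GPU
	}

	// Old behavior (removed): most free VRAM first, enumeration order ignored.
	byVRAM := append([]dev(nil), gpus...)
	sort.Slice(byVRAM, func(i, j int) bool { return byVRAM[i].FreeMemory > byVRAM[j].FreeMemory })

	// New behavior (added): respect device enumeration order via the ID.
	byID := append([]dev(nil), gpus...)
	sort.Slice(byID, func(i, j int) bool { return byID[i].ID < byID[j].ID })

	fmt.Println("by free VRAM:", byVRAM) // IDs 1, 0, 2
	fmt.Println("by device ID:", byID)   // IDs 0, 1, 2
}

One design note: if the IDs are numeric strings, the comparison is lexical, so with more than ten devices an ID of "10" would sort before "2"; for the small device counts this change targets, that is unlikely to matter.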

llm/server.go

Lines changed: 15 additions & 0 deletions
@@ -820,6 +820,21 @@ func uniqueDeviceIDs(gpuLayers ml.GPULayersList) []ml.DeviceID {
 // - Assigning layers
 // - Ensuring that we don't exceed limits, such as requirements about partial offloading or system memory
 func (s *ollamaServer) createLayout(systemInfo ml.SystemInfo, systemGPUs []ml.DeviceInfo, memory *ml.BackendMemory, requireFull bool, backoff float32) (ml.GPULayersList, error) {
+	if s.totalLayers == 0 || s.options.NumGPU == 0 || len(systemGPUs) == 0 || (len(systemGPUs) == 1 && systemGPUs[0].Library == "cpu") {
+		return ml.GPULayersList{}, nil
+	}
+
+	gpus := append(make([]ml.DeviceInfo, 0, len(systemGPUs)), systemGPUs...)
+	// Respect Vulkan device enumeration order (ID 0, 1, 2...) instead of reordering by VRAM
+	sort.Slice(gpus, func(i, j int) bool {
+		return gpus[i].ID < gpus[j].ID
+	})
+
+	// Debug log to verify GPU ordering
+	for idx, gpu := range gpus {
+		slog.Debug("GPU order in createLayout", "index", idx, "ID", gpu.ID, "name", gpu.Name, "free", format.HumanBytes2(gpu.FreeMemory))
+	}
+
 	if memory == nil {
 		memory = &ml.BackendMemory{CPU: ml.DeviceMemory{
 			Weights: make([]uint64, s.totalLayers),
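
The slog.Debug lines added in both files only appear when the server logs at debug level (OLLAMA_DEBUG=1 on a stock Ollama build). Below is a standalone sketch of the same call with the level turned up so the message is visible; the handler setup and the device values ("Intel Arc", "7.5 GiB") are made-up examples, not taken from the commit:

package main

import (
	"log/slog"
	"os"
)

// Minimal sketch: messages logged with slog.Debug are only emitted when the
// default logger's level is at least LevelDebug.
func main() {
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})
	slog.SetDefault(slog.New(handler))

	// Same key/value style as the patch, with illustrative values.
	slog.Debug("GPU order in createLayout", "index", 0, "ID", "0", "name", "Intel Arc", "free", "7.5 GiB")
}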
