
Commit 15eab62

Fix: GPU selection respects Vulkan ordering; Intel overflow only when necessary. Smart build is always verbose. Tests and logs updated.

modified: Z_Iosu/.build_state.json
modified: llm/memory.go
modified: llm/server.go

1 parent 8a3856f · commit 15eab62

3 files changed, +50 −6 lines changed


Z_Iosu/.build_state.json

Lines changed: 23 additions & 0 deletions
@@ -1,4 +1,5 @@
 {
+<<<<<<< HEAD
   "LastBuild": "2025-10-21T00:32:22.7635276-05:00",
   "Components": {
     "App": {
@@ -19,6 +20,28 @@
     },
     "Vulkan": {
       "LastModified": "2025-10-20T21:11:18.1539263-05:00",
+=======
+  "LastBuild": "2025-10-20T16:12:39.4148936-05:00",
+  "Components": {
+    "App": {
+      "LastModified": "2025-10-18T10:00:22.5534265-05:00",
+      "FileCount": 57
+    },
+    "CUDA": {
+      "LastModified": "2025-10-18T09:59:44.7143808-05:00",
+      "FileCount": 214
+    },
+    "Ollama": {
+      "LastModified": "2025-10-20T16:10:39.1356998-05:00",
+      "FileCount": 446
+    },
+    "CPU": {
+      "LastModified": "2025-10-20T15:42:39.5692009-05:00",
+      "FileCount": 222
+    },
+    "Vulkan": {
+      "LastModified": "2025-10-20T15:42:39.5692009-05:00",
+>>>>>>> 1f279e404 (Fix: GPU selection respeta orden Vulkan, overflow Intel solo si es necesario. Smart build siempre verbose. Pruebas y logs actualizados.)
       "FileCount": 121
     }
   }
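
Note that this hunk commits the Git conflict markers (<<<<<<< HEAD, =======, >>>>>>> 1f279e404) into Z_Iosu/.build_state.json, so the committed file is no longer valid JSON and anything that parses the build state will fail on it. Below is a minimal, illustrative pre-flight check a build script could run; the file path comes from the diff, everything else (program name, messages) is an assumption, not part of the commit:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// Illustrative sketch only: verify that the build-state file still parses as
// JSON before a smart build trusts it. With the committed conflict markers
// ("<<<<<<< HEAD", "=======", ">>>>>>>") json.Valid reports false.
func main() {
	data, err := os.ReadFile("Z_Iosu/.build_state.json")
	if err != nil {
		fmt.Fprintln(os.Stderr, "read:", err)
		os.Exit(1)
	}
	if !json.Valid(data) {
		fmt.Fprintln(os.Stderr, "build state is not valid JSON (unresolved merge conflict?)")
		os.Exit(1)
	}
	fmt.Println("build state OK")
}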

llm/memory.go

Lines changed: 12 additions & 6 deletions
@@ -22,12 +22,18 @@ func pickBestFullFitByLibrary(f *ggml.GGML, modelPath string, projectors []strin
 	for _, gl := range ml.ByLibrary(gpus) {
 		sgl := append(make([]ml.DeviceInfo, 0, len(gl)), gl...)
 
-		// TODO - potentially sort by performance capability, existing models loaded, etc.
-		// TODO - Eliminate any GPUs that already have envconfig.MaxRunners loaded on them
-		// Note: at present, this will favor most current available VRAM descending and ignoring faster GPU speed in mixed setups
-		sort.Sort(sort.Reverse(ml.ByFreeMemory(sgl)))
-
-		if !envconfig.SchedSpread() {
+		// TODO - potentially sort by performance capability, existing models loaded, etc.
+		// TODO - Eliminate any GPUs that already have envconfig.MaxRunners loaded on them
+		// Respect Vulkan device enumeration order (ID 0, 1, 2...) instead of reordering by VRAM
+		// This allows VkConfig and driver-level device ordering to be honored
+		sort.Slice(sgl, func(i, j int) bool {
+			return sgl[i].ID < sgl[j].ID
+		})
+
+		// Debug log to verify GPU ordering
+		for idx, gpu := range sgl {
+			slog.Debug("GPU order after sort", "index", idx, "ID", gpu.ID, "name", gpu.Name, "free", format.HumanBytes2(gpu.FreeMemory))
+		}
+		if !envconfig.SchedSpread() {
 			// Try to pack into as few GPUs as possible, starting from 1 GPU
 			for numGPUs := 1; numGPUs <= len(sgl); numGPUs++ {
 				gpuSubset := sgl[:numGPUs]
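
The functional change is the sort: the removed sort.Sort(sort.Reverse(ml.ByFreeMemory(sgl))) ordered candidate GPUs by free VRAM, descending, while the added sort.Slice orders them by device ID so the Vulkan enumeration order (and any VkConfig override) is preserved. Below is a self-contained sketch of the difference in ordering; the dev struct and the device values are stand-ins for ollama's ml.DeviceInfo, and it assumes the IDs compare as strings, which is what the comparison in the patch suggests:

package main

import (
	"fmt"
	"sort"
)

// dev is a stand-in for ml.DeviceInfo with only the fields the two sorts use.
type dev struct {
	ID         string
	FreeMemory uint64
}

func main() {
	// Made-up example devices in Vulkan enumeration order 1, 0, 2.
	gpus := []dev{
		{ID: "1", FreeMemory: 12 << 30}, // discrete GPU with the most free VRAM
		{ID: "0", FreeMemory: 8 << 30},  // the GPU Vulkan/VkConfig lists first
		{ID: "2", FreeMemory: 2 << 30},  // e.g. an integrated GPU
	}

	// Old behavior (removed): most free VRAM first, enumeration order ignored.
	byVRAM := append([]dev(nil), gpus...)
	sort.Slice(byVRAM, func(i, j int) bool { return byVRAM[i].FreeMemory > byVRAM[j].FreeMemory })

	// New behavior (added): respect device enumeration order via the ID.
	byID := append([]dev(nil), gpus...)
	sort.Slice(byID, func(i, j int) bool { return byID[i].ID < byID[j].ID })

	fmt.Println("by free VRAM:", byVRAM) // IDs 1, 0, 2
	fmt.Println("by device ID:", byID)   // IDs 0, 1, 2
}

One design note: if the IDs are numeric strings, the comparison is lexical, so with more than ten devices an ID of "10" would sort before "2"; for the small device counts this change targets, that is unlikely to matter.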

llm/server.go

Lines changed: 15 additions & 0 deletions
@@ -820,6 +820,21 @@ func uniqueDeviceIDs(gpuLayers ml.GPULayersList) []ml.DeviceID {
 // - Assigning layers
 // - Ensuring that we don't exceed limits, such as requirements about partial offloading or system memory
 func (s *ollamaServer) createLayout(systemInfo ml.SystemInfo, systemGPUs []ml.DeviceInfo, memory *ml.BackendMemory, requireFull bool, backoff float32) (ml.GPULayersList, error) {
+	if s.totalLayers == 0 || s.options.NumGPU == 0 || len(systemGPUs) == 0 || (len(systemGPUs) == 1 && systemGPUs[0].Library == "cpu") {
+		return ml.GPULayersList{}, nil
+	}
+
+	gpus := append(make([]ml.DeviceInfo, 0, len(systemGPUs)), systemGPUs...)
+	// Respect Vulkan device enumeration order (ID 0, 1, 2...) instead of reordering by VRAM
+	sort.Slice(gpus, func(i, j int) bool {
+		return gpus[i].ID < gpus[j].ID
+	})
+
+	// Debug log to verify GPU ordering
+	for idx, gpu := range gpus {
+		slog.Debug("GPU order in createLayout", "index", idx, "ID", gpu.ID, "name", gpu.Name, "free", format.HumanBytes2(gpu.FreeMemory))
+	}
+
 	if memory == nil {
 		memory = &ml.BackendMemory{CPU: ml.DeviceMemory{
 			Weights: make([]uint64, s.totalLayers),
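
The slog.Debug lines added in both files only appear when the server logs at debug level (OLLAMA_DEBUG=1 on a stock Ollama build). Below is a standalone sketch of the same call with the level turned up so the message is visible; the handler setup and the device values ("Intel Arc", "7.5 GiB") are made-up examples, not taken from the commit:

package main

import (
	"log/slog"
	"os"
)

// Minimal sketch: messages logged with slog.Debug are only emitted when the
// default logger's level is at least LevelDebug.
func main() {
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})
	slog.SetDefault(slog.New(handler))

	// Same key/value style as the patch, with illustrative values.
	slog.Debug("GPU order in createLayout", "index", 0, "ID", "0", "name", "Intel Arc", "free", "7.5 GiB")
}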
