Skip to content

Commit 3258a89

Browse files
authored
DRY out the runner lifecycle code (ollama#12540)
* DRY out the runner lifecycle code

  Now that discovery uses the runners as well, this unifies the runner spawning code into a single place. This also unifies GPU discovery types with the newer ml.DeviceInfo

* win: make incremental builds better

  Place build artifacts in discrete directories so incremental builds don't have to start fresh

* Adjust sort order to consider iGPUs

* handle cpu inference oom scenarios

* review comments
1 parent 1c093e9 commit 3258a89

16 files changed

+705
-909
lines changed

discover/cpu_linux_test.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2065,12 +2065,6 @@ power management:
20652065
cpus := linuxCPUDetails(buf)
20662066

20672067
slog.Info("example", "scenario", k, "cpus", cpus)
2068-
si := SystemInfo{
2069-
System: CPUInfo{
2070-
CPUs: cpus,
2071-
},
2072-
}
2073-
threadCount := si.GetOptimalThreadCount()
20742068
if len(v.expCPUs) != len(cpus) {
20752069
t.Fatalf("incorrect number of sockets: expected:%v got:%v", v.expCPUs, cpus)
20762070
}
@@ -2085,10 +2079,6 @@ power management:
20852079
t.Fatalf("incorrect number of threads: expected:%v got:%v", v.expCPUs[i], c)
20862080
}
20872081
}
2088-
2089-
if threadCount != v.expThreadCount {
2090-
t.Fatalf("incorrect thread count expected:%d got:%d", v.expThreadCount, threadCount)
2091-
}
20922082
})
20932083
}
20942084
}

discover/gpu.go

Lines changed: 15 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -1,176 +1,42 @@
11
package discover
22

33
import (
4-
"context"
54
"log/slog"
65
"os"
7-
"path/filepath"
86
"regexp"
97
"runtime"
108
"strconv"
119
"strings"
1210

13-
"github.com/ollama/ollama/format"
1411
"github.com/ollama/ollama/ml"
1512
)
1613

1714
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
1815
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
1916
var CudaTegra string = os.Getenv("JETSON_JETPACK")
2017

21-
func GetCPUInfo() GpuInfo {
22-
mem, err := GetCPUMem()
18+
// GetSystemInfo returns the CPU thread count (non-efficiency cores, falling back to runtime.NumCPU) and the system's total memory, free memory, and free swap
19+
func GetSystemInfo() ml.SystemInfo {
20+
memInfo, err := GetCPUMem()
2321
if err != nil {
2422
slog.Warn("error looking up system memory", "error", err)
2523
}
26-
27-
return GpuInfo{
28-
memInfo: mem,
29-
DeviceID: ml.DeviceID{
30-
Library: "cpu",
31-
ID: "0",
32-
},
33-
}
34-
}
35-
36-
func GetGPUInfo(ctx context.Context, runners []FilteredRunnerDiscovery) GpuInfoList {
37-
devs := GPUDevices(ctx, runners)
38-
return devInfoToInfoList(devs)
39-
}
40-
41-
func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
42-
resp := []GpuInfo{}
43-
// Our current packaging model places ggml-hip in the main directory
44-
// but keeps rocm in an isolated directory. We have to add it to
45-
// the [LD_LIBRARY_]PATH so ggml-hip will load properly
46-
rocmDir := filepath.Join(LibOllamaPath, "rocm")
47-
if _, err := os.Stat(rocmDir); err != nil {
48-
rocmDir = ""
49-
}
50-
51-
for _, dev := range devs {
52-
info := GpuInfo{
53-
DeviceID: dev.DeviceID,
54-
filterID: dev.FilteredID,
55-
Name: dev.Description,
56-
memInfo: memInfo{
57-
TotalMemory: dev.TotalMemory,
58-
FreeMemory: dev.FreeMemory,
59-
},
60-
// TODO can we avoid variant
61-
DependencyPath: dev.LibraryPath,
62-
DriverMajor: dev.DriverMajor,
63-
DriverMinor: dev.DriverMinor,
64-
ComputeMajor: dev.ComputeMajor,
65-
ComputeMinor: dev.ComputeMinor,
66-
}
67-
if dev.Library == "CUDA" || dev.Library == "ROCm" {
68-
info.MinimumMemory = 457 * format.MebiByte
69-
}
70-
if dev.Library == "ROCm" && rocmDir != "" {
71-
info.DependencyPath = append(info.DependencyPath, rocmDir)
72-
}
73-
// TODO any special processing of Vulkan devices?
74-
resp = append(resp, info)
75-
}
76-
if len(resp) == 0 {
77-
mem, err := GetCPUMem()
78-
if err != nil {
79-
slog.Warn("error looking up system memory", "error", err)
80-
}
81-
82-
resp = append(resp, GpuInfo{
83-
memInfo: mem,
84-
DeviceID: ml.DeviceID{
85-
Library: "cpu",
86-
ID: "0",
87-
},
88-
})
24+
var threadCount int
25+
cpus := GetCPUDetails()
26+
for _, c := range cpus {
27+
threadCount += c.CoreCount - c.EfficiencyCoreCount
8928
}
90-
return resp
91-
}
9229

93-
// Given the list of GPUs this instantiation is targeted for,
94-
// figure out the visible devices environment variable
95-
//
96-
// If different libraries are detected, the first one is what we use
97-
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
98-
if len(l) == 0 {
99-
return nil
100-
}
101-
res := []string{}
102-
envVar := rocmGetVisibleDevicesEnv(l)
103-
if envVar != "" {
104-
res = append(res, envVar)
105-
}
106-
envVar = vkGetVisibleDevicesEnv(l)
107-
if envVar != "" {
108-
res = append(res, envVar)
109-
}
110-
return res
111-
}
112-
113-
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
114-
ids := []string{}
115-
for _, info := range gpuInfo {
116-
if info.Library != "ROCm" {
117-
continue
118-
}
119-
// If the devices requires a numeric ID, for filtering purposes, we use the unfiltered ID number
120-
if info.filterID != "" {
121-
ids = append(ids, info.filterID)
122-
} else {
123-
ids = append(ids, info.ID)
124-
}
125-
}
126-
if len(ids) == 0 {
127-
return ""
128-
}
129-
envVar := "ROCR_VISIBLE_DEVICES="
130-
if runtime.GOOS != "linux" {
131-
envVar = "HIP_VISIBLE_DEVICES="
132-
}
133-
// There are 3 potential env vars to use to select GPUs.
134-
// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
135-
// HIP_VISIBLE_DEVICES supports numeric IDs only
136-
// GPU_DEVICE_ORDINAL supports numeric IDs only
137-
return envVar + strings.Join(ids, ",")
138-
}
139-
140-
func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
141-
ids := []string{}
142-
for _, info := range gpuInfo {
143-
if info.Library != "Vulkan" {
144-
continue
145-
}
146-
if info.filterID != "" {
147-
ids = append(ids, info.filterID)
148-
} else {
149-
ids = append(ids, info.ID)
150-
}
151-
}
152-
if len(ids) == 0 {
153-
return ""
154-
}
155-
envVar := "GGML_VK_VISIBLE_DEVICES="
156-
return envVar + strings.Join(ids, ",")
157-
}
158-
159-
// GetSystemInfo returns the last cached state of the GPUs on the system
160-
func GetSystemInfo() SystemInfo {
161-
deviceMu.Lock()
162-
defer deviceMu.Unlock()
163-
gpus := devInfoToInfoList(devices)
164-
if len(gpus) == 1 && gpus[0].Library == "cpu" {
165-
gpus = []GpuInfo{}
30+
if threadCount == 0 {
31+
// Fall back to Go's num CPU
32+
threadCount = runtime.NumCPU()
16633
}
16734

168-
return SystemInfo{
169-
System: CPUInfo{
170-
CPUs: GetCPUDetails(),
171-
GpuInfo: GetCPUInfo(),
172-
},
173-
GPUs: gpus,
35+
return ml.SystemInfo{
36+
ThreadCount: threadCount,
37+
TotalMemory: memInfo.TotalMemory,
38+
FreeMemory: memInfo.FreeMemory,
39+
FreeSwap: memInfo.FreeSwap,
17440
}
17541
}
17642

0 commit comments

Comments
 (0)