Skip to content

Commit 013af94

Browse files
committed
chore: compute capabilities once
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 2e17edd commit 013af94

File tree

3 files changed

+38
-25
lines changed

3 files changed

+38
-25
lines changed

core/cli/run.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ type RunCMD struct {
8383
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
8484
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
8585
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
86-
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
86+
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
8787

8888
Version bool
8989
}

pkg/system/capabilities.go

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,8 @@ const (
4545
)
4646

4747
var (
48-
cuda13DirExists bool
49-
cuda12DirExists bool
50-
capabilityLogged bool
48+
cuda13DirExists bool
49+
cuda12DirExists bool
5150
)
5251

5352
func init() {
@@ -72,9 +71,15 @@ func (s *SystemState) Capability(capMap map[string]string) string {
7271
}
7372

7473
func (s *SystemState) getSystemCapabilities() string {
74+
75+
if s.systemCapabilities != "" {
76+
return s.systemCapabilities
77+
}
78+
7579
capability := os.Getenv(capabilityEnv)
7680
if capability != "" {
7781
xlog.Info("Using forced capability from environment variable", "capability", capability, "env", capabilityEnv)
82+
s.systemCapabilities = capability
7883
return capability
7984
}
8085

@@ -88,66 +93,70 @@ func (s *SystemState) getSystemCapabilities() string {
8893
// This might be used by e.g. container images to specify which
8994
// backends to pull in automatically when installing meta backends.
9095
if _, err := os.Stat(capabilityRunFile); err == nil {
91-
if !capabilityLogged {
92-
capability, err := os.ReadFile(capabilityRunFile)
93-
if err == nil {
94-
xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv)
95-
capabilityLogged = true
96-
return strings.Trim(strings.TrimSpace(string(capability)), "\n")
97-
}
96+
capability, err := os.ReadFile(capabilityRunFile)
97+
if err == nil {
98+
xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv)
99+
s.systemCapabilities = strings.Trim(strings.TrimSpace(string(capability)), "\n")
100+
return s.systemCapabilities
98101
}
99102
}
100103

101104
// If we are on mac and arm64, we will return metal
102105
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
103106
xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv)
104-
return metal
107+
s.systemCapabilities = metal
108+
return s.systemCapabilities
105109
}
106110

107111
// If we are on mac and x86, we will return darwin-x86
108112
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
109113
xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv)
110-
return darwinX86
114+
s.systemCapabilities = darwinX86
115+
return s.systemCapabilities
111116
}
112117

113118
// If we are on arm64 Linux and an NVIDIA GPU is detected, we will return nvidia-l4t
114119
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
115120
if s.GPUVendor == Nvidia {
116121
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
117122
if cuda13DirExists {
118-
return nvidiaL4TCuda13
123+
s.systemCapabilities = nvidiaL4TCuda13
124+
return s.systemCapabilities
119125
}
120126
if cuda12DirExists {
121-
return nvidiaL4TCuda12
127+
s.systemCapabilities = nvidiaL4TCuda12
128+
return s.systemCapabilities
122129
}
123-
return nvidiaL4T
130+
s.systemCapabilities = nvidiaL4T
131+
return s.systemCapabilities
124132
}
125133
}
126134

127135
if cuda13DirExists {
128-
return nvidiaCuda13
136+
s.systemCapabilities = nvidiaCuda13
137+
return s.systemCapabilities
129138
}
130139

131140
if cuda12DirExists {
132-
return nvidiaCuda12
141+
s.systemCapabilities = nvidiaCuda12
142+
return s.systemCapabilities
133143
}
134144

135145
if s.GPUVendor == "" {
136146
xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv)
137-
return defaultCapability
147+
s.systemCapabilities = defaultCapability
148+
return s.systemCapabilities
138149
}
139150

140-
if !capabilityLogged {
141-
xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv)
142-
capabilityLogged = true
143-
}
144151
// If VRAM is 4GB or less, default to CPU but warn the user that they can override this via the capability environment variable
145152
if s.VRAM <= 4*1024*1024*1024 {
146153
xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv)
147-
return defaultCapability
154+
s.systemCapabilities = defaultCapability
155+
return s.systemCapabilities
148156
}
149157

150-
return s.GPUVendor
158+
s.systemCapabilities = s.GPUVendor
159+
return s.systemCapabilities
151160
}
152161

153162
// BackendPreferenceTokens returns a list of substrings that represent the preferred

pkg/system/state.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ type SystemState struct {
1919
Backend Backend
2020
Model Model
2121
VRAM uint64
22+
23+
systemCapabilities string
2224
}
2325

2426
type SystemStateOptions func(*SystemState)
@@ -53,5 +55,7 @@ func GetSystemState(opts ...SystemStateOptions) (*SystemState, error) {
5355
state.VRAM, _ = xsysinfo.TotalAvailableVRAM()
5456
xlog.Debug("Total available VRAM", "vram", state.VRAM)
5557

58+
state.getSystemCapabilities()
59+
5660
return state, nil
5761
}

0 commit comments

Comments
 (0)