diff --git a/main.go b/main.go index 8772c6faf..35a165ff5 100644 --- a/main.go +++ b/main.go @@ -48,6 +48,11 @@ func main() { llamacpp.ShouldUpdateServerLock.Unlock() } + desiredServerVersion, ok := os.LookupEnv("LLAMACPP_SERVER_VERSION") + if ok { + llamacpp.SetDesiredServerVersion(desiredServerVersion) + } + modelManager := models.NewManager( log, models.ClientConfig{ diff --git a/pkg/inference/backends/llamacpp/download.go b/pkg/inference/backends/llamacpp/download.go index 6f6122aab..24b23a4fe 100644 --- a/pkg/inference/backends/llamacpp/download.go +++ b/pkg/inference/backends/llamacpp/download.go @@ -29,10 +29,24 @@ var ( ShouldUseGPUVariantLock sync.Mutex ShouldUpdateServer = true ShouldUpdateServerLock sync.Mutex + DesiredServerVersion = "latest" + DesiredServerVersionLock sync.Mutex errLlamaCppUpToDate = errors.New("bundled llama.cpp version is up to date, no need to update") errLlamaCppUpdateDisabled = errors.New("llama.cpp auto-updated is disabled") ) +func GetDesiredServerVersion() string { + DesiredServerVersionLock.Lock() + defer DesiredServerVersionLock.Unlock() + return DesiredServerVersion +} + +func SetDesiredServerVersion(version string) { + DesiredServerVersionLock.Lock() + defer DesiredServerVersionLock.Unlock() + DesiredServerVersion = version +} + func (l *llamaCpp) downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client, llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string, ) error { diff --git a/pkg/inference/backends/llamacpp/download_darwin.go b/pkg/inference/backends/llamacpp/download_darwin.go index 9925dee15..a1c0c9d29 100644 --- a/pkg/inference/backends/llamacpp/download_darwin.go +++ b/pkg/inference/backends/llamacpp/download_darwin.go @@ -10,7 +10,7 @@ import ( func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client, llamaCppPath, vendoredServerStoragePath string, ) error { - desiredVersion := "latest" + desiredVersion := GetDesiredServerVersion()
desiredVariant := "metal" return l.downloadLatestLlamaCpp(ctx, log, httpClient, llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant) diff --git a/pkg/inference/backends/llamacpp/download_windows.go b/pkg/inference/backends/llamacpp/download_windows.go index fd5cb9c28..d1a84b358 100644 --- a/pkg/inference/backends/llamacpp/download_windows.go +++ b/pkg/inference/backends/llamacpp/download_windows.go @@ -33,7 +33,7 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, } } } - desiredVersion := "latest" + desiredVersion := GetDesiredServerVersion() desiredVariant := "cpu" if canUseCUDA11 { desiredVariant = "cuda"