Skip to content

Commit f17e7a3

Browse files
author
Piotr Stankiewicz
committed
Allow specifying a target llama-server version
Signed-off-by: Piotr Stankiewicz <[email protected]>
1 parent e3916bc commit f17e7a3

File tree

4 files changed

+21
-2
lines changed

4 files changed

+21
-2
lines changed

main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ func main() {
4848
llamacpp.ShouldUpdateServerLock.Unlock()
4949
}
5050

51+
desiredSeverVersion, ok := os.LookupEnv("LLAMACPP_SERVER_VERSION")
52+
if ok {
53+
llamacpp.SetDesiredServerVersion(desiredSeverVersion)
54+
}
55+
5156
modelManager := models.NewManager(
5257
log,
5358
models.ClientConfig{

pkg/inference/backends/llamacpp/download.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,24 @@ var (
2929
ShouldUseGPUVariantLock sync.Mutex
3030
ShouldUpdateServer = true
3131
ShouldUpdateServerLock sync.Mutex
32+
DesiredServerVersion = "latest"
33+
DesiredServerVersionLock sync.Mutex
3234
errLlamaCppUpToDate = errors.New("bundled llama.cpp version is up to date, no need to update")
3335
errLlamaCppUpdateDisabled = errors.New("llama.cpp auto-updated is disabled")
3436
)
3537

38+
func GetDesiredServerVersion() string {
39+
DesiredServerVersionLock.Lock()
40+
defer DesiredServerVersionLock.Unlock()
41+
return DesiredServerVersion
42+
}
43+
44+
func SetDesiredServerVersion(version string) {
45+
DesiredServerVersionLock.Lock()
46+
defer DesiredServerVersionLock.Unlock()
47+
DesiredServerVersion = version
48+
}
49+
3650
func (l *llamaCpp) downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
3751
llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
3852
) error {

pkg/inference/backends/llamacpp/download_darwin.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
1111
llamaCppPath, vendoredServerStoragePath string,
1212
) error {
13-
desiredVersion := "latest"
13+
desiredVersion := GetDesiredServerVersion()
1414
desiredVariant := "metal"
1515
return l.downloadLatestLlamaCpp(ctx, log, httpClient, llamaCppPath, vendoredServerStoragePath, desiredVersion,
1616
desiredVariant)

pkg/inference/backends/llamacpp/download_windows.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
3333
}
3434
}
3535
}
36-
desiredVersion := "latest"
36+
desiredVersion := GetDesiredServerVersion()
3737
desiredVariant := "cpu"
3838
if canUseCUDA11 {
3939
desiredVariant = "cuda"

0 commit comments

Comments
 (0)