@@ -12,34 +12,49 @@ import (
 
 	"github.com/docker/model-runner/pkg/inference"
 	"github.com/docker/model-runner/pkg/inference/models"
-	"github.com/docker/model-runner/pkg/logger"
-	"github.com/docker/model-runner/pkg/paths"
+	"github.com/docker/model-runner/pkg/logging"
 )
 
 const (
 	// Name is the backend name.
 	Name = "llama.cpp"
-	// componentName is the component name.
-	componentName = "inference-" + Name
 )
 
-var (
-	// log is the log for the backend service.
-	log = logger.Default.WithComponent(componentName)
-	// serveLog is the log for llamaCppProcess
-	serveLog = logger.MakeFileOnly("", componentName)
-)
+// VendoredServerStoragePath returns the parent path of the vendored version of
+// com.docker.llama-server. It can be overridden during init().
+var VendoredServerStoragePath = func() (string, error) {
+	return ".", nil
+}
+
+// UpdatedServerStoragePath returns the parent path of the updated version of
+// com.docker.llama-server. It is also where updates will be stored when
+// downloaded. It can be overridden during init().
+var UpdatedServerStoragePath = func() (string, error) {
+	return ".", nil
+}
 
 // llamaCpp is the llama.cpp-based backend implementation.
 type llamaCpp struct {
+	// log is the associated logger.
+	log logging.Logger
 	// modelManager is the shared model manager.
-	modelManager    *models.Manager
+	modelManager *models.Manager
+	// serverLog is the logger to use for the llama.cpp server process.
+	serverLog logging.Logger
 	updatedLlamaCpp bool
 }
 
 // New creates a new llama.cpp-based backend.
-func New(modelManager *models.Manager) (inference.Backend, error) {
-	return &llamaCpp{modelManager: modelManager}, nil
+func New(
+	log logging.Logger,
+	modelManager *models.Manager,
+	serverLog logging.Logger,
+) (inference.Backend, error) {
+	return &llamaCpp{
+		log:          log,
+		modelManager: modelManager,
+		serverLog:    serverLog,
+	}, nil
 }
 
 // Name implements inference.Backend.Name.
@@ -67,9 +82,13 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
 	// Internet access and an available docker/docker-model-backend-llamacpp:latest-update on Docker Hub are required.
 	// Even if docker/docker-model-backend-llamacpp:latest-update has been downloaded before, we still require its
 	// digest to be equal to the one on Docker Hub.
-	llamaCppPath := paths.DockerHome("bin", "inference", "com.docker.llama-server")
-	if err := ensureLatestLlamaCpp(ctx, httpClient, llamaCppPath); err != nil {
-		log.Infof("failed to ensure latest llama.cpp: %v\n", err)
+	llamaCppStorage, err := UpdatedServerStoragePath()
+	if err != nil {
+		return fmt.Errorf("unable to determine llama.cpp path: %w", err)
+	}
+	llamaCppPath := filepath.Join(llamaCppStorage, "com.docker.llama-server")
+	if err := ensureLatestLlamaCpp(ctx, l.log, httpClient, llamaCppPath); err != nil {
+		l.log.Infof("failed to ensure latest llama.cpp: %v\n", err)
 		if errors.Is(err, context.Canceled) {
 			return err
 		}
@@ -83,21 +102,24 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
 // Run implements inference.Backend.Run.
 func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference.BackendMode) error {
 	modelPath, err := l.modelManager.GetModelPath(model)
-	log.Infof("Model path: %s", modelPath)
+	l.log.Infof("Model path: %s", modelPath)
 	if err != nil {
 		return fmt.Errorf("failed to get model path: %w", err)
 	}
 
 	if err := os.RemoveAll(socket); err != nil {
-		log.Warnln("failed to remove socket file %s: %w", socket, err)
-		log.Warnln("llama.cpp may not be able to start")
+		l.log.Warnln("failed to remove socket file %s: %w", socket, err)
+		l.log.Warnln("llama.cpp may not be able to start")
 	}
 
-	binPath := paths.DockerHome("bin", "inference")
+	binPath, err := UpdatedServerStoragePath()
+	if err != nil {
+		return fmt.Errorf("unable to determine llama.cpp path: %w", err)
+	}
 	if !l.updatedLlamaCpp {
-		binPath, err = paths.InstallPaths.BinResourcesPath()
+		binPath, err = VendoredServerStoragePath()
 		if err != nil {
-			return fmt.Errorf("failed to get llama.cpp path: %w", err)
+			return fmt.Errorf("unable to determine vendored llama.cpp path: %w", err)
 		}
 	}
 	llamaCppArgs := []string{"--model", modelPath, "--jinja"}
@@ -117,9 +139,9 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference.BackendMode) error {
 		// port this backend there.
 		return llamaCppProcess.Process.Signal(os.Interrupt)
 	}
-	serveLogStream := serveLog.Writer()
-	llamaCppProcess.Stdout = serveLogStream
-	llamaCppProcess.Stderr = serveLogStream
+	serverLogStream := l.serverLog.Writer()
+	llamaCppProcess.Stdout = serverLogStream
+	llamaCppProcess.Stderr = serverLogStream
 
 	if err := llamaCppProcess.Start(); err != nil {
 		return fmt.Errorf("unable to start llama.cpp: %w", err)
@@ -128,7 +150,7 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
 	llamaCppErrors := make(chan error, 1)
 	go func() {
 		llamaCppErr := llamaCppProcess.Wait()
-		serveLogStream.Close()
+		serverLogStream.Close()
 		llamaCppErrors <- llamaCppErr
 		close(llamaCppErrors)
 	}()
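
For anyone trying the reworked API, here is a minimal caller-side sketch, not part of this diff: it assumes the backend package is importable as pkg/inference/backends/llamacpp, that the host application already has two logging.Logger values (one for the backend, one file-only logger for server output), and the "/opt/model-runner/..." override paths and the newLlamaCppBackend helper are purely illustrative.

package app

import (
	"github.com/docker/model-runner/pkg/inference"
	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
	"github.com/docker/model-runner/pkg/inference/models"
	"github.com/docker/model-runner/pkg/logging"
)

func init() {
	// Point both storage hooks at platform-specific directories instead of the
	// default "."; these locations are hypothetical.
	llamacpp.VendoredServerStoragePath = func() (string, error) {
		return "/opt/model-runner/vendored", nil
	}
	llamacpp.UpdatedServerStoragePath = func() (string, error) {
		return "/opt/model-runner/updated", nil
	}
}

// newLlamaCppBackend wires up the new constructor: the backend now receives
// its loggers explicitly instead of building package-level ones itself.
func newLlamaCppBackend(
	log logging.Logger, // backend logger, created by the host application
	serverLog logging.Logger, // logger that will receive llama.cpp server output
	modelManager *models.Manager,
) (inference.Backend, error) {
	return llamacpp.New(log, modelManager, serverLog)
}

As the diff reads, the point of the change is that the backend no longer depends on pkg/paths or package-level loggers; the install locations and loggers are now supplied by the caller.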