
Commit 89e40da

inference: adjust for lack of logger and paths packages
Signed-off-by: Jacob Howard <[email protected]>
1 parent b4e7ff2 commit 89e40da
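
This commit threads a logging.Logger value through each backend and scheduler component in place of the removed package-level logger package. The actual pkg/logging definition is not part of this diff; a minimal sketch (an assumption, not the real API) of the interface shape the updated call sites rely on — Infof, Infoln, Warn, Warnf, Warnln, and a closeable Writer — might look like this:

    // Sketch only: the real pkg/logging package is not shown in this commit.
    package logging

    import "io"

    // Logger is assumed to cover the calls made by the updated files.
    type Logger interface {
        Infof(format string, args ...interface{})
        Infoln(args ...interface{})
        Warn(args ...interface{})
        Warnf(format string, args ...interface{})
        Warnln(args ...interface{})
        // Writer returns a stream suitable for child-process stdout/stderr;
        // the llama.cpp backend closes it once the process exits.
        Writer() io.WriteCloser
    }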

File tree

10 files changed: 115 additions and 72 deletions


pkg/inference/backends/llamacpp/download.go

Lines changed: 10 additions & 7 deletions
@@ -14,15 +14,15 @@ import (
     "strings"
 
     "github.com/docker/model-runner/pkg/internal/dockerhub"
-    "github.com/docker/model-runner/pkg/paths"
+    "github.com/docker/model-runner/pkg/logging"
 )
 
 const (
     hubNamespace = "docker"
     hubRepo      = "docker-model-backend-llamacpp"
 )
 
-func ensureLatestLlamaCpp(ctx context.Context, httpClient *http.Client, llamaCppPath string) error {
+func ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client, llamaCppPath string) error {
     url := fmt.Sprintf("https://hub.docker.com/v2/namespaces/%s/repositories/%s/tags", hubNamespace, hubRepo)
     resp, err := httpClient.Get(url)
     if err != nil {
@@ -74,10 +74,13 @@ func ensureLatestLlamaCpp(ctx context.Context, httpClient *http.Client, llamaCpp
     }
 
     image := fmt.Sprintf("registry-1.docker.io/%s/%s@%s", hubNamespace, hubRepo, latest)
-    downloadDir := paths.DockerHome(".llamacpp-tmp")
+    downloadDir, err := os.MkdirTemp("", "llamacpp-install")
+    if err != nil {
+        return fmt.Errorf("could not create temporary directory: %w", err)
+    }
     defer os.RemoveAll(downloadDir)
 
-    if err := extractFromImage(ctx, image, runtime.GOOS, runtime.GOARCH, downloadDir); err != nil {
+    if err := extractFromImage(ctx, log, image, runtime.GOOS, runtime.GOARCH, downloadDir); err != nil {
         return fmt.Errorf("could not extract image: %w", err)
     }
 
@@ -97,7 +100,7 @@ func ensureLatestLlamaCpp(ctx context.Context, httpClient *http.Client, llamaCpp
     }
 
     log.Infoln("successfully updated llama.cpp binary")
-    log.Infoln("running llama.cpp version:", getLlamaCppVersion(llamaCppPath))
+    log.Infoln("running llama.cpp version:", getLlamaCppVersion(log, llamaCppPath))
 
     if err := os.WriteFile(currentVersionFile, []byte(latest), 0o644); err != nil {
         log.Warnf("failed to save llama.cpp version: %v", err)
@@ -106,7 +109,7 @@ func ensureLatestLlamaCpp(ctx context.Context, httpClient *http.Client, llamaCpp
     return nil
 }
 
-func extractFromImage(ctx context.Context, image, requiredOs, requiredArch, destination string) error {
+func extractFromImage(ctx context.Context, log logging.Logger, image, requiredOs, requiredArch, destination string) error {
     log.Infof("Extracting image %q to %q", image, destination)
     tmpDir, err := os.MkdirTemp("", "docker-tar-extract")
     if err != nil {
@@ -119,7 +122,7 @@ func extractFromImage(ctx context.Context, image, requiredOs, requiredArch, dest
     return dockerhub.Extract(imageTar, requiredArch, requiredOs, destination)
 }
 
-func getLlamaCppVersion(llamaCpp string) string {
+func getLlamaCppVersion(log logging.Logger, llamaCpp string) string {
     output, err := exec.Command(llamaCpp, "--version").CombinedOutput()
     if err != nil {
         log.Warnf("could not get llama.cpp version: %v", err)

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 48 additions & 26 deletions
@@ -12,34 +12,49 @@ import (
 
     "github.com/docker/model-runner/pkg/inference"
     "github.com/docker/model-runner/pkg/inference/models"
-    "github.com/docker/model-runner/pkg/logger"
-    "github.com/docker/model-runner/pkg/paths"
+    "github.com/docker/model-runner/pkg/logging"
 )
 
 const (
     // Name is the backend name.
     Name = "llama.cpp"
-    // componentName is the component name.
-    componentName = "inference-" + Name
 )
 
-var (
-    // log is the log for the backend service.
-    log = logger.Default.WithComponent(componentName)
-    // serveLog is the log for llamaCppProcess
-    serveLog = logger.MakeFileOnly("", componentName)
-)
+// VendoredServerStoragePath returns the parent path of the vendored version of
+// com.docker.llama-server. It can be overridden during init().
+var VendoredServerStoragePath = func() (string, error) {
+    return ".", nil
+}
+
+// UpdatedServerStoragePath returns the parent path of the updated version of
+// com.docker.llama-server. It is also where updates will be stored when
+// downloaded. It can be overridden during init().
+var UpdatedServerStoragePath = func() (string, error) {
+    return ".", nil
+}
 
 // llamaCpp is the llama.cpp-based backend implementation.
 type llamaCpp struct {
+    // log is the associated logger.
+    log logging.Logger
     // modelManager is the shared model manager.
-    modelManager *models.Manager
+    modelManager *models.Manager
+    // serverLog is the logger to use for the llama.cpp server process.
+    serverLog logging.Logger
     updatedLlamaCpp bool
 }
 
 // New creates a new llama.cpp-based backend.
-func New(modelManager *models.Manager) (inference.Backend, error) {
-    return &llamaCpp{modelManager: modelManager}, nil
+func New(
+    log logging.Logger,
+    modelManager *models.Manager,
+    serverLog logging.Logger,
+) (inference.Backend, error) {
+    return &llamaCpp{
+        log:          log,
+        modelManager: modelManager,
+        serverLog:    serverLog,
+    }, nil
 }
 
 // Name implements inference.Backend.Name.
@@ -67,9 +82,13 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
     // Internet access and an available docker/docker-model-backend-llamacpp:latest-update on Docker Hub are required.
     // Even if docker/docker-model-backend-llamacpp:latest-update has been downloaded before, we still require its
     // digest to be equal to the one on Docker Hub.
-    llamaCppPath := paths.DockerHome("bin", "inference", "com.docker.llama-server")
-    if err := ensureLatestLlamaCpp(ctx, httpClient, llamaCppPath); err != nil {
-        log.Infof("failed to ensure latest llama.cpp: %v\n", err)
+    llamaCppStorage, err := UpdatedServerStoragePath()
+    if err != nil {
+        return fmt.Errorf("unable to determine llama.cpp path: %w", err)
+    }
+    llamaCppPath := filepath.Join(llamaCppStorage, "com.docker.llama-server")
+    if err := ensureLatestLlamaCpp(ctx, l.log, httpClient, llamaCppPath); err != nil {
+        l.log.Infof("failed to ensure latest llama.cpp: %v\n", err)
         if errors.Is(err, context.Canceled) {
             return err
         }
@@ -83,21 +102,24 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
 // Run implements inference.Backend.Run.
 func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference.BackendMode) error {
     modelPath, err := l.modelManager.GetModelPath(model)
-    log.Infof("Model path: %s", modelPath)
+    l.log.Infof("Model path: %s", modelPath)
     if err != nil {
         return fmt.Errorf("failed to get model path: %w", err)
     }
 
     if err := os.RemoveAll(socket); err != nil {
-        log.Warnln("failed to remove socket file %s: %w", socket, err)
-        log.Warnln("llama.cpp may not be able to start")
+        l.log.Warnln("failed to remove socket file %s: %w", socket, err)
+        l.log.Warnln("llama.cpp may not be able to start")
     }
 
-    binPath := paths.DockerHome("bin", "inference")
+    binPath, err := UpdatedServerStoragePath()
+    if err != nil {
+        return fmt.Errorf("unable to determine llama.cpp path: %w", err)
+    }
     if !l.updatedLlamaCpp {
-        binPath, err = paths.InstallPaths.BinResourcesPath()
+        binPath, err = VendoredServerStoragePath()
         if err != nil {
-            return fmt.Errorf("failed to get llama.cpp path: %w", err)
+            return fmt.Errorf("unable to determine vendored llama.cpp path: %w", err)
         }
     }
     llamaCppArgs := []string{"--model", modelPath, "--jinja"}
@@ -117,9 +139,9 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
         // port this backend there.
         return llamaCppProcess.Process.Signal(os.Interrupt)
     }
-    serveLogStream := serveLog.Writer()
-    llamaCppProcess.Stdout = serveLogStream
-    llamaCppProcess.Stderr = serveLogStream
+    serverLogStream := l.serverLog.Writer()
+    llamaCppProcess.Stdout = serverLogStream
+    llamaCppProcess.Stderr = serverLogStream
 
     if err := llamaCppProcess.Start(); err != nil {
         return fmt.Errorf("unable to start llama.cpp: %w", err)
@@ -128,7 +150,7 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
     llamaCppErrors := make(chan error, 1)
     go func() {
         llamaCppErr := llamaCppProcess.Wait()
-        serveLogStream.Close()
+        serverLogStream.Close()
         llamaCppErrors <- llamaCppErr
         close(llamaCppErrors)
     }()
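
The two new package-level variables, VendoredServerStoragePath and UpdatedServerStoragePath, replace the old paths.DockerHome and paths.InstallPaths lookups and default to the current directory; per their comments, a host integration is expected to override them during init(). A hypothetical override (the importing package name and path below are illustrative, not taken from this commit) could look like:

    // Hypothetical wiring in a host-specific package; the real override
    // site is not included in this commit.
    package desktop

    import (
        "os"
        "path/filepath"

        "github.com/docker/model-runner/pkg/inference/backends/llamacpp"
    )

    func init() {
        llamacpp.UpdatedServerStoragePath = func() (string, error) {
            home, err := os.UserHomeDir()
            if err != nil {
                return "", err
            }
            // Example location only; loosely mirrors the removed
            // paths.DockerHome("bin", "inference") lookup.
            return filepath.Join(home, ".docker", "bin", "inference"), nil
        }
    }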

pkg/inference/backends/mlx/mlx.go

Lines changed: 9 additions & 11 deletions
@@ -7,30 +7,28 @@ import (
 
     "github.com/docker/model-runner/pkg/inference"
     "github.com/docker/model-runner/pkg/inference/models"
-    "github.com/docker/model-runner/pkg/logger"
+    "github.com/docker/model-runner/pkg/logging"
 )
 
 const (
     // Name is the backend name.
     Name = "mlx"
-    // componentName is the component name.
-    componentName = "inference-" + Name
-)
-
-var (
-    // log is the log for the backend service.
-    log = logger.Default.WithComponent(componentName)
 )
 
 // mlx is the MLX-based backend implementation.
 type mlx struct {
+    // log is the associated logger.
+    log logging.Logger
     // modelManager is the shared model manager.
     modelManager *models.Manager
 }
 
 // New creates a new MLX-based backend.
-func New(modelManager *models.Manager) (inference.Backend, error) {
-    return &mlx{modelManager: modelManager}, nil
+func New(log logging.Logger, modelManager *models.Manager) (inference.Backend, error) {
+    return &mlx{
+        log:          log,
+        modelManager: modelManager,
+    }, nil
 }
 
 // Name implements inference.Backend.Name.
@@ -53,6 +51,6 @@ func (m *mlx) Install(ctx context.Context, httpClient *http.Client) error {
 // Run implements inference.Backend.Run.
 func (m *mlx) Run(ctx context.Context, socket, model string, mode inference.BackendMode) error {
     // TODO: Implement.
-    log.Warn("MLX backend is not yet supported")
+    m.log.Warn("MLX backend is not yet supported")
     return errors.New("not implemented")
 }

pkg/inference/backends/vllm/vllm.go

Lines changed: 9 additions & 11 deletions
@@ -7,30 +7,28 @@ import (
 
     "github.com/docker/model-runner/pkg/inference"
     "github.com/docker/model-runner/pkg/inference/models"
-    "github.com/docker/model-runner/pkg/logger"
+    "github.com/docker/model-runner/pkg/logging"
 )
 
 const (
     // Name is the backend name.
     Name = "vllm"
-    // componentName is the component name.
-    componentName = "inference-" + Name
-)
-
-var (
-    // log is the log for the backend service.
-    log = logger.Default.WithComponent(componentName)
 )
 
 // vLLM is the vLLM-based backend implementation.
 type vLLM struct {
+    // log is the associated logger.
+    log logging.Logger
    // modelManager is the shared model manager.
     modelManager *models.Manager
 }
 
 // New creates a new vLLM-based backend.
-func New(modelManager *models.Manager) (inference.Backend, error) {
-    return &vLLM{modelManager: modelManager}, nil
+func New(log logging.Logger, modelManager *models.Manager) (inference.Backend, error) {
+    return &vLLM{
+        log:          log,
+        modelManager: modelManager,
+    }, nil
 }
 
 // Name implements inference.Backend.Name.
@@ -53,6 +51,6 @@ func (v *vLLM) Install(ctx context.Context, httpClient *http.Client) error {
 // Run implements inference.Backend.Run.
 func (v *vLLM) Run(ctx context.Context, socket, model string, mode inference.BackendMode) error {
     // TODO: Implement.
-    log.Warn("vLLM backend is not yet supported")
+    v.log.Warn("vLLM backend is not yet supported")
     return errors.New("not implemented")
 }
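
With componentName and the package-level log variables removed, each backend constructor now receives its logger as a parameter. A sketch of how a caller might assemble the backend map after this change (the function and its placement are illustrative; how the logging.Logger values are created lies outside this diff):

    // Illustrative placement only; not part of this commit.
    package scheduling

    import (
        "github.com/docker/model-runner/pkg/inference"
        "github.com/docker/model-runner/pkg/inference/backends/llamacpp"
        "github.com/docker/model-runner/pkg/inference/backends/mlx"
        "github.com/docker/model-runner/pkg/inference/backends/vllm"
        "github.com/docker/model-runner/pkg/inference/models"
        "github.com/docker/model-runner/pkg/logging"
    )

    // newBackends shows the new constructor signatures introduced by this commit.
    func newBackends(log, serverLog logging.Logger, modelManager *models.Manager) (map[string]inference.Backend, error) {
        llamaCppBackend, err := llamacpp.New(log, modelManager, serverLog)
        if err != nil {
            return nil, err
        }
        mlxBackend, err := mlx.New(log, modelManager)
        if err != nil {
            return nil, err
        }
        vllmBackend, err := vllm.New(log, modelManager)
        if err != nil {
            return nil, err
        }
        return map[string]inference.Backend{
            llamacpp.Name: llamaCppBackend,
            mlx.Name:      mlxBackend,
            vllm.Name:     vllmBackend,
        }, nil
    }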

pkg/inference/models/manager.go

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@ import (
     "github.com/docker/model-distribution/pkg/distribution"
     "github.com/docker/model-distribution/pkg/types"
     "github.com/docker/model-runner/pkg/inference"
-    "github.com/docker/model-runner/pkg/logger"
+    "github.com/docker/model-runner/pkg/logging"
 )
 
 const (
@@ -23,7 +23,7 @@ const (
 // Manager manages inference model pulls and storage.
 type Manager struct {
     // log is the associated logger.
-    log logger.ComponentLogger
+    log logging.Logger
     // pullTokens is a semaphore used to restrict the maximum number of
     // concurrent pull requests.
     pullTokens chan struct{}
@@ -34,7 +34,7 @@ type Manager struct {
 }
 
 // NewManager creates a new model's manager.
-func NewManager(log logger.ComponentLogger, client *distribution.Client) *Manager {
+func NewManager(log logging.Logger, client *distribution.Client) *Manager {
     // Create the manager.
     m := &Manager{
         log: log,

pkg/inference/scheduling/installer.go

Lines changed: 3 additions & 3 deletions
@@ -7,7 +7,7 @@ import (
     "sync/atomic"
 
     "github.com/docker/model-runner/pkg/inference"
-    "github.com/docker/model-runner/pkg/logger"
+    "github.com/docker/model-runner/pkg/logging"
 )
 
 var (
@@ -35,7 +35,7 @@ type installStatus struct {
 // installer drives backend installations.
 type installer struct {
     // log is the associated logger.
-    log logger.ComponentLogger
+    log logging.Logger
     // backends are the supported inference backends.
     backends map[string]inference.Backend
     // httpClient is the HTTP client to use for backend installations.
@@ -48,7 +48,7 @@ type installer struct {
 
 // newInstaller creates a new backend installer.
 func newInstaller(
-    log logger.ComponentLogger,
+    log logging.Logger,
     backends map[string]inference.Backend,
     httpClient *http.Client,
 ) *installer {

pkg/inference/scheduling/loader.go

Lines changed: 3 additions & 3 deletions
@@ -9,7 +9,7 @@ import (
 
     "github.com/docker/model-runner/pkg/inference"
     "github.com/docker/model-runner/pkg/inference/models"
-    "github.com/docker/model-runner/pkg/logger"
+    "github.com/docker/model-runner/pkg/logging"
 )
 
 const (
@@ -48,7 +48,7 @@ type runnerKey struct {
 // been installed successfully.
 type loader struct {
     // log is the associated logger.
-    log logger.ComponentLogger
+    log logging.Logger
     // backends are the supported inference backends.
     backends map[string]inference.Backend
     // modelManager is the shared model manager.
@@ -86,7 +86,7 @@ type loader struct {
 
 // newLoader creates a new loader.
 func newLoader(
-    log logger.ComponentLogger,
+    log logging.Logger,
     backends map[string]inference.Backend,
     modelManager *models.Manager,
 ) *loader {
