Commit 25eefbf

Revert "configure backend args (#41)" (#50)
This reverts commit e59c062.
1 parent de76074 commit 25eefbf

7 files changed, +29 -439 lines

Makefile

Lines changed: 0 additions & 10 deletions
@@ -7,7 +7,6 @@ BASE_IMAGE := ubuntu:24.04
 DOCKER_IMAGE := docker/model-runner:latest
 PORT := 8080
 MODELS_PATH := $(shell pwd)/models
-LLAMA_ARGS ?=
 
 # Main targets
 .PHONY: build run clean test docker-build docker-run help
@@ -21,7 +20,6 @@ build:
 
 # Run the application locally
 run: build
-	LLAMA_ARGS="$(LLAMA_ARGS)" \
 	./$(APP_NAME)
 
 # Clean build artifacts
@@ -57,7 +55,6 @@ docker-run: docker-build
 	-e MODEL_RUNNER_PORT=$(PORT) \
 	-e LLAMA_SERVER_PATH=/app/bin \
 	-e MODELS_PATH=/models \
-	-e LLAMA_ARGS="$(LLAMA_ARGS)" \
 	$(DOCKER_IMAGE)
 
 # Show help
@@ -70,10 +67,3 @@ help:
 	@echo " docker-build - Build Docker image"
 	@echo " docker-run - Run in Docker container with TCP port access and mounted model storage"
 	@echo " help - Show this help message"
-	@echo ""
-	@echo "Backend configuration options:"
-	@echo " LLAMA_ARGS - Arguments for llama.cpp (e.g., \"--verbose --jinja -ngl 100 --ctx-size 2048\")"
-	@echo ""
-	@echo "Example usage:"
-	@echo " make run LLAMA_ARGS=\"--verbose --jinja -ngl 100 --ctx-size 2048\""
-	@echo " make docker-run LLAMA_ARGS=\"--verbose --jinja -ngl 100 --threads 4 --ctx-size 2048\""

main.go

Lines changed: 0 additions & 62 deletions
@@ -7,12 +7,10 @@ import (
 	"os"
 	"os/signal"
 	"path/filepath"
-	"strings"
 	"syscall"
 
 	"github.com/docker/model-runner/pkg/inference"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
-	"github.com/docker/model-runner/pkg/inference/config"
 	"github.com/docker/model-runner/pkg/inference/models"
 	"github.com/docker/model-runner/pkg/inference/scheduling"
 	"github.com/docker/model-runner/pkg/routing"
@@ -52,9 +50,6 @@ func main() {
 
 	log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath)
 
-	// Create llama.cpp configuration from environment variables
-	llamaCppConfig := createLlamaCppConfigFromEnv()
-
 	llamaCppBackend, err := llamacpp.New(
 		log,
 		modelManager,
@@ -66,7 +61,6 @@ func main() {
 			_ = os.MkdirAll(d, 0o755)
 			return d
 		}(),
-		llamaCppConfig,
 	)
 	if err != nil {
 		log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
@@ -140,59 +134,3 @@ func main() {
 	}
 	log.Infoln("Docker Model Runner stopped")
 }
-
-// createLlamaCppConfigFromEnv creates a LlamaCppConfig from environment variables
-func createLlamaCppConfigFromEnv() config.BackendConfig {
-	// Check if any configuration environment variables are set
-	argsStr := os.Getenv("LLAMA_ARGS")
-
-	// If no environment variables are set, use default configuration
-	if argsStr == "" {
-		return nil // nil will cause the backend to use its default configuration
-	}
-
-	// Split the string by spaces, respecting quoted arguments
-	args := splitArgs(argsStr)
-
-	// Check for disallowed arguments
-	disallowedArgs := []string{"--model", "--host", "--embeddings", "--mmproj"}
-	for _, arg := range args {
-		for _, disallowed := range disallowedArgs {
-			if arg == disallowed {
-				log.Fatalf("LLAMA_ARGS cannot override the %s argument as it is controlled by the model runner", disallowed)
-			}
-		}
-	}
-
-	log.Infof("Using custom arguments: %v", args)
-	return &llamacpp.Config{
-		Args: args,
-	}
-}
-
-// splitArgs splits a string into arguments, respecting quoted arguments
-func splitArgs(s string) []string {
-	var args []string
-	var currentArg strings.Builder
-	inQuotes := false
-
-	for _, r := range s {
-		switch {
-		case r == '"' || r == '\'':
-			inQuotes = !inQuotes
-		case r == ' ' && !inQuotes:
-			if currentArg.Len() > 0 {
-				args = append(args, currentArg.String())
-				currentArg.Reset()
-			}
-		default:
-			currentArg.WriteRune(r)
-		}
-	}
-
-	if currentArg.Len() > 0 {
-		args = append(args, currentArg.String())
-	}
-
-	return args
-}

main_test.go

Lines changed: 0 additions & 108 deletions
This file was deleted.
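The deleted tests themselves are not reproduced in this view. Purely as a hedged illustration of the behaviour that goes away with this revert, a test for the quote-aware splitArgs helper removed from main.go above might have looked roughly like the sketch below; the helper body is copied from the diff so the snippet is self-contained, while the test name and the flag values it feeds in are hypothetical, not the contents of the deleted file.

// Hypothetical sketch, not the deleted main_test.go; it only illustrates the
// quote-aware splitting removed by this revert.
package main

import (
	"reflect"
	"strings"
	"testing"
)

// splitArgs is copied verbatim from the pre-revert main.go shown above.
func splitArgs(s string) []string {
	var args []string
	var currentArg strings.Builder
	inQuotes := false

	for _, r := range s {
		switch {
		case r == '"' || r == '\'':
			inQuotes = !inQuotes
		case r == ' ' && !inQuotes:
			if currentArg.Len() > 0 {
				args = append(args, currentArg.String())
				currentArg.Reset()
			}
		default:
			currentArg.WriteRune(r)
		}
	}

	if currentArg.Len() > 0 {
		args = append(args, currentArg.String())
	}

	return args
}

func TestSplitArgsRespectsQuotes(t *testing.T) {
	// Quotes are stripped, but the space inside them stays part of one argument.
	got := splitArgs(`--verbose --ctx-size 2048 --chat-template "my template"`)
	want := []string{"--verbose", "--ctx-size", "2048", "--chat-template", "my template"}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("splitArgs() = %v, want %v", got, want)
	}
}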

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 29 additions & 14 deletions
@@ -10,10 +10,10 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
+	"strconv"
 
 	"github.com/docker/model-runner/pkg/diskusage"
 	"github.com/docker/model-runner/pkg/inference"
-	"github.com/docker/model-runner/pkg/inference/config"
 	"github.com/docker/model-runner/pkg/inference/models"
 	"github.com/docker/model-runner/pkg/logging"
 )
@@ -39,8 +39,6 @@ type llamaCpp struct {
 	updatedServerStoragePath string
 	// status is the state in which the llama.cpp backend is in.
 	status string
-	// config is the configuration for the llama.cpp backend.
-	config config.BackendConfig
 }
 
 // New creates a new llama.cpp-based backend.
@@ -50,20 +48,13 @@ func New(
 	serverLog logging.Logger,
 	vendoredServerStoragePath string,
 	updatedServerStoragePath string,
-	conf config.BackendConfig,
 ) (inference.Backend, error) {
-	// If no config is provided, use the default configuration
-	if conf == nil {
-		conf = NewDefaultLlamaCppConfig()
-	}
-
 	return &llamaCpp{
 		log: log,
 		modelManager: modelManager,
 		serverLog: serverLog,
 		vendoredServerStoragePath: vendoredServerStoragePath,
 		updatedServerStoragePath: updatedServerStoragePath,
-		config: conf,
 	}, nil
 }
 
@@ -124,6 +115,11 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
 		return fmt.Errorf("failed to get model path: %w", err)
 	}
 
+	modelDesc, err := l.modelManager.GetModel(model)
+	if err != nil {
+		return fmt.Errorf("failed to get model: %w", err)
+	}
+
 	if err := os.RemoveAll(socket); err != nil && !errors.Is(err, fs.ErrNotExist) {
 		l.log.Warnf("failed to remove socket file %s: %w\n", socket, err)
 		l.log.Warnln("llama.cpp may not be able to start")
@@ -133,13 +129,32 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
 	if l.updatedLlamaCpp {
 		binPath = l.updatedServerStoragePath
 	}
-
-	args := l.config.GetArgs(modelPath, socket, mode)
-	l.log.Infof("llamaCppArgs: %v", args)
+	llamaCppArgs := []string{"--model", modelPath, "--jinja", "--host", socket}
+	if mode == inference.BackendModeEmbedding {
+		llamaCppArgs = append(llamaCppArgs, "--embeddings")
+	}
+	if runtime.GOOS == "windows" && runtime.GOARCH == "arm64" {
+		// Using a thread count equal to core count results in bad performance, and there seems to be little to no gain
+		// in going beyond core_count/2.
+		// TODO(p1-0tr): dig into why the defaults don't work well on windows/arm64
+		nThreads := min(2, runtime.NumCPU()/2)
+		llamaCppArgs = append(llamaCppArgs, "--threads", strconv.Itoa(nThreads))
+
+		modelConfig, err := modelDesc.Config()
+		if err != nil {
+			return fmt.Errorf("failed to get model config: %w", err)
+		}
+		// The Adreno OpenCL implementation currently only supports Q4_0
+		if modelConfig.Quantization == "Q4_0" {
+			llamaCppArgs = append(llamaCppArgs, "-ngl", "100")
+		}
+	} else {
+		llamaCppArgs = append(llamaCppArgs, "-ngl", "100")
+	}
 	llamaCppProcess := exec.CommandContext(
 		ctx,
 		filepath.Join(binPath, "com.docker.llama-server"),
-		args...,
+		llamaCppArgs...,
	)
 	llamaCppProcess.Cancel = func() error {
 		if runtime.GOOS == "windows" {
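With the config indirection gone, the Run method above now hard-codes the llama-server argument list that LLAMA_ARGS used to be able to extend. As a rough, hedged mirror of that construction (placeholder model path and socket, non-embedding mode, quantization check omitted; none of this is code from the repository), the resulting invocation on a non-windows/arm64 host looks like this:

// Standalone sketch mirroring the inline argument construction above; the
// model path and socket below are made-up placeholders.
package main

import (
	"fmt"
	"runtime"
	"strconv"
	"strings"
)

func main() {
	modelPath := "/models/example.gguf" // placeholder
	socket := "/tmp/llama.sock"         // placeholder

	args := []string{"--model", modelPath, "--jinja", "--host", socket}
	if runtime.GOOS == "windows" && runtime.GOARCH == "arm64" {
		// windows/arm64 branch from the diff: cap the thread count; -ngl is only
		// added for Q4_0 models there (that check is omitted in this sketch).
		args = append(args, "--threads", strconv.Itoa(min(2, runtime.NumCPU()/2)))
	} else {
		args = append(args, "-ngl", "100")
	}

	// Prints: com.docker.llama-server --model /models/example.gguf --jinja --host /tmp/llama.sock -ngl 100
	fmt.Println("com.docker.llama-server " + strings.Join(args, " "))
}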

pkg/inference/backends/llamacpp/llamacpp_config.go

Lines changed: 0 additions & 59 deletions
This file was deleted.
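The 59 deleted lines are not shown in this view. Purely as a hypothetical reconstruction inferred from the call sites visible in the diffs above (the config.BackendConfig parameter, NewDefaultLlamaCppConfig, l.config.GetArgs(modelPath, socket, mode), and &llamacpp.Config{Args: args}), the removed file plausibly looked something like the sketch below; the inference.BackendMode parameter type is likewise assumed from the Run signature, and the actual deleted code may well have differed.

// Hypothetical reconstruction of the deleted llamacpp_config.go, inferred from
// its call sites in the diffs above; not the actual removed code.
package llamacpp

import "github.com/docker/model-runner/pkg/inference"

// Config carries extra llama.cpp arguments, e.g. those supplied via LLAMA_ARGS.
type Config struct {
	Args []string
}

// NewDefaultLlamaCppConfig returns the configuration used when no custom
// arguments are provided.
func NewDefaultLlamaCppConfig() *Config {
	return &Config{}
}

// GetArgs combines the runner-controlled flags with any user-supplied Args,
// which is the role the config.BackendConfig interface played before this revert.
func (c *Config) GetArgs(modelPath, socket string, mode inference.BackendMode) []string {
	args := []string{"--model", modelPath, "--jinja", "--host", socket}
	if mode == inference.BackendModeEmbedding {
		args = append(args, "--embeddings")
	}
	return append(args, c.Args...)
}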

0 commit comments
