diff --git a/README.md b/README.md index b2bfc79e8..27f7a1cf3 100644 --- a/README.md +++ b/README.md @@ -319,6 +319,45 @@ docker buildx build \ The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://github.com/vllm-project/vllm/releases`, which provides prebuilt wheels for each release version. +### OVMS integration + +Docker Model Runner can also run an OVMS backend. + +- Default OVMS binary: resolved from `PATH` (looks up `ovms`) +- Override binary path with: `OVMS_SERVER_PATH` + +OVMS can be installed by following this [guide](https://docs.openvino.ai/2026/model-server/ovms_docs_deploying_server_baremetal.html). The minimum supported version is 2026.2. + +When the runner starts, it will try to initialize OVMS as an available backend. + +```sh +MODEL_RUNNER_PORT=13434 ./model-runner +``` + +Create a new model. +Use a Hugging Face Hub repository that provides the model in OpenVINO format. +```sh +curl http://localhost:13434/models/create -X POST -d '{"from": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov"}' +``` + +You can target OVMS explicitly through the backend-prefixed OpenAI-compatible routes: +```sh +# List models exposed via OVMS backend routing +curl http://localhost:13434/engines/ovms/v1/models + +# Example chat/completions call through OVMS backend routing +curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{ + "model": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov", + "messages": [ + {"role": "user", "content": "Hello from OVMS"} + ] +}' +``` +Delete the model: +```sh +curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE +``` + ## API Examples The Model Runner exposes a REST API that can be accessed via TCP port. You can interact with it using curl commands. 
@@ -335,7 +374,7 @@ curl http://localhost:8080/models curl http://localhost:8080/models/create -X POST -d '{"from": "ai/smollm2"}' # Get information about a specific model -curl http://localhost:8080/models/ai/smollm2 +curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov # Chat with a model curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{ @@ -347,12 +386,11 @@ curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{ }' # Delete a model -curl http://localhost:8080/models/ai/smollm2 -X DELETE +curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE # Get metrics curl http://localhost:8080/metrics ``` - The response will contain the model's reply: ```json diff --git a/pkg/distribution/builder/from_directory.go b/pkg/distribution/builder/from_directory.go index 68b7818ce..f399c7991 100644 --- a/pkg/distribution/builder/from_directory.go +++ b/pkg/distribution/builder/from_directory.go @@ -34,6 +34,10 @@ type DirectoryOptions struct { // This is useful for producing deterministic OCI digests. Created *time.Time + // AllowNoWeightFiles allows packaging a directory even when it contains no + // GGUF/SafeTensors/DDUF weight files. + AllowNoWeightFiles bool + // Format is the output artifact format. Defaults to BuildFormatDocker. Format BuildFormat } @@ -66,6 +70,15 @@ func WithCreatedTime(t time.Time) DirectoryOption { } } +// WithAllowNoWeightFiles allows FromDirectory to succeed even when no standard +// model weight files are present. This is used for formats such as OpenVINO IR +// where model files are represented differently (for example .xml + .bin pairs). +func WithAllowNoWeightFiles() DirectoryOption { + return func(opts *DirectoryOptions) { + opts.AllowNoWeightFiles = true + } +} + // WithOutputFormat sets the output artifact format for the directory builder. // Defaults to BuildFormatDocker if not specified. 
// This is the DirectoryOption equivalent of WithFormat (BuildOption). @@ -208,7 +221,7 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) { return nil, fmt.Errorf("no files found in directory: %s", dirPath) } - if len(weightFiles) == 0 { + if len(weightFiles) == 0 && !options.AllowNoWeightFiles { return nil, fmt.Errorf("no weight files (safetensors, GGUF, or DDUF) found in directory: %s", dirPath) } diff --git a/pkg/distribution/builder/from_directory_test.go b/pkg/distribution/builder/from_directory_test.go index 1fc1145aa..7dca73a99 100644 --- a/pkg/distribution/builder/from_directory_test.go +++ b/pkg/distribution/builder/from_directory_test.go @@ -3,6 +3,7 @@ package builder import ( "os" "path/filepath" + "strings" "testing" "time" ) @@ -170,6 +171,44 @@ func TestFromDirectoryWithExclusions(t *testing.T) { } } +func TestFromDirectoryNoStandardWeights(t *testing.T) { + tmpDir := t.TempDir() + createTestFile(t, tmpDir, "openvino/model.xml", "") + createTestFile(t, tmpDir, "openvino/model.bin", "weights") + createTestFile(t, tmpDir, "openvino/config.json", "{}") + + _, err := FromDirectory(tmpDir) + if err == nil { + t.Fatal("expected error when directory has no GGUF/SafeTensors/DDUF weights") + } + + if got := err.Error(); got == "" || !strings.Contains(got, "no weight files") { + t.Fatalf("expected no weight files error, got %q", got) + } +} + +func TestFromDirectoryAllowNoWeightFiles(t *testing.T) { + tmpDir := t.TempDir() + createTestFile(t, tmpDir, "openvino/model.xml", "") + createTestFile(t, tmpDir, "openvino/model.bin", "weights") + createTestFile(t, tmpDir, "openvino/config.json", "{}") + + b, err := FromDirectory(tmpDir, WithAllowNoWeightFiles()) + if err != nil { + t.Fatalf("FromDirectory with WithAllowNoWeightFiles failed: %v", err) + } + + mdl := b.Model() + layers, err := mdl.Layers() + if err != nil { + t.Fatalf("Failed to get layers: %v", err) + } + + if len(layers) != 3 { + t.Errorf("Expected 3 layers, got %d", 
len(layers)) + } +} + func TestShouldExclude(t *testing.T) { tests := []struct { name string diff --git a/pkg/distribution/huggingface/model.go b/pkg/distribution/huggingface/model.go index 529f3ae9f..a8a64dd2e 100644 --- a/pkg/distribution/huggingface/model.go +++ b/pkg/distribution/huggingface/model.go @@ -31,8 +31,9 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string, // Filter to model files (weights + configs) weightFiles, configFiles := FilterModelFiles(files) + isOpenVINORepo := IsOpenVINOModel(files) - if len(weightFiles) == 0 { + if len(weightFiles) == 0 && !isOpenVINORepo { return nil, fmt.Errorf("no model weight files (GGUF or SafeTensors) found in repository %s", repo) } @@ -54,10 +55,20 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string, } } - // Combine all files to download - allFiles := append(weightFiles, configFiles...) - if mmprojFile != nil { - allFiles = append(allFiles, *mmprojFile) + // Combine all files to download. + // For OpenVINO repositories, pull all repository files so the full IR layout is preserved. + var allFiles []RepoFile + if isOpenVINORepo { + for _, f := range files { + if f.Type == "file" { + allFiles = append(allFiles, f) + } + } + } else { + allFiles = append(weightFiles, configFiles...) 
+ if mmprojFile != nil { + allFiles = append(allFiles, *mmprojFile) + } } if progressWriter != nil { @@ -91,7 +102,7 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string, } model, err := buildModelFromFiles( - result.LocalPaths, weightFiles, configFiles, mmprojFile, tempDir, createdTime, + result.LocalPaths, weightFiles, configFiles, mmprojFile, tempDir, createdTime, isOpenVINORepo, ) if err != nil { return nil, fmt.Errorf("build model: %w", err) @@ -111,26 +122,30 @@ func buildModelFromFiles( mmprojFile *RepoFile, tempDir string, createdTime *time.Time, + allowNoStandardWeights bool, ) (types.ModelArtifact, error) { - // Check if this is a safetensors model - use V0.2 packaging - if isSafetensorsModel(weightFiles) { - return buildSafetensorsModelV02(tempDir, createdTime) + // Safetensors and OpenVINO repos are packaged with V0.2 layer-per-file packaging. + if isSafetensorsModel(weightFiles) || allowNoStandardWeights { + return buildDirectoryModelV02(tempDir, createdTime, allowNoStandardWeights) } // For GGUF models, use V0.1 packaging (backward compatible) return buildGGUFModelV01(localPaths, weightFiles, configFiles, mmprojFile, createdTime) } -// buildSafetensorsModelV02 builds a safetensors model using V0.2 layer-per-file packaging. +// buildDirectoryModelV02 builds a model using V0.2 layer-per-file packaging. // It uses builder.FromDirectory which recursively scans the tempDir and creates one layer // per file, preserving nested directory structure with filepath annotations. // If createdTime is non-nil, it is used as the creation timestamp for the OCI config // to produce deterministic digests. Otherwise time.Now() is used. 
-func buildSafetensorsModelV02(tempDir string, createdTime *time.Time) (types.ModelArtifact, error) { +func buildDirectoryModelV02(tempDir string, createdTime *time.Time, allowNoStandardWeights bool) (types.ModelArtifact, error) { var dirOpts []builder.DirectoryOption if createdTime != nil { dirOpts = append(dirOpts, builder.WithCreatedTime(*createdTime)) } + if allowNoStandardWeights { + dirOpts = append(dirOpts, builder.WithAllowNoWeightFiles()) + } b, err := builder.FromDirectory(tempDir, dirOpts...) if err != nil { diff --git a/pkg/distribution/huggingface/repository.go b/pkg/distribution/huggingface/repository.go index 1c7bb3f86..cd5bbd85b 100644 --- a/pkg/distribution/huggingface/repository.go +++ b/pkg/distribution/huggingface/repository.go @@ -62,6 +62,40 @@ func FilterModelFiles(repoFiles []RepoFile) (weights []RepoFile, configs []RepoF return weights, configs } +// IsOpenVINOModel returns true when the repository contains at least one OpenVINO +// IR weight pair (.xml + .bin with the same stem), including nested paths. 
+func IsOpenVINOModel(repoFiles []RepoFile) bool { + xmlFiles := make(map[string]struct{}) + binFiles := make(map[string]struct{}) + + for _, f := range repoFiles { + if f.Type != "file" { + continue + } + + ext := strings.ToLower(path.Ext(f.Path)) + if ext != ".xml" && ext != ".bin" { + continue + } + + stem := strings.TrimSuffix(f.Path, path.Ext(f.Path)) + switch ext { + case ".xml": + xmlFiles[stem] = struct{}{} + case ".bin": + binFiles[stem] = struct{}{} + } + } + + for stem := range xmlFiles { + if _, ok := binFiles[stem]; ok { + return true + } + } + + return false +} + // TotalSize calculates the total size of files func TotalSize(repoFiles []RepoFile) int64 { var total int64 diff --git a/pkg/distribution/huggingface/repository_test.go b/pkg/distribution/huggingface/repository_test.go index 16e3a159b..4cbe350ec 100644 --- a/pkg/distribution/huggingface/repository_test.go +++ b/pkg/distribution/huggingface/repository_test.go @@ -102,3 +102,53 @@ func TestIsSafetensorsModel(t *testing.T) { }) } } + +func TestIsOpenVINOModel(t *testing.T) { + tests := []struct { + name string + files []RepoFile + want bool + }{ + { + name: "matching xml/bin pair at root", + files: []RepoFile{ + {Type: "file", Path: "openvino_model.xml"}, + {Type: "file", Path: "openvino_model.bin"}, + }, + want: true, + }, + { + name: "matching xml/bin pair in subdirectory", + files: []RepoFile{ + {Type: "file", Path: "int4/openvino_model.xml"}, + {Type: "file", Path: "int4/openvino_model.bin"}, + {Type: "file", Path: "int4/config.json"}, + }, + want: true, + }, + { + name: "xml without matching bin", + files: []RepoFile{ + {Type: "file", Path: "openvino_model.xml"}, + {Type: "file", Path: "other_model.bin"}, + }, + want: false, + }, + { + name: "no openvino files", + files: []RepoFile{ + {Type: "file", Path: "model.safetensors"}, + {Type: "file", Path: "config.json"}, + }, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := 
IsOpenVINOModel(tt.files); got != tt.want { + t.Errorf("IsOpenVINOModel() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/envconfig/envconfig.go b/pkg/envconfig/envconfig.go index 4f6ab60c5..c6ac64da5 100644 --- a/pkg/envconfig/envconfig.go +++ b/pkg/envconfig/envconfig.go @@ -152,6 +152,12 @@ func VLLMMetalServerPath() string { return Var("VLLM_METAL_SERVER_PATH") } +// OVMSServerPath returns the optional path to the OVMS server binary. +// Configured via OVMS_SERVER_PATH. +func OVMSServerPath() string { + return Var("OVMS_SERVER_PATH") +} + // LogDir returns the directory containing DMR log files. // Configured via MODEL_RUNNER_LOG_DIR. When empty, the server // auto-creates a default log directory so that the /logs API diff --git a/pkg/inference/backends/ovms/ovms.go b/pkg/inference/backends/ovms/ovms.go new file mode 100644 index 000000000..7a6f133ae --- /dev/null +++ b/pkg/inference/backends/ovms/ovms.go @@ -0,0 +1,183 @@ +package ovms + +import ( + "context" + "errors" + "fmt" + "log/slog" + "net" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/docker/model-runner/pkg/inference" + "github.com/docker/model-runner/pkg/inference/backends" + "github.com/docker/model-runner/pkg/inference/models" + "github.com/docker/model-runner/pkg/logging" +) + +const ( + // Name is the backend name. 
+ Name = "ovms" +) + +var ErrOVMSNotFound = errors.New("ovms binary not found") + +type ovms struct { + log logging.Logger + modelManager *models.Manager + serverLog logging.Logger + status string + customBinaryPath string +} + +func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, customBinaryPath string) (inference.Backend, error) { + return &ovms{ + log: log, + modelManager: modelManager, + serverLog: serverLog, + status: inference.FormatNotInstalled(""), + customBinaryPath: customBinaryPath, + }, nil +} + +func (o *ovms) Name() string { + return Name +} + +func (o *ovms) UsesExternalModelManagement() bool { + return false +} + +func (o *ovms) UsesTCP() bool { + return true +} + +func (o *ovms) HealthPath() string { + return "/v2/health/ready" +} + +func (o *ovms) RewritePath(path string) string { + if len(path) > 3 && path[:4] == "/v1/" { + return "/v3/" + path[4:] + } + return path +} + +func (o *ovms) Install(ctx context.Context, _ *http.Client) error { + binary := o.binaryPath() + if o.customBinaryPath != "" { + o.log.Info("OVMS binary configured via OVMS_SERVER_PATH", "path", binary) + } else if resolved, err := exec.LookPath(Name); err == nil { + o.log.Info("OVMS binary resolved from PATH", "path", resolved) + } + if _, err := exec.LookPath(binary); err != nil { + o.status = inference.FormatNotInstalled("") + return ErrOVMSNotFound + } + + checkCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + output, err := exec.CommandContext(checkCtx, binary, "--version").Output() + if err != nil { + o.log.Warn("could not get OVMS version", "error", err) + o.status = inference.FormatRunning(inference.DetailVersionUnknown) + return nil + } + + versionLine := strings.TrimSpace(string(output)) + if versionLine == "" { + o.status = inference.FormatRunning(inference.DetailVersionUnknown) + return nil + } + + o.status = inference.FormatRunning(versionLine) + return nil +} + +func (o *ovms) Run(ctx context.Context, 
socket, model string, modelRef string, _ inference.BackendMode, _ *inference.BackendConfiguration) error { + bundle, err := o.modelManager.GetBundle(model) + if err != nil { + return fmt.Errorf("failed to get model: %w", err) + } + modelPath := resolveOVMSModelPath(bundle.RootDir()) + + _, port, err := net.SplitHostPort(socket) + if err != nil { + return fmt.Errorf("invalid backend socket address %q: %w", socket, err) + } + + // Use the human-readable model reference for --model_name so that + // incoming requests (which carry the original name) match. + modelName := modelRef + if modelName == "" { + modelName = model + } + logLevel := ovmsLogLevel(o.log) + + args := []string{ + "--rest_port", port, + "--port", "0", + "--model_name", modelName, + "--model_path", modelPath, + "--task", "text_generation", + "--log_level", logLevel, + } + + return backends.RunBackend(ctx, backends.RunnerConfig{ + BackendName: "OVMS", + Socket: socket, + BinaryPath: o.binaryPath(), + SandboxPath: filepath.Dir(o.binaryPath()), + SandboxConfig: "", + Args: args, + Logger: o.log, + ServerLogWriter: logging.NewWriter(o.serverLog), + }) +} + +// Uninstall implements inference.Backend.Uninstall. +func (o *ovms) Uninstall() error { + return nil +} + +func (o *ovms) Status() string { + return o.status +} + +func (o *ovms) GetDiskUsage() (int64, error) { + return 0, nil +} + +func (o *ovms) binaryPath() string { + if o.customBinaryPath != "" { + return o.customBinaryPath + } + if path, err := exec.LookPath(Name); err == nil { + return path + } + // Keep command name as a last resort so error reporting remains clear. + return Name +} + +// resolveOVMSModelPath returns the path OVMS should receive via --model_path. +// Runtime bundles store model files under a dedicated "model" subdirectory. +// Fallback to the bundle root for backward compatibility if it does not exist. 
+func resolveOVMSModelPath(bundleRoot string) string { + modelDir := filepath.Join(bundleRoot, "model") + if info, err := os.Stat(modelDir); err == nil && info.IsDir() { + return modelDir + } + return bundleRoot +} + +func ovmsLogLevel(logger logging.Logger) string { + if logger.Enabled(context.Background(), slog.LevelDebug) { + return "DEBUG" + } + return "INFO" +} diff --git a/pkg/inference/backends/ovms/ovms_test.go b/pkg/inference/backends/ovms/ovms_test.go new file mode 100644 index 000000000..3a67e4953 --- /dev/null +++ b/pkg/inference/backends/ovms/ovms_test.go @@ -0,0 +1,74 @@ +package ovms + +import ( + "log/slog" + "os" + "path/filepath" + "testing" + + "github.com/docker/model-runner/pkg/logging" +) + +func TestBinaryPath(t *testing.T) { + t.Run("uses custom binary path when provided", func(t *testing.T) { + o := &ovms{customBinaryPath: "/tmp/custom-ovms"} + if got := o.binaryPath(); got != "/tmp/custom-ovms" { + t.Fatalf("binaryPath() = %q, want %q", got, "/tmp/custom-ovms") + } + }) + + t.Run("uses ovms from PATH when custom path is empty", func(t *testing.T) { + binDir := t.TempDir() + binary := filepath.Join(binDir, Name) + if err := os.WriteFile(binary, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatalf("write fake ovms binary: %v", err) + } + + originalPath := os.Getenv("PATH") + t.Setenv("PATH", binDir+string(os.PathListSeparator)+originalPath) + + o := &ovms{} + if got := o.binaryPath(); got != binary { + t.Fatalf("binaryPath() = %q, want %q", got, binary) + } + }) +} + +func TestResolveOVMSModelPath(t *testing.T) { + t.Run("uses model subdirectory when present", func(t *testing.T) { + bundleRoot := t.TempDir() + modelDir := filepath.Join(bundleRoot, "model") + if err := os.MkdirAll(modelDir, 0755); err != nil { + t.Fatalf("mkdir model dir: %v", err) + } + + got := resolveOVMSModelPath(bundleRoot) + if got != modelDir { + t.Fatalf("resolveOVMSModelPath() = %q, want %q", got, modelDir) + } + }) + + t.Run("falls back to bundle root when 
model subdirectory is missing", func(t *testing.T) { + bundleRoot := t.TempDir() + got := resolveOVMSModelPath(bundleRoot) + if got != bundleRoot { + t.Fatalf("resolveOVMSModelPath() = %q, want %q", got, bundleRoot) + } + }) +} + +func TestOVMSLogLevel(t *testing.T) { + t.Run("debug logger uses DEBUG", func(t *testing.T) { + logger := logging.NewLogger(slog.LevelDebug) + if got := ovmsLogLevel(logger); got != "DEBUG" { + t.Fatalf("ovmsLogLevel() = %q, want %q", got, "DEBUG") + } + }) + + t.Run("non-debug logger uses INFO", func(t *testing.T) { + logger := logging.NewLogger(slog.LevelInfo) + if got := ovmsLogLevel(logger); got != "INFO" { + t.Fatalf("ovmsLogLevel() = %q, want %q", got, "INFO") + } + }) +} diff --git a/pkg/inference/backends/runner.go b/pkg/inference/backends/runner.go index 8e0608053..5d7684f43 100644 --- a/pkg/inference/backends/runner.go +++ b/pkg/inference/backends/runner.go @@ -23,7 +23,7 @@ type ErrorTransformer func(output string) string // RunnerConfig holds configuration for a backend runner type RunnerConfig struct { - // BackendName is the display name of the backend (e.g., "llama.cpp", "vLLM") + // BackendName is the display name of the backend (e.g., "llama.cpp", "vLLM", "ovms") BackendName string // Socket is the unix socket path Socket string diff --git a/pkg/inference/scheduling/http_handler.go b/pkg/inference/scheduling/http_handler.go index 323fbe82d..da9143f74 100644 --- a/pkg/inference/scheduling/http_handler.go +++ b/pkg/inference/scheduling/http_handler.go @@ -290,6 +290,12 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque // a 422 response. upstreamRequest := r.Clone(r.Context()) upstreamRequest.Body = io.NopCloser(bytes.NewReader(body)) + // OpenAI-compatible inference endpoints always expect JSON payloads. 
+ // Some clients (for example curl without explicit headers) default to + // application/x-www-form-urlencoded for -d bodies, which breaks OVMS + // routing and causes path-based model resolution. Normalize to JSON. + upstreamRequest.Header.Set("Content-Type", "application/json") + // Ensure the backend always receives a Content-Length header. upstreamRequest.ContentLength = int64(len(body)) // Perform the request. diff --git a/pkg/inference/scheduling/runner.go b/pkg/inference/scheduling/runner.go index ed5ebff9c..64aa9ef82 100644 --- a/pkg/inference/scheduling/runner.go +++ b/pkg/inference/scheduling/runner.go @@ -131,6 +131,11 @@ func run( // Remove the prefix up to the OpenAI API root. pr.Out.URL.Path = trimRequestPathToOpenAIRoot(pr.Out.URL.Path) pr.Out.URL.RawPath = trimRequestPathToOpenAIRoot(pr.Out.URL.RawPath) + // Allow backends to rewrite the proxied path. + if rp, ok := backend.(interface{ RewritePath(string) string }); ok { + pr.Out.URL.Path = rp.RewritePath(pr.Out.URL.Path) + pr.Out.URL.RawPath = rp.RewritePath(pr.Out.URL.RawPath) + } }, } proxy.ModifyResponse = func(resp *http.Response) error { @@ -210,6 +215,12 @@ func run( // wait waits for the runner to be ready. func (r *runner) wait(ctx context.Context) error { + // Determine the health endpoint for this backend. + healthPath := "/health" + if hp, ok := r.backend.(interface{ HealthPath() string }); ok { + healthPath = hp.HealthPath() + } + // Loop and poll for readiness. for p := 0; p < maximumReadinessPings; p++ { select { @@ -222,7 +233,7 @@ func (r *runner) wait(ctx context.Context) error { } // Create and execute a request targeting the health endpoint. // Note: /health returns 503 during model loading, 200 when ready. 
- readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody) + readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost"+healthPath, http.NoBody) if err != nil { return fmt.Errorf("readiness request creation failed: %w", err) } diff --git a/pkg/server/server.go b/pkg/server/server.go index a66bb7d04..fca94da85 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -18,6 +18,7 @@ import ( "github.com/docker/model-runner/pkg/envconfig" "github.com/docker/model-runner/pkg/inference" "github.com/docker/model-runner/pkg/inference/backends/llamacpp" + "github.com/docker/model-runner/pkg/inference/backends/ovms" "github.com/docker/model-runner/pkg/inference/backends/sglang" "github.com/docker/model-runner/pkg/inference/config" "github.com/docker/model-runner/pkg/inference/models" @@ -65,6 +66,7 @@ func Run(ctx context.Context, cfg Config) error { sglangServerPath := envconfig.SGLangServerPath() mlxServerPath := envconfig.MLXServerPath() diffusersServerPath := envconfig.DiffusersServerPath() + ovmsServerPath := envconfig.OVMSServerPath() vllmMetalServerPath := envconfig.VLLMMetalServerPath() // Create a proxy-aware HTTP transport. @@ -92,6 +94,9 @@ func Run(ctx context.Context, cfg Config) error { if vllmMetalServerPath != "" { log.Info("VLLM_METAL_SERVER_PATH", "path", vllmMetalServerPath) } + if ovmsServerPath != "" { + log.Info("OVMS_SERVER_PATH", "path", ovmsServerPath) + } // Determine log directory. 
When MODEL_RUNNER_LOG_DIR is set use it; // otherwise auto-create a default directory so that the /logs endpoint is @@ -186,6 +191,9 @@ func Run(ctx context.Context, cfg Config) error { DiffusersPath: diffusersServerPath, RegistryMirrors: envconfig.RegistryMirrors(), }), + routing.BackendDef{Name: ovms.Name, Init: func(mm *models.Manager) (inference.Backend, error) { + return ovms.New(log, mm, log.With("component", ovms.Name), ovmsServerPath) + }}, routing.BackendDef{Name: sglang.Name, Init: func(mm *models.Manager) (inference.Backend, error) { return sglang.New(log, mm, log.With("component", sglang.Name), nil, sglangServerPath) }},