diff --git a/README.md b/README.md
index b2bfc79e8..27f7a1cf3 100644
--- a/README.md
+++ b/README.md
@@ -319,6 +319,45 @@ docker buildx build \
The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://github.com/vllm-project/vllm/releases`, which provides prebuilt wheels for each release version.
+### OVMS integration
+
+Docker Model Runner can also serve models through an OpenVINO Model Server (OVMS) backend.
+
+- Default OVMS binary: resolved from `PATH` (looks up `ovms`)
+- Override binary path with: `OVMS_SERVER_PATH`
+
+OVMS can be installed by following this [guide](https://docs.openvino.ai/2026/model-server/ovms_docs_deploying_server_baremetal.html). The minimum supported version is 2026.2.
+
+When the runner starts, it will try to initialize OVMS as an available backend.
+
+```sh
+MODEL_RUNNER_PORT=13434 ./model-runner
+```
+
+Create a new model by pulling a Hugging Face Hub repository that provides the model
+in OpenVINO format:
+```sh
+curl http://localhost:13434/models/create -X POST -d '{"from": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov"}'
+```
+
+You can target OVMS explicitly through the backend-prefixed OpenAI-compatible routes:
+```sh
+# List models exposed via OVMS backend routing
+curl http://localhost:13434/engines/ovms/v1/models
+
+# Example chat/completions call through OVMS backend routing
+curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{
+ "model": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov",
+ "messages": [
+ {"role": "user", "content": "Hello from OVMS"}
+ ]
+}'
+```
+Delete the model:
+```sh
+curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE
+```
+
## API Examples
The Model Runner exposes a REST API that can be accessed via TCP port. You can interact with it using curl commands.
@@ -335,7 +374,7 @@ curl http://localhost:8080/models
curl http://localhost:8080/models/create -X POST -d '{"from": "ai/smollm2"}'
# Get information about a specific model
-curl http://localhost:8080/models/ai/smollm2
+curl http://localhost:8080/models/ai/smollm2
# Chat with a model
curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
@@ -347,12 +386,11 @@ curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
}'
# Delete a model
-curl http://localhost:8080/models/ai/smollm2 -X DELETE
+curl http://localhost:8080/models/ai/smollm2 -X DELETE
# Get metrics
curl http://localhost:8080/metrics
```
-
The response will contain the model's reply:
```json
diff --git a/pkg/distribution/builder/from_directory.go b/pkg/distribution/builder/from_directory.go
index 68b7818ce..f399c7991 100644
--- a/pkg/distribution/builder/from_directory.go
+++ b/pkg/distribution/builder/from_directory.go
@@ -34,6 +34,10 @@ type DirectoryOptions struct {
// This is useful for producing deterministic OCI digests.
Created *time.Time
+ // AllowNoWeightFiles allows packaging a directory even when it contains no
+ // GGUF/SafeTensors/DDUF weight files.
+ AllowNoWeightFiles bool
+
// Format is the output artifact format. Defaults to BuildFormatDocker.
Format BuildFormat
}
@@ -66,6 +70,15 @@ func WithCreatedTime(t time.Time) DirectoryOption {
}
}
+// WithAllowNoWeightFiles lets FromDirectory package a directory that contains
+// no GGUF/SafeTensors/DDUF weight files. It is intended for formats such as
+// OpenVINO IR, whose weights are laid out differently (e.g. .xml + .bin pairs).
+func WithAllowNoWeightFiles() DirectoryOption {
+	return func(o *DirectoryOptions) {
+		o.AllowNoWeightFiles = true
+	}
+}
+
// WithOutputFormat sets the output artifact format for the directory builder.
// Defaults to BuildFormatDocker if not specified.
// This is the DirectoryOption equivalent of WithFormat (BuildOption).
@@ -208,7 +221,7 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) {
return nil, fmt.Errorf("no files found in directory: %s", dirPath)
}
- if len(weightFiles) == 0 {
+ if len(weightFiles) == 0 && !options.AllowNoWeightFiles {
return nil, fmt.Errorf("no weight files (safetensors, GGUF, or DDUF) found in directory: %s", dirPath)
}
diff --git a/pkg/distribution/builder/from_directory_test.go b/pkg/distribution/builder/from_directory_test.go
index 1fc1145aa..7dca73a99 100644
--- a/pkg/distribution/builder/from_directory_test.go
+++ b/pkg/distribution/builder/from_directory_test.go
@@ -3,6 +3,7 @@ package builder
import (
"os"
"path/filepath"
+ "strings"
"testing"
"time"
)
@@ -170,6 +171,44 @@ func TestFromDirectoryWithExclusions(t *testing.T) {
}
}
+// TestFromDirectoryNoStandardWeights checks that an OpenVINO-only directory is rejected by default.
+func TestFromDirectoryNoStandardWeights(t *testing.T) {
+	dir := t.TempDir()
+	createTestFile(t, dir, "openvino/model.xml", "")
+	createTestFile(t, dir, "openvino/model.bin", "weights")
+	createTestFile(t, dir, "openvino/config.json", "{}")
+
+	_, err := FromDirectory(dir)
+	if err == nil {
+		t.Fatal("expected error when directory has no GGUF/SafeTensors/DDUF weights")
+	}
+	if msg := err.Error(); !strings.Contains(msg, "no weight files") {
+		t.Fatalf("expected no weight files error, got %q", msg)
+	}
+}
+
+// TestFromDirectoryAllowNoWeightFiles checks that WithAllowNoWeightFiles lets an
+// OpenVINO-only directory be packaged, expecting one layer for each of its files.
+func TestFromDirectoryAllowNoWeightFiles(t *testing.T) {
+	dir := t.TempDir()
+	createTestFile(t, dir, "openvino/model.xml", "")
+	createTestFile(t, dir, "openvino/model.bin", "weights")
+	createTestFile(t, dir, "openvino/config.json", "{}")
+
+	b, err := FromDirectory(dir, WithAllowNoWeightFiles())
+	if err != nil {
+		t.Fatalf("FromDirectory with WithAllowNoWeightFiles failed: %v", err)
+	}
+
+	layers, err := b.Model().Layers()
+	if err != nil {
+		t.Fatalf("Failed to get layers: %v", err)
+	}
+	if got := len(layers); got != 3 {
+		t.Errorf("Expected 3 layers, got %d", got)
+	}
+}
+
func TestShouldExclude(t *testing.T) {
tests := []struct {
name string
diff --git a/pkg/distribution/huggingface/model.go b/pkg/distribution/huggingface/model.go
index 529f3ae9f..a8a64dd2e 100644
--- a/pkg/distribution/huggingface/model.go
+++ b/pkg/distribution/huggingface/model.go
@@ -31,8 +31,9 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
// Filter to model files (weights + configs)
weightFiles, configFiles := FilterModelFiles(files)
+ isOpenVINORepo := IsOpenVINOModel(files)
- if len(weightFiles) == 0 {
+ if len(weightFiles) == 0 && !isOpenVINORepo {
return nil, fmt.Errorf("no model weight files (GGUF or SafeTensors) found in repository %s", repo)
}
@@ -54,10 +55,20 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
}
}
- // Combine all files to download
- allFiles := append(weightFiles, configFiles...)
- if mmprojFile != nil {
- allFiles = append(allFiles, *mmprojFile)
+ // Combine all files to download.
+ // For OpenVINO repositories, pull all repository files so the full IR layout is preserved.
+ var allFiles []RepoFile
+ if isOpenVINORepo {
+ for _, f := range files {
+ if f.Type == "file" {
+ allFiles = append(allFiles, f)
+ }
+ }
+ } else {
+ allFiles = append(weightFiles, configFiles...)
+ if mmprojFile != nil {
+ allFiles = append(allFiles, *mmprojFile)
+ }
}
if progressWriter != nil {
@@ -91,7 +102,7 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
}
model, err := buildModelFromFiles(
- result.LocalPaths, weightFiles, configFiles, mmprojFile, tempDir, createdTime,
+ result.LocalPaths, weightFiles, configFiles, mmprojFile, tempDir, createdTime, isOpenVINORepo,
)
if err != nil {
return nil, fmt.Errorf("build model: %w", err)
@@ -111,26 +122,30 @@ func buildModelFromFiles(
mmprojFile *RepoFile,
tempDir string,
createdTime *time.Time,
+ allowNoStandardWeights bool,
) (types.ModelArtifact, error) {
- // Check if this is a safetensors model - use V0.2 packaging
- if isSafetensorsModel(weightFiles) {
- return buildSafetensorsModelV02(tempDir, createdTime)
+ // Safetensors and OpenVINO repos are packaged with V0.2 layer-per-file packaging.
+ if isSafetensorsModel(weightFiles) || allowNoStandardWeights {
+ return buildDirectoryModelV02(tempDir, createdTime, allowNoStandardWeights)
}
// For GGUF models, use V0.1 packaging (backward compatible)
return buildGGUFModelV01(localPaths, weightFiles, configFiles, mmprojFile, createdTime)
}
-// buildSafetensorsModelV02 builds a safetensors model using V0.2 layer-per-file packaging.
+// buildDirectoryModelV02 builds a model using V0.2 layer-per-file packaging.
// It uses builder.FromDirectory which recursively scans the tempDir and creates one layer
// per file, preserving nested directory structure with filepath annotations.
// If createdTime is non-nil, it is used as the creation timestamp for the OCI config
// to produce deterministic digests. Otherwise time.Now() is used.
-func buildSafetensorsModelV02(tempDir string, createdTime *time.Time) (types.ModelArtifact, error) {
+func buildDirectoryModelV02(tempDir string, createdTime *time.Time, allowNoStandardWeights bool) (types.ModelArtifact, error) {
var dirOpts []builder.DirectoryOption
if createdTime != nil {
dirOpts = append(dirOpts, builder.WithCreatedTime(*createdTime))
}
+ if allowNoStandardWeights {
+ dirOpts = append(dirOpts, builder.WithAllowNoWeightFiles())
+ }
b, err := builder.FromDirectory(tempDir, dirOpts...)
if err != nil {
diff --git a/pkg/distribution/huggingface/repository.go b/pkg/distribution/huggingface/repository.go
index 1c7bb3f86..cd5bbd85b 100644
--- a/pkg/distribution/huggingface/repository.go
+++ b/pkg/distribution/huggingface/repository.go
@@ -62,6 +62,40 @@ func FilterModelFiles(repoFiles []RepoFile) (weights []RepoFile, configs []RepoF
return weights, configs
}
+// IsOpenVINOModel returns true when the repository contains at least one OpenVINO
+// IR weight pair (.xml + .bin with the same stem), including nested paths.
+func IsOpenVINOModel(repoFiles []RepoFile) bool {
+ xmlFiles := make(map[string]struct{})
+ binFiles := make(map[string]struct{})
+
+ for _, f := range repoFiles {
+ if f.Type != "file" {
+ continue
+ }
+
+ ext := strings.ToLower(path.Ext(f.Path))
+ if ext != ".xml" && ext != ".bin" {
+ continue
+ }
+
+ stem := strings.TrimSuffix(f.Path, path.Ext(f.Path))
+ switch ext {
+ case ".xml":
+ xmlFiles[stem] = struct{}{}
+ case ".bin":
+ binFiles[stem] = struct{}{}
+ }
+ }
+
+ for stem := range xmlFiles {
+ if _, ok := binFiles[stem]; ok {
+ return true
+ }
+ }
+
+ return false
+}
+
// TotalSize calculates the total size of files
func TotalSize(repoFiles []RepoFile) int64 {
var total int64
diff --git a/pkg/distribution/huggingface/repository_test.go b/pkg/distribution/huggingface/repository_test.go
index 16e3a159b..4cbe350ec 100644
--- a/pkg/distribution/huggingface/repository_test.go
+++ b/pkg/distribution/huggingface/repository_test.go
@@ -102,3 +102,53 @@ func TestIsSafetensorsModel(t *testing.T) {
})
}
}
+
+// TestIsOpenVINOModel covers .xml/.bin pair detection at the root, in subdirectories, and for unmatched stems.
+func TestIsOpenVINOModel(t *testing.T) {
+	cases := []struct {
+		name  string
+		files []RepoFile
+		want  bool
+	}{
+		{
+			name: "matching xml/bin pair at root",
+			files: []RepoFile{
+				{Type: "file", Path: "openvino_model.xml"},
+				{Type: "file", Path: "openvino_model.bin"},
+			},
+			want: true,
+		},
+		{
+			name: "matching xml/bin pair in subdirectory",
+			files: []RepoFile{
+				{Type: "file", Path: "int4/openvino_model.xml"},
+				{Type: "file", Path: "int4/openvino_model.bin"},
+				{Type: "file", Path: "int4/config.json"},
+			},
+			want: true,
+		},
+		{
+			name: "xml without matching bin",
+			files: []RepoFile{
+				{Type: "file", Path: "openvino_model.xml"},
+				{Type: "file", Path: "other_model.bin"},
+			},
+			want: false,
+		},
+		{
+			name: "no openvino files",
+			files: []RepoFile{
+				{Type: "file", Path: "model.safetensors"},
+				{Type: "file", Path: "config.json"},
+			},
+			want: false,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := IsOpenVINOModel(tc.files); got != tc.want {
+				t.Errorf("IsOpenVINOModel() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
diff --git a/pkg/envconfig/envconfig.go b/pkg/envconfig/envconfig.go
index 4f6ab60c5..c6ac64da5 100644
--- a/pkg/envconfig/envconfig.go
+++ b/pkg/envconfig/envconfig.go
@@ -152,6 +152,12 @@ func VLLMMetalServerPath() string {
return Var("VLLM_METAL_SERVER_PATH")
}
+// OVMSServerPath returns the optional path to the OVMS server binary, configured
+// via OVMS_SERVER_PATH. When it is empty, the OVMS backend looks up "ovms" on PATH.
+func OVMSServerPath() string {
+ return Var("OVMS_SERVER_PATH")
+}
+
// LogDir returns the directory containing DMR log files.
// Configured via MODEL_RUNNER_LOG_DIR. When empty, the server
// auto-creates a default log directory so that the /logs API
diff --git a/pkg/inference/backends/ovms/ovms.go b/pkg/inference/backends/ovms/ovms.go
new file mode 100644
index 000000000..7a6f133ae
--- /dev/null
+++ b/pkg/inference/backends/ovms/ovms.go
@@ -0,0 +1,183 @@
+package ovms
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "log/slog"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "github.com/docker/model-runner/pkg/inference"
+ "github.com/docker/model-runner/pkg/inference/backends"
+ "github.com/docker/model-runner/pkg/inference/models"
+ "github.com/docker/model-runner/pkg/logging"
+)
+
+// Name is the backend name.
+const Name = "ovms"
+
+// ErrOVMSNotFound is returned by Install when the OVMS binary cannot be
+// located, either at the configured path or on PATH.
+var ErrOVMSNotFound = errors.New("ovms binary not found")
+
+// ovms is the inference.Backend implementation that drives an OVMS server binary.
+type ovms struct {
+	log, serverLog   logging.Logger
+	modelManager     *models.Manager
+	status           string
+	customBinaryPath string
+}
+
+// New constructs the OVMS backend. Its status reads "not installed" until
+// Install updates it. customBinaryPath overrides the binary location; when it
+// is empty, "ovms" is looked up on PATH. The returned error is always nil.
+func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, customBinaryPath string) (inference.Backend, error) {
+	backend := &ovms{log: log, serverLog: serverLog, modelManager: modelManager}
+	backend.customBinaryPath = customBinaryPath
+	backend.status = inference.FormatNotInstalled("")
+	return backend, nil
+}
+
+// Name returns the backend identifier, i.e. the package-level Name
+// constant ("ovms").
+func (o *ovms) Name() string { return Name }
+
+// UsesExternalModelManagement always reports false for OVMS: the model files
+// it serves are obtained from the runner's model manager (see Run).
+func (o *ovms) UsesExternalModelManagement() bool { return false }
+
+// UsesTCP always reports true: Run treats its socket argument as a host:port
+// address and passes the port to OVMS as the REST port.
+func (o *ovms) UsesTCP() bool { return true }
+
+// HealthPath returns the OVMS readiness endpoint that the scheduler polls
+// instead of its default "/health" path while waiting for the server.
+func (o *ovms) HealthPath() string { return "/v2/health/ready" }
+
+func (o *ovms) RewritePath(path string) string {
+	if rest, ok := strings.CutPrefix(path, "/v1/"); ok {
+		return "/v3/" + rest // Swap the leading /v1/ for the /v3/ prefix sent to OVMS.
+	}
+	return path // Anything else (including an empty path) is passed through untouched.
+}
+
+// Install checks that the OVMS binary can be found and records its version in
+// the backend status. It returns ErrOVMSNotFound when the binary is missing; a
+// failed or empty "--version" probe only downgrades the status to version unknown.
+func (o *ovms) Install(ctx context.Context, _ *http.Client) error {
+	binary := o.binaryPath()
+	if o.customBinaryPath != "" {
+		o.log.Info("OVMS binary configured via OVMS_SERVER_PATH", "path", binary)
+	} else if resolved, err := exec.LookPath(Name); err == nil {
+		o.log.Info("OVMS binary resolved from PATH", "path", resolved)
+	}
+	if _, err := exec.LookPath(binary); err != nil {
+		o.status = inference.FormatNotInstalled("")
+		return ErrOVMSNotFound
+	}
+
+	checkCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+	// Fall back to "version unknown" unless the probe yields a usable first line.
+	detail := inference.DetailVersionUnknown
+	output, err := exec.CommandContext(checkCtx, binary, "--version").Output()
+	if err != nil {
+		o.log.Warn("could not get OVMS version", "error", err)
+	}
+	// --version may print several lines; keep only the first so the status stays on one line.
+	first, _, _ := strings.Cut(strings.TrimSpace(string(output)), "\n")
+	if first = strings.TrimSpace(first); err == nil && first != "" {
+		detail = first
+	}
+	o.status = inference.FormatRunning(detail)
+	return nil
+}
+
+// Run launches an OVMS server for one model by delegating to backends.RunBackend.
+// socket must be a host:port address; its port is passed to OVMS as the REST port.
+// The backend mode and backend configuration arguments are ignored.
+func (o *ovms) Run(ctx context.Context, socket, model string, modelRef string, _ inference.BackendMode, _ *inference.BackendConfiguration) error {
+	bundle, err := o.modelManager.GetBundle(model)
+	if err != nil {
+		return fmt.Errorf("failed to get model: %w", err)
+	}
+
+	_, port, err := net.SplitHostPort(socket)
+	if err != nil {
+		return fmt.Errorf("invalid backend socket address %q: %w", socket, err)
+	}
+
+	// Incoming requests carry the human-readable reference, so prefer it for
+	// --model_name and fall back to the model identifier only when it is empty.
+	servedName := modelRef
+	if servedName == "" {
+		servedName = model
+	}
+
+	binary := o.binaryPath()
+	return backends.RunBackend(ctx, backends.RunnerConfig{
+		BackendName:   "OVMS",
+		Socket:        socket,
+		BinaryPath:    binary,
+		SandboxPath:   filepath.Dir(binary),
+		SandboxConfig: "",
+		Args: []string{
+			"--rest_port", port,
+			"--port", "0",
+			"--model_name", servedName,
+			"--model_path", resolveOVMSModelPath(bundle.RootDir()),
+			"--task", "text_generation",
+			"--log_level", ovmsLogLevel(o.log),
+		},
+		Logger:          o.log,
+		ServerLogWriter: logging.NewWriter(o.serverLog),
+	})
+}
+
+// Uninstall implements inference.Backend.Uninstall. It is a no-op that always
+// returns nil: Install only probes an existing binary, so there is nothing
+// for this backend to remove.
+func (o *ovms) Uninstall() error { return nil }
+
+// Status returns the status string most recently recorded by New or Install
+// (not installed, or running together with the detected version detail).
+func (o *ovms) Status() string { return o.status }
+
+// GetDiskUsage always reports zero bytes and a nil error; this backend does
+// not account for any disk space of its own.
+func (o *ovms) GetDiskUsage() (int64, error) { return 0, nil }
+
+// binaryPath picks the custom path if set, else "ovms" from PATH, else the bare name so errors stay clear.
+func (o *ovms) binaryPath() string {
+	if custom := o.customBinaryPath; custom != "" {
+		return custom
+	}
+	if resolved, err := exec.LookPath(Name); err == nil {
+		return resolved
+	}
+	return Name
+}
+
+// resolveOVMSModelPath returns the directory passed to OVMS via --model_path: the bundle's
+// "model" subdirectory when it exists, otherwise the bundle root (backward compatibility).
+func resolveOVMSModelPath(bundleRoot string) string {
+	candidate := filepath.Join(bundleRoot, "model")
+	info, err := os.Stat(candidate)
+	if err != nil || !info.IsDir() {
+		return bundleRoot
+	}
+	return candidate
+}
+
+func ovmsLogLevel(logger logging.Logger) string {
+	if debug := logger.Enabled(context.Background(), slog.LevelDebug); !debug {
+		return "INFO" // OVMS --log_level value when debug logging is off.
+	}
+	return "DEBUG" // Mirror the runner's debug logging in the OVMS process.
+}
diff --git a/pkg/inference/backends/ovms/ovms_test.go b/pkg/inference/backends/ovms/ovms_test.go
new file mode 100644
index 000000000..3a67e4953
--- /dev/null
+++ b/pkg/inference/backends/ovms/ovms_test.go
@@ -0,0 +1,74 @@
+package ovms
+
+import (
+ "log/slog"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/docker/model-runner/pkg/logging"
+)
+
+func TestBinaryPath(t *testing.T) {
+ t.Run("uses custom binary path when provided", func(t *testing.T) {
+ o := &ovms{customBinaryPath: "/tmp/custom-ovms"}
+ if got := o.binaryPath(); got != "/tmp/custom-ovms" {
+ t.Fatalf("binaryPath() = %q, want %q", got, "/tmp/custom-ovms")
+ }
+ })
+
+ t.Run("uses ovms from PATH when custom path is empty", func(t *testing.T) {
+ binDir := t.TempDir()
+ binary := filepath.Join(binDir, Name)
+ if err := os.WriteFile(binary, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+ t.Fatalf("write fake ovms binary: %v", err)
+ }
+
+ originalPath := os.Getenv("PATH")
+ t.Setenv("PATH", binDir+string(os.PathListSeparator)+originalPath)
+
+ o := &ovms{}
+ if got := o.binaryPath(); got != binary {
+ t.Fatalf("binaryPath() = %q, want %q", got, binary)
+ }
+ })
+}
+
+// TestResolveOVMSModelPath checks the "model" subdirectory preference and the bundle-root fallback.
+func TestResolveOVMSModelPath(t *testing.T) {
+	t.Run("uses model subdirectory when present", func(t *testing.T) {
+		root := t.TempDir()
+		want := filepath.Join(root, "model")
+		if err := os.MkdirAll(want, 0o755); err != nil {
+			t.Fatalf("mkdir model dir: %v", err)
+		}
+
+		if got := resolveOVMSModelPath(root); got != want {
+			t.Fatalf("resolveOVMSModelPath() = %q, want %q", got, want)
+		}
+	})
+
+	t.Run("falls back to bundle root when model subdirectory is missing", func(t *testing.T) {
+		// A fresh temp dir has no "model" entry.
+		root := t.TempDir()
+		if got := resolveOVMSModelPath(root); got != root {
+			t.Fatalf("resolveOVMSModelPath() = %q, want %q", got, root)
+		}
+	})
+}
+
+// TestOVMSLogLevel checks that a debug-level logger maps to the OVMS "DEBUG"
+// log level and that an info-level logger maps to "INFO".
+func TestOVMSLogLevel(t *testing.T) {
+	t.Run("debug logger uses DEBUG", func(t *testing.T) {
+		if got := ovmsLogLevel(logging.NewLogger(slog.LevelDebug)); got != "DEBUG" {
+			t.Fatalf("ovmsLogLevel() = %q, want %q", got, "DEBUG")
+		}
+	})
+
+	t.Run("non-debug logger uses INFO", func(t *testing.T) {
+		if got := ovmsLogLevel(logging.NewLogger(slog.LevelInfo)); got != "INFO" {
+			t.Fatalf("ovmsLogLevel() = %q, want %q", got, "INFO")
+		}
+	})
+}
diff --git a/pkg/inference/backends/runner.go b/pkg/inference/backends/runner.go
index 8e0608053..5d7684f43 100644
--- a/pkg/inference/backends/runner.go
+++ b/pkg/inference/backends/runner.go
@@ -23,7 +23,7 @@ type ErrorTransformer func(output string) string
// RunnerConfig holds configuration for a backend runner
type RunnerConfig struct {
- // BackendName is the display name of the backend (e.g., "llama.cpp", "vLLM")
+ // BackendName is the display name of the backend (e.g., "llama.cpp", "vLLM", "OVMS")
BackendName string
// Socket is the unix socket path
Socket string
diff --git a/pkg/inference/scheduling/http_handler.go b/pkg/inference/scheduling/http_handler.go
index 323fbe82d..da9143f74 100644
--- a/pkg/inference/scheduling/http_handler.go
+++ b/pkg/inference/scheduling/http_handler.go
@@ -290,6 +290,12 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
// a 422 response.
upstreamRequest := r.Clone(r.Context())
upstreamRequest.Body = io.NopCloser(bytes.NewReader(body))
+ // OpenAI-compatible inference endpoints always expect JSON payloads.
+ // Some clients (for example curl without explicit headers) default to
+ // application/x-www-form-urlencoded for -d bodies, which breaks OVMS
+ // routing and causes path-based model resolution. Normalize to JSON.
+ upstreamRequest.Header.Set("Content-Type", "application/json")
+ // Ensure the backend always receives a Content-Length header.
upstreamRequest.ContentLength = int64(len(body))
// Perform the request.
diff --git a/pkg/inference/scheduling/runner.go b/pkg/inference/scheduling/runner.go
index ed5ebff9c..64aa9ef82 100644
--- a/pkg/inference/scheduling/runner.go
+++ b/pkg/inference/scheduling/runner.go
@@ -131,6 +131,11 @@ func run(
// Remove the prefix up to the OpenAI API root.
pr.Out.URL.Path = trimRequestPathToOpenAIRoot(pr.Out.URL.Path)
pr.Out.URL.RawPath = trimRequestPathToOpenAIRoot(pr.Out.URL.RawPath)
+ // Allow backends to rewrite the proxied path.
+ if rp, ok := backend.(interface{ RewritePath(string) string }); ok {
+ pr.Out.URL.Path = rp.RewritePath(pr.Out.URL.Path)
+ pr.Out.URL.RawPath = rp.RewritePath(pr.Out.URL.RawPath)
+ }
},
}
proxy.ModifyResponse = func(resp *http.Response) error {
@@ -210,6 +215,12 @@ func run(
// wait waits for the runner to be ready.
func (r *runner) wait(ctx context.Context) error {
+ // Determine the health endpoint for this backend.
+ healthPath := "/health"
+ if hp, ok := r.backend.(interface{ HealthPath() string }); ok {
+ healthPath = hp.HealthPath()
+ }
+
// Loop and poll for readiness.
for p := 0; p < maximumReadinessPings; p++ {
select {
@@ -222,7 +233,7 @@ func (r *runner) wait(ctx context.Context) error {
}
// Create and execute a request targeting the health endpoint.
// Note: /health returns 503 during model loading, 200 when ready.
- readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody)
+ readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost"+healthPath, http.NoBody)
if err != nil {
return fmt.Errorf("readiness request creation failed: %w", err)
}
diff --git a/pkg/server/server.go b/pkg/server/server.go
index a66bb7d04..fca94da85 100644
--- a/pkg/server/server.go
+++ b/pkg/server/server.go
@@ -18,6 +18,7 @@ import (
"github.com/docker/model-runner/pkg/envconfig"
"github.com/docker/model-runner/pkg/inference"
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
+ "github.com/docker/model-runner/pkg/inference/backends/ovms"
"github.com/docker/model-runner/pkg/inference/backends/sglang"
"github.com/docker/model-runner/pkg/inference/config"
"github.com/docker/model-runner/pkg/inference/models"
@@ -65,6 +66,7 @@ func Run(ctx context.Context, cfg Config) error {
sglangServerPath := envconfig.SGLangServerPath()
mlxServerPath := envconfig.MLXServerPath()
diffusersServerPath := envconfig.DiffusersServerPath()
+ ovmsServerPath := envconfig.OVMSServerPath()
vllmMetalServerPath := envconfig.VLLMMetalServerPath()
// Create a proxy-aware HTTP transport.
@@ -92,6 +94,9 @@ func Run(ctx context.Context, cfg Config) error {
if vllmMetalServerPath != "" {
log.Info("VLLM_METAL_SERVER_PATH", "path", vllmMetalServerPath)
}
+ if ovmsServerPath != "" {
+ log.Info("OVMS_SERVER_PATH", "path", ovmsServerPath)
+ }
// Determine log directory. When MODEL_RUNNER_LOG_DIR is set use it;
// otherwise auto-create a default directory so that the /logs endpoint is
@@ -186,6 +191,9 @@ func Run(ctx context.Context, cfg Config) error {
DiffusersPath: diffusersServerPath,
RegistryMirrors: envconfig.RegistryMirrors(),
}),
+ routing.BackendDef{Name: ovms.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
+ return ovms.New(log, mm, log.With("component", ovms.Name), ovmsServerPath)
+ }},
routing.BackendDef{Name: sglang.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
return sglang.New(log, mm, log.With("component", sglang.Name), nil, sglangServerPath)
}},