Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 41 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,45 @@ docker buildx build \

The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://github.com/vllm-project/vllm/releases`, which provides prebuilt wheels for each release version.

### OVMS integration

Docker Model Runner can also run an OVMS backend.

- Default OVMS binary: resolved from `PATH` (looks up `ovms`)
- Override binary path with: `OVMS_SERVER_PATH`

OVMS can be installed by following this [guide](https://docs.openvino.ai/2026/model-server/ovms_docs_deploying_server_baremetal.html). The minimum supported version is 2026.2.

When the runner starts, it will try to initialize OVMS as an available backend.

```sh
MODEL_RUNNER_PORT=13434 ./model-runner
```

Create a new model
Use a model from the Hugging Face Hub whose repository is in OpenVINO format.
```sh
curl http://localhost:13434/models/create -X POST -d '{"from": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov"}'
```

You can target OVMS explicitly through the backend-prefixed OpenAI-compatible routes:
```sh
# List models exposed via OVMS backend routing
curl http://localhost:13434/engines/ovms/v1/models

# Example chat/completions call through OVMS backend routing
curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{
"model": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov",
"messages": [
{"role": "user", "content": "Hello from OVMS"}
]
}'
```
Delete model
```sh
curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE
```

## API Examples

The Model Runner exposes a REST API that can be accessed via TCP port. You can interact with it using curl commands.
Expand All @@ -335,7 +374,7 @@ curl http://localhost:8080/models
curl http://localhost:8080/models/create -X POST -d '{"from": "ai/smollm2"}'

# Get information about a specific model
curl http://localhost:8080/models/ai/smollm2
curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can add openvino examples, but we should avoid replacing the existing ones


# Chat with a model
curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
Expand All @@ -347,12 +386,11 @@ curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
}'

# Delete a model
curl http://localhost:8080/models/ai/smollm2 -X DELETE
curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE

# Get metrics
curl http://localhost:8080/metrics
```

The response will contain the model's reply:

```json
Expand Down
15 changes: 14 additions & 1 deletion pkg/distribution/builder/from_directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ type DirectoryOptions struct {
// This is useful for producing deterministic OCI digests.
Created *time.Time

// AllowNoWeightFiles allows packaging a directory even when it contains no
// GGUF/SafeTensors/DDUF weight files.
AllowNoWeightFiles bool

// Format is the output artifact format. Defaults to BuildFormatDocker.
Format BuildFormat
}
Expand Down Expand Up @@ -66,6 +70,15 @@ func WithCreatedTime(t time.Time) DirectoryOption {
}
}

// WithAllowNoWeightFiles returns a DirectoryOption that sets
// DirectoryOptions.AllowNoWeightFiles to true, so FromDirectory does not fail
// when the directory has no standard model weight files. It exists for formats
// such as OpenVINO IR, whose models are laid out differently (for example as
// .xml + .bin pairs).
func WithAllowNoWeightFiles() DirectoryOption {
	enable := func(o *DirectoryOptions) {
		o.AllowNoWeightFiles = true
	}
	return enable
}

// WithOutputFormat sets the output artifact format for the directory builder.
// Defaults to BuildFormatDocker if not specified.
// This is the DirectoryOption equivalent of WithFormat (BuildOption).
Expand Down Expand Up @@ -208,7 +221,7 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) {
return nil, fmt.Errorf("no files found in directory: %s", dirPath)
}

if len(weightFiles) == 0 {
if len(weightFiles) == 0 && !options.AllowNoWeightFiles {
return nil, fmt.Errorf("no weight files (safetensors, GGUF, or DDUF) found in directory: %s", dirPath)
}

Expand Down
39 changes: 39 additions & 0 deletions pkg/distribution/builder/from_directory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package builder
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
)
Expand Down Expand Up @@ -170,6 +171,44 @@ func TestFromDirectoryWithExclusions(t *testing.T) {
}
}

// TestFromDirectoryNoStandardWeights checks that FromDirectory, called without
// options, fails with a "no weight files" error for a directory that only
// holds an .xml/.bin pair and a config file.
func TestFromDirectoryNoStandardWeights(t *testing.T) {
	dir := t.TempDir()
	createTestFile(t, dir, "openvino/model.xml", "<net></net>")
	createTestFile(t, dir, "openvino/model.bin", "weights")
	createTestFile(t, dir, "openvino/config.json", "{}")

	_, buildErr := FromDirectory(dir)
	if buildErr == nil {
		t.Fatal("expected error when directory has no GGUF/SafeTensors/DDUF weights")
	}

	// An empty message can never contain the marker, so a single Contains
	// check covers the empty-message case as well.
	msg := buildErr.Error()
	if !strings.Contains(msg, "no weight files") {
		t.Fatalf("expected no weight files error, got %q", msg)
	}
}

// TestFromDirectoryAllowNoWeightFiles checks that FromDirectory succeeds when
// WithAllowNoWeightFiles is supplied and that the three files in the directory
// end up as three layers.
func TestFromDirectoryAllowNoWeightFiles(t *testing.T) {
	dir := t.TempDir()
	createTestFile(t, dir, "openvino/model.xml", "<net></net>")
	createTestFile(t, dir, "openvino/model.bin", "weights")
	createTestFile(t, dir, "openvino/config.json", "{}")

	bld, err := FromDirectory(dir, WithAllowNoWeightFiles())
	if err != nil {
		t.Fatalf("FromDirectory with WithAllowNoWeightFiles failed: %v", err)
	}

	layers, err := bld.Model().Layers()
	if err != nil {
		t.Fatalf("Failed to get layers: %v", err)
	}

	// One layer is expected per file created above.
	if count := len(layers); count != 3 {
		t.Errorf("Expected 3 layers, got %d", count)
	}
}

func TestShouldExclude(t *testing.T) {
tests := []struct {
name string
Expand Down
37 changes: 26 additions & 11 deletions pkg/distribution/huggingface/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,

// Filter to model files (weights + configs)
weightFiles, configFiles := FilterModelFiles(files)
isOpenVINORepo := IsOpenVINOModel(files)

if len(weightFiles) == 0 {
if len(weightFiles) == 0 && !isOpenVINORepo {
return nil, fmt.Errorf("no model weight files (GGUF or SafeTensors) found in repository %s", repo)
}

Expand All @@ -54,10 +55,20 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
}
}

// Combine all files to download
allFiles := append(weightFiles, configFiles...)
if mmprojFile != nil {
allFiles = append(allFiles, *mmprojFile)
// Combine all files to download.
// For OpenVINO repositories, pull all repository files so the full IR layout is preserved.
var allFiles []RepoFile
if isOpenVINORepo {
for _, f := range files {
if f.Type == "file" {
allFiles = append(allFiles, f)
}
}
Comment on lines +61 to +66
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Downloading all files in an OpenVINO repository can result in downloading unnecessary large weight files (such as .safetensors, .gguf, or non-OpenVINO .bin files like pytorch_model.bin). This can cause significant performance degradation, slow model creation, and potential disk space exhaustion. We should filter the files to only download the required OpenVINO IR files (.xml and matching .bin pairs) and configuration files.

if isOpenVINORepo {
		xmlStems := make(map[string]bool)
		for _, f := range files {
			if f.Type == "file" && strings.HasSuffix(strings.ToLower(f.Path), ".xml") {
				xmlStems[f.Path[:len(f.Path)-4]] = true
			}
		}
		for _, f := range files {
			if f.Type == "file" {
				lowerPath := strings.ToLower(f.Path)
				if strings.HasSuffix(lowerPath, ".safetensors") || strings.HasSuffix(lowerPath, ".gguf") || strings.HasSuffix(lowerPath, ".dduf") {
					continue
				}
				if strings.HasSuffix(lowerPath, ".bin") {
					stem := f.Path[:len(f.Path)-4]
					if !xmlStems[stem] {
						continue
					}
				}
				allFiles = append(allFiles, f)
			}
		}
	}
References
  1. User empathy — How does this affect the people who use, operate, and maintain this system? Consider developer ergonomics, operational burden, error messages, failure modes, and the debugging experience. (link)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dtrawins does this make sense? You know more about this repo format/file format than me

} else {
allFiles = append(weightFiles, configFiles...)
if mmprojFile != nil {
allFiles = append(allFiles, *mmprojFile)
}
}

if progressWriter != nil {
Expand Down Expand Up @@ -91,7 +102,7 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
}

model, err := buildModelFromFiles(
result.LocalPaths, weightFiles, configFiles, mmprojFile, tempDir, createdTime,
result.LocalPaths, weightFiles, configFiles, mmprojFile, tempDir, createdTime, isOpenVINORepo,
)
if err != nil {
return nil, fmt.Errorf("build model: %w", err)
Expand All @@ -111,26 +122,30 @@ func buildModelFromFiles(
mmprojFile *RepoFile,
tempDir string,
createdTime *time.Time,
allowNoStandardWeights bool,
) (types.ModelArtifact, error) {
// Check if this is a safetensors model - use V0.2 packaging
if isSafetensorsModel(weightFiles) {
return buildSafetensorsModelV02(tempDir, createdTime)
// Safetensors and OpenVINO repos are packaged with V0.2 layer-per-file packaging.
if isSafetensorsModel(weightFiles) || allowNoStandardWeights {
return buildDirectoryModelV02(tempDir, createdTime, allowNoStandardWeights)
}

// For GGUF models, use V0.1 packaging (backward compatible)
return buildGGUFModelV01(localPaths, weightFiles, configFiles, mmprojFile, createdTime)
}

// buildSafetensorsModelV02 builds a safetensors model using V0.2 layer-per-file packaging.
// buildDirectoryModelV02 builds a model using V0.2 layer-per-file packaging.
// It uses builder.FromDirectory which recursively scans the tempDir and creates one layer
// per file, preserving nested directory structure with filepath annotations.
// If createdTime is non-nil, it is used as the creation timestamp for the OCI config
// to produce deterministic digests. Otherwise time.Now() is used.
func buildSafetensorsModelV02(tempDir string, createdTime *time.Time) (types.ModelArtifact, error) {
func buildDirectoryModelV02(tempDir string, createdTime *time.Time, allowNoStandardWeights bool) (types.ModelArtifact, error) {
var dirOpts []builder.DirectoryOption
if createdTime != nil {
dirOpts = append(dirOpts, builder.WithCreatedTime(*createdTime))
}
if allowNoStandardWeights {
dirOpts = append(dirOpts, builder.WithAllowNoWeightFiles())
}

b, err := builder.FromDirectory(tempDir, dirOpts...)
if err != nil {
Expand Down
34 changes: 34 additions & 0 deletions pkg/distribution/huggingface/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,40 @@ func FilterModelFiles(repoFiles []RepoFile) (weights []RepoFile, configs []RepoF
return weights, configs
}

// IsOpenVINOModel reports whether repoFiles contains at least one OpenVINO IR
// weight pair: an .xml entry and a .bin entry whose paths are identical once
// the extension is removed. Only entries with Type "file" are considered, the
// extension match is case-insensitive, and the stem comparison is
// case-sensitive and includes any directory prefix, so pairs in nested paths
// are detected too.
func IsOpenVINOModel(repoFiles []RepoFile) bool {
	const (
		sawXML uint8 = 1 << iota
		sawBin
	)

	// seen maps each stem to the set of IR extensions observed for it.
	seen := make(map[string]uint8)

	for _, rf := range repoFiles {
		if rf.Type != "file" {
			continue
		}

		rawExt := path.Ext(rf.Path)

		var flag uint8
		switch strings.ToLower(rawExt) {
		case ".xml":
			flag = sawXML
		case ".bin":
			flag = sawBin
		default:
			continue
		}

		stem := strings.TrimSuffix(rf.Path, rawExt)
		seen[stem] |= flag

		// Both halves of the pair have been observed for this stem.
		if seen[stem] == sawXML|sawBin {
			return true
		}
	}

	return false
}

// TotalSize calculates the total size of files
func TotalSize(repoFiles []RepoFile) int64 {
var total int64
Expand Down
50 changes: 50 additions & 0 deletions pkg/distribution/huggingface/repository_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,53 @@ func TestIsSafetensorsModel(t *testing.T) {
})
}
}

// TestIsOpenVINOModel runs IsOpenVINOModel against a table of repository
// listings: matching .xml/.bin pairs at the root and in a subdirectory, an
// .xml with no matching .bin, and a listing without any OpenVINO files.
func TestIsOpenVINOModel(t *testing.T) {
	// file builds a regular-file entry for the given repository path.
	file := func(p string) RepoFile {
		return RepoFile{Type: "file", Path: p}
	}

	cases := []struct {
		name     string
		files    []RepoFile
		expected bool
	}{
		{
			name:     "matching xml/bin pair at root",
			files:    []RepoFile{file("openvino_model.xml"), file("openvino_model.bin")},
			expected: true,
		},
		{
			name: "matching xml/bin pair in subdirectory",
			files: []RepoFile{
				file("int4/openvino_model.xml"),
				file("int4/openvino_model.bin"),
				file("int4/config.json"),
			},
			expected: true,
		},
		{
			name:     "xml without matching bin",
			files:    []RepoFile{file("openvino_model.xml"), file("other_model.bin")},
			expected: false,
		},
		{
			name:     "no openvino files",
			files:    []RepoFile{file("model.safetensors"), file("config.json")},
			expected: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := IsOpenVINOModel(tc.files)
			if got != tc.expected {
				t.Errorf("IsOpenVINOModel() = %v, want %v", got, tc.expected)
			}
		})
	}
}
6 changes: 6 additions & 0 deletions pkg/envconfig/envconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ func VLLMMetalServerPath() string {
return Var("VLLM_METAL_SERVER_PATH")
}

// OVMSServerPath returns the optional path to the OVMS server binary, as
// obtained from Var for the OVMS_SERVER_PATH setting.
func OVMSServerPath() string {
	const key = "OVMS_SERVER_PATH"
	return Var(key)
}

// LogDir returns the directory containing DMR log files.
// Configured via MODEL_RUNNER_LOG_DIR. When empty, the server
// auto-creates a default log directory so that the /logs API
Expand Down
Loading
Loading