Skip to content

Commit b2ffe15

Browse files
committed
vllm: Linux support only
Signed-off-by: Dorin Geman <dorin.geman@docker.com>
1 parent c0f57de commit b2ffe15

File tree

4 files changed

+60
-19
lines changed

pkg/distribution/distribution/client.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@ import (
66
"fmt"
77
"io"
88
"net/http"
9-
"runtime"
109
"slices"
1110

12-
"github.com/docker/model-runner/pkg/distribution/internal/utils"
1311
"github.com/sirupsen/logrus"
1412

1513
"github.com/docker/model-runner/pkg/distribution/internal/progress"
1614
"github.com/docker/model-runner/pkg/distribution/internal/store"
15+
"github.com/docker/model-runner/pkg/distribution/internal/utils"
1716
"github.com/docker/model-runner/pkg/distribution/registry"
1817
"github.com/docker/model-runner/pkg/distribution/tarball"
1918
"github.com/docker/model-runner/pkg/distribution/types"
19+
"github.com/docker/model-runner/pkg/inference/platform"
2020
)
2121

2222
// Client provides model distribution functionality
@@ -403,7 +403,7 @@ func (c *Client) GetBundle(ref string) (types.ModelBundle, error) {
403403
}
404404

405405
func GetSupportedFormats() []types.Format {
406-
if runtime.GOOS == "linux" {
406+
if platform.SupportsVLLM() {
407407
return []types.Format{types.FormatGGUF, types.FormatSafetensors}
408408
}
409409
return []types.Format{types.FormatGGUF}

pkg/distribution/distribution/client_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"net/url"
1414
"os"
1515
"path/filepath"
16-
"runtime"
1716
"strings"
1817
"testing"
1918

@@ -27,6 +26,7 @@ import (
2726
"github.com/docker/model-runner/pkg/distribution/internal/progress"
2827
"github.com/docker/model-runner/pkg/distribution/internal/safetensors"
2928
mdregistry "github.com/docker/model-runner/pkg/distribution/registry"
29+
"github.com/docker/model-runner/pkg/inference/platform"
3030
)
3131

3232
var (
@@ -464,7 +464,7 @@ func TestClientPullModel(t *testing.T) {
464464

465465
// Try to pull the safetensors model
466466
err = testClient.PullModel(context.Background(), tag, nil)
467-
if runtime.GOOS == "linux" {
467+
if platform.SupportsVLLM() {
468468
// On Linux, safetensors should be supported
469469
if err != nil {
470470
t.Fatalf("Expected no error on Linux, got: %v", err)

pkg/inference/backends/vllm/vllm.go

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,19 @@ import (
1313
"runtime"
1414
"strings"
1515

16+
"github.com/docker/model-runner/pkg/diskusage"
1617
"github.com/docker/model-runner/pkg/inference"
1718
"github.com/docker/model-runner/pkg/inference/models"
19+
"github.com/docker/model-runner/pkg/inference/platform"
1820
"github.com/docker/model-runner/pkg/logging"
1921
"github.com/docker/model-runner/pkg/sandbox"
2022
"github.com/docker/model-runner/pkg/tailbuffer"
2123
)
2224

2325
const (
2426
// Name is the backend name.
25-
Name = "vllm"
27+
Name = "vllm"
28+
vllmDir = "/opt/vllm-env/bin/"
2629
)
2730

2831
// vLLM is the vLLM-based backend implementation.
@@ -64,28 +67,48 @@ func (v *vLLM) UsesExternalModelManagement() bool {
6467
return false
6568
}
6669

67-
func (v *vLLM) Install(ctx context.Context, httpClient *http.Client) error {
70+
func (v *vLLM) Install(_ context.Context, _ *http.Client) error {
71+
if !platform.SupportsVLLM() {
72+
return errors.New("not implemented")
73+
}
74+
75+
vllmBinaryPath := v.binaryPath()
76+
if _, err := os.Stat(vllmBinaryPath); err != nil {
77+
if errors.Is(err, fs.ErrNotExist) {
78+
return fmt.Errorf("vLLM binary not found at %s", vllmBinaryPath)
79+
}
80+
return fmt.Errorf("failed to check vLLM binary: %w", err)
81+
}
82+
83+
// TODO: Find a way to get vllm's version. Running `vllm --version` is too slow.
84+
v.status = "running"
85+
6886
return nil
6987
}
7088

71-
func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, mode inference.BackendMode, config *inference.BackendConfiguration) error {
89+
func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, _ inference.BackendMode, _ *inference.BackendConfiguration) error {
90+
if !platform.SupportsVLLM() {
91+
v.log.Warn("vLLM backend is not yet supported")
92+
return errors.New("not implemented")
93+
}
94+
7295
bundle, err := v.modelManager.GetBundle(model)
7396
if err != nil {
7497
return fmt.Errorf("failed to get model: %w", err)
7598
}
7699

77100
if err := os.RemoveAll(socket); err != nil && !errors.Is(err, fs.ErrNotExist) {
78-
v.log.Warnf("failed to remove socket file %s: %w\n", socket, err)
101+
v.log.Warnf("failed to remove socket file %s: %v\n", socket, err)
79102
v.log.Warnln("vLLM may not be able to start")
80103
}
81104

82-
binPath := "/opt/vllm-env/bin"
83105
args := []string{
84106
"serve",
85107
filepath.Dir(bundle.SafetensorsPath()),
86108
"--uds", socket,
87109
"--served-model-name", modelRef,
88110
}
111+
// TODO: Add inference.BackendConfiguration.
89112

90113
v.log.Infof("vLLM args: %v", args)
91114
tailBuf := tailbuffer.NewTailBuffer(1024)
@@ -104,8 +127,8 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, m
104127
command.Stdout = serverLogStream
105128
command.Stderr = out
106129
},
107-
binPath,
108-
filepath.Join(binPath, "vllm"),
130+
vllmDir,
131+
v.binaryPath(),
109132
args...,
110133
)
111134
if err != nil {
@@ -120,7 +143,7 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, m
120143

121144
errOutput := new(strings.Builder)
122145
if _, err := io.Copy(errOutput, tailBuf); err != nil {
123-
v.log.Warnf("failed to read server output tail: %w", err)
146+
v.log.Warnf("failed to read server output tail: %v", err)
124147
}
125148

126149
if len(errOutput.String()) != 0 {
@@ -132,7 +155,7 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, m
132155
vllmErrors <- vllmErr
133156
close(vllmErrors)
134157
if err := os.Remove(socket); err != nil && !errors.Is(err, fs.ErrNotExist) {
135-
v.log.Warnf("failed to remove socket file %s on exit: %w\n", socket, err)
158+
v.log.Warnf("failed to remove socket file %s on exit: %v\n", socket, err)
136159
}
137160
}()
138161
defer func() {
@@ -153,18 +176,28 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, m
153176
}
154177

155178
func (v *vLLM) Status() string {
156-
return "enabled"
179+
return v.status
157180
}
158181

159182
func (v *vLLM) GetDiskUsage() (int64, error) {
160-
// TODO implement me
161-
return 0, nil
183+
size, err := diskusage.Size(vllmDir)
184+
if err != nil {
185+
return 0, fmt.Errorf("error while getting store size: %v", err)
186+
}
187+
return size, nil
162188
}
163189

164-
func (v *vLLM) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (inference.RequiredMemory, error) {
165-
// TODO implement me
190+
func (v *vLLM) GetRequiredMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (inference.RequiredMemory, error) {
191+
if !platform.SupportsVLLM() {
192+
return inference.RequiredMemory{}, errors.New("not implemented")
193+
}
194+
166195
return inference.RequiredMemory{
167196
RAM: 1,
168197
VRAM: 1,
169198
}, nil
170199
}
200+
201+
func (v *vLLM) binaryPath() string {
202+
return filepath.Join(vllmDir, "vllm")
203+
}

pkg/inference/platform/platform.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package platform
2+
3+
import "runtime"
4+
5+
// SupportsVLLM returns true if vLLM is supported on the current platform.
6+
func SupportsVLLM() bool {
7+
return runtime.GOOS == "linux"
8+
}

0 commit comments

Comments (0)