86 changes: 86 additions & 0 deletions README.md
@@ -247,6 +247,92 @@ curl http://localhost:8080/metrics

Check [METRICS.md](./METRICS.md) for more details.

## ollama Integration

Docker Model Runner supports running ollama as an alternative runner. This allows you to use ollama's model format and API alongside Docker Model Runner.

### Installing ollama Runner

To install the ollama runner instead of the default Docker Model Runner:

```bash
docker model install-runner --ollama
```

This will:
- Start an ollama container on port 11434 (the standard ollama port)
- Create an `ollama` volume for model storage (instead of `docker-model-runner-models`)
- Use the `ollama/ollama:latest` image
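
You can sanity-check the result with standard Docker commands; for example (assuming the `docker-ollama-runner` container name used in the examples below):

```bash
# Confirm the ollama container is running and bound to port 11434
docker ps --filter name=docker-ollama-runner

# Confirm the model storage volume was created
docker volume inspect ollama
```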

### GPU Support for ollama

ollama supports both NVIDIA CUDA and AMD ROCm GPUs:

```bash
# For AMD GPUs with ROCm
docker model install-runner --ollama --gpu rocm

# For NVIDIA GPUs (auto-detected)
docker model install-runner --ollama --gpu auto
```

The `--gpu rocm` flag uses the `ollama/ollama:rocm` image, which includes ROCm support.
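
To double-check which image variant was installed, you can inspect the container's image reference directly (again assuming the `docker-ollama-runner` container name used in the examples below):

```bash
docker inspect --format '{{.Config.Image}}' docker-ollama-runner
# Expected output for a ROCm install: ollama/ollama:rocm
```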

### Managing ollama Runner

All standard runner commands support the `--ollama` flag:

```bash
# Start ollama runner
docker model start-runner --ollama

# Stop ollama runner
docker model stop-runner --ollama

# Restart ollama runner
docker model restart-runner --ollama

# Reinstall ollama runner
docker model reinstall-runner --ollama

# Uninstall ollama runner (optionally remove images)
docker model uninstall-runner --ollama --images
```

### Using ollama Models

Models from `ollama.com` are automatically detected:

```bash
# Pull an ollama model (will auto-start ollama runner)
docker model pull ollama.com/library/smollm:135m

# Run an ollama model
docker model run ollama.com/library/smollm:135m
```

**Note:** Direct ollama API integration is currently in development. For now, you can interact with ollama models through the ollama CLI inside the runner container:

```bash
# Pull a model
docker exec docker-ollama-runner ollama pull smollm:135m

# Run a model interactively
docker exec -it docker-ollama-runner ollama run smollm:135m

# List ollama models
docker exec docker-ollama-runner ollama list
```
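
Because the runner publishes ollama's standard port (11434), you can also reach the native ollama HTTP API directly. A brief sketch using ollama's standard `/api/tags` and `/api/generate` endpoints; exact response shapes may vary across ollama versions:

```bash
# List installed models over HTTP
curl http://localhost:11434/api/tags

# Request a non-streaming completion
curl http://localhost:11434/api/generate -d '{
  "model": "smollm:135m",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```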

### ollama vs Docker Model Runner

Key differences:

- **Port**: ollama uses port 11434, Docker Model Runner uses 12434 (Docker Engine) or 12435 (Cloud)
- **Volume**: ollama uses the `ollama` volume, Docker Model Runner uses `docker-model-runner-models`
- **Image**: ollama uses `ollama/ollama:latest` or `ollama/ollama:rocm`
- **Platform support**: ollama controller containers are used on all platforms (including Docker Desktop)
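
A quick way to see the port split in practice (ollama typically answers its root path with a plain "Ollama is running" banner; the Docker Model Runner response depends on endpoint and version):

```bash
curl http://localhost:11434/   # ollama runner
curl http://localhost:12434/   # Docker Model Runner (Docker Engine)
```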

## Kubernetes

Experimental support for running in Kubernetes is available
156 changes: 140 additions & 16 deletions cmd/cli/commands/install-runner.go
@@ -72,6 +72,80 @@ func inspectStandaloneRunner(container container.Summary) *standaloneRunner {
return result
}

// ensureOllamaRunnerAvailable is a utility function that ensures an ollama runner
// is available. Unlike the regular runner, ollama runners are always used via
// controller containers on all platforms.
func ensureOllamaRunnerAvailable(ctx context.Context, printer standalone.StatusPrinter) (*standaloneRunner, error) {
// For ollama, we always use controller containers on all platforms.
// If automatic installation has been disabled, then don't do anything.
if os.Getenv("MODEL_RUNNER_NO_AUTO_INSTALL") != "" {
return nil, nil
}

// Ensure that the output printer is non-nil.
if printer == nil {
printer = standalone.NoopPrinter()
}

// Create a Docker client for the active context.
dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
if err != nil {
return nil, fmt.Errorf("failed to create Docker client: %w", err)
}

// Check if an ollama runner container exists.
containerID, _, container, err := standalone.FindOllamaControllerContainer(ctx, dockerClient)
if err != nil {
return nil, fmt.Errorf("unable to identify existing ollama runner: %w", err)
} else if containerID != "" {
return inspectStandaloneRunner(container), nil
}

// Automatically determine GPU support.
gpu, err := gpupkg.ProbeGPUSupport(ctx, dockerClient)
if err != nil {
return nil, fmt.Errorf("unable to probe GPU support: %w", err)
}

// Ensure that we have an up-to-date copy of the ollama image.
if err := standalone.EnsureOllamaImage(ctx, dockerClient, gpu, "", printer); err != nil {
return nil, fmt.Errorf("unable to pull latest ollama image: %w", err)
}

// Ensure that we have an ollama storage volume.
modelStorageVolume, err := standalone.EnsureOllamaStorageVolume(ctx, dockerClient, printer)
if err != nil {
return nil, fmt.Errorf("unable to initialize ollama storage: %w", err)
}

// Create the ollama runner container.
port := uint16(standalone.DefaultOllamaPort)
host := "127.0.0.1"
engineKind := modelRunner.EngineKind()
environment := "moby"
if engineKind == types.ModelRunnerEngineKindCloud {
environment = "cloud"
}
if err := standalone.CreateOllamaControllerContainer(ctx, dockerClient, port, host, environment, false, gpu, "", modelStorageVolume, printer, engineKind); err != nil {
return nil, fmt.Errorf("unable to initialize ollama container: %w", err)
}

// Poll until we get a response from the ollama runner.
// Note: We reuse the same wait logic, assuming ollama responds similarly.
if err := waitForStandaloneRunnerAfterInstall(ctx); err != nil {
return nil, err
}

Comment on lines +133 to +136

Copilot AI · Oct 15, 2025

waitForStandaloneRunnerAfterInstall checks the Desktop model-runner status, not the ollama container, so ollama readiness will likely time out even when the daemon is up. Implement a dedicated readiness check (e.g., try HTTP to http://127.0.0.1:11434/api/tags or poll the container state) and use it here instead.
Comment on lines +135 to +137

Contributor · critical

The waitForStandaloneRunnerAfterInstall function is not suitable for waiting for the ollama runner. This function uses the global desktopClient, which is configured for the default model runner's port (e.g., 12434), not the ollama runner's port (11434). This will cause the installation to hang and eventually time out, preventing the ollama runner from being installed correctly.

A dedicated waiting logic that polls the ollama port is required. (You will also need to add "net/http" to your imports for this suggestion to work.)

Suggested change:

- if err := waitForStandaloneRunnerAfterInstall(ctx); err != nil {
-     return nil, err
- }
+ if err := func() error {
+     url := fmt.Sprintf("http://127.0.0.1:%d", standalone.DefaultOllamaPort)
+     for tries := installWaitTries; tries > 0; tries-- {
+         resp, err := http.Get(url)
+         if err == nil {
+             resp.Body.Close()
+             if resp.StatusCode == http.StatusOK {
+                 return nil
+             }
+         }
+         select {
+         case <-time.After(installWaitRetryInterval):
+         case <-ctx.Done():
+             return errors.New("cancelled waiting for ollama runner to initialize")
+         }
+     }
+     return errors.New("ollama runner took too long to initialize")
+ }(); err != nil {
+     return nil, err
+ }


// Find the runner container.
containerID, _, container, err = standalone.FindOllamaControllerContainer(ctx, dockerClient)
if err != nil {
return nil, fmt.Errorf("unable to identify existing ollama runner: %w", err)
} else if containerID == "" {
return nil, errors.New("ollama runner not found after installation")
}
return inspectStandaloneRunner(container), nil
}
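
Both review comments above converge on the same underlying fix. As a complement to the inline suggestion, here is a minimal sketch of what a dedicated readiness helper could look like, under the assumption that `installWaitTries` and `installWaitRetryInterval` are this package's existing retry constants and that `net/http` and `time` are imported:

```go
// waitForOllamaRunnerAfterInstall polls ollama's own HTTP API rather than
// the Desktop model-runner status endpoint, so readiness is checked against
// the port the ollama container actually listens on. Hypothetical helper;
// not part of the original change.
func waitForOllamaRunnerAfterInstall(ctx context.Context) error {
	url := fmt.Sprintf("http://127.0.0.1:%d/api/tags", standalone.DefaultOllamaPort)
	for tries := installWaitTries; tries > 0; tries-- {
		// Bind the request to ctx so cancellation interrupts in-flight polls.
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
		if err != nil {
			return err
		}
		if resp, err := http.DefaultClient.Do(req); err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		select {
		case <-time.After(installWaitRetryInterval):
		case <-ctx.Done():
			return errors.New("cancelled waiting for ollama runner to initialize")
		}
	}
	return errors.New("ollama runner took too long to initialize")
}
```

Swapping this in for the waitForStandaloneRunnerAfterInstall call above would address both comments.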

// ensureStandaloneRunnerAvailable is a utility function that other commands can
// use to initialize a default standalone model runner. It is a no-op in
// unsupported contexts or if automatic installs have been disabled.
@@ -168,6 +242,7 @@ type runnerOptions struct {
doNotTrack bool
pullImage bool
pruneContainers bool
ollama bool
}

// runInstallOrStart is shared logic for install-runner and start-runner commands
@@ -191,7 +266,11 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions) error {
// Use "0" as a sentinel default flag value so it's not displayed automatically.
// The default values are written in the usage string.
// Hence, the user currently won't be able to set the port to 0 in order to get a random available port.
port = standalone.DefaultControllerPortMoby
if opts.ollama {
port = standalone.DefaultOllamaPort
} else {
port = standalone.DefaultControllerPortMoby
}
}
// HACK: If we're in a Cloud context, then we need to use a
// different default port because it conflicts with Docker Desktop's
@@ -200,7 +279,7 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions) error {
// when context detection happens. So assume that a default value
// indicates that we want the Cloud default port. This is less
// problematic in Cloud since the UX there is mostly invisible.
if engineKind == types.ModelRunnerEngineKindCloud &&
if !opts.ollama && engineKind == types.ModelRunnerEngineKindCloud &&
port == standalone.DefaultControllerPortMoby {
port = standalone.DefaultControllerPortCloud
}
@@ -219,51 +298,93 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions) error {

// If pruning containers (reinstall), remove any existing model runner containers.
if opts.pruneContainers {
if err := standalone.PruneControllerContainers(cmd.Context(), dockerClient, false, cmd); err != nil {
return fmt.Errorf("unable to remove model runner container(s): %w", err)
if opts.ollama {
if err := standalone.PruneOllamaControllerContainers(cmd.Context(), dockerClient, false, cmd); err != nil {
return fmt.Errorf("unable to remove ollama runner container(s): %w", err)
}
} else {
if err := standalone.PruneControllerContainers(cmd.Context(), dockerClient, false, cmd); err != nil {
return fmt.Errorf("unable to remove model runner container(s): %w", err)
}
}
} else {
// Check if an active model runner container already exists (install only).
if ctrID, ctrName, _, err := standalone.FindControllerContainer(cmd.Context(), dockerClient); err != nil {
var ctrID, ctrName string
var err error
if opts.ollama {
ctrID, ctrName, _, err = standalone.FindOllamaControllerContainer(cmd.Context(), dockerClient)
} else {
ctrID, ctrName, _, err = standalone.FindControllerContainer(cmd.Context(), dockerClient)
}
if err != nil {
return err
} else if ctrID != "" {
runnerType := "Model Runner"
if opts.ollama {
runnerType = "ollama runner"
}
if ctrName != "" {
cmd.Printf("Model Runner container %s (%s) is already running\n", ctrName, ctrID[:12])
cmd.Printf("%s container %s (%s) is already running\n", runnerType, ctrName, ctrID[:12])
} else {
cmd.Printf("Model Runner container %s is already running\n", ctrID[:12])
cmd.Printf("%s container %s is already running\n", runnerType, ctrID[:12])
}
return nil
}
}
Comment on lines 299 to 333

Contributor · medium

This function contains a significant amount of duplicated logic within `if opts.ollama` blocks for handling different runner types. This reduces readability and maintainability.

You can refactor it by defining runner-specific configuration at the beginning of the function and then using a single, unified code path. The standalone package has already been refactored with generic functions like PruneControllerContainersByType and FindControllerContainerByType that you can leverage here.

The same pattern of duplication appears later in the function when ensuring images and volumes and creating the container, which could be refactored similarly.

Suggested change:
runnerTypeStr := "Model Runner"
runnerTypeLabel := "model-runner"
pruneErrMsg := "unable to remove model runner container(s): %w"
if opts.ollama {
	runnerTypeStr = "ollama runner"
	runnerTypeLabel = "ollama"
	pruneErrMsg = "unable to remove ollama runner container(s): %w"
}

// If pruning containers (reinstall), remove any existing model runner containers.
if opts.pruneContainers {
	if err := standalone.PruneControllerContainersByType(cmd.Context(), dockerClient, false, cmd, runnerTypeLabel); err != nil {
		return fmt.Errorf(pruneErrMsg, err)
	}
} else {
	// Check if an active model runner container already exists (install only).
	ctrID, ctrName, _, err := standalone.FindControllerContainerByType(cmd.Context(), dockerClient, runnerTypeLabel)
	if err != nil {
		return err
	} else if ctrID != "" {
		if ctrName != "" {
			cmd.Printf("%s container %s (%s) is already running\n", runnerTypeStr, ctrName, ctrID[:12])
		} else {
			cmd.Printf("%s container %s is already running\n", runnerTypeStr, ctrID[:12])
		}
		return nil
	}
}


// Determine GPU support.
var gpu gpupkg.GPUSupport
var gpuVariant string
if opts.gpuMode == "auto" {
gpu, err = gpupkg.ProbeGPUSupport(cmd.Context(), dockerClient)
if err != nil {
return fmt.Errorf("unable to probe GPU support: %w", err)
}
} else if opts.gpuMode == "cuda" {
gpu = gpupkg.GPUSupportCUDA
} else if opts.gpuMode == "rocm" {
gpu = gpupkg.GPUSupportROCm
gpuVariant = "rocm"
} else if opts.gpuMode != "none" {
return fmt.Errorf("unknown GPU specification: %q", opts.gpuMode)
}

// Ensure that we have an up-to-date copy of the image, if requested.
if opts.pullImage {
if err := standalone.EnsureControllerImage(cmd.Context(), dockerClient, gpu, cmd); err != nil {
return fmt.Errorf("unable to pull latest standalone model runner image: %w", err)
if opts.ollama {
if err := standalone.EnsureOllamaImage(cmd.Context(), dockerClient, gpu, gpuVariant, cmd); err != nil {
return fmt.Errorf("unable to pull latest ollama image: %w", err)
}
} else {
if err := standalone.EnsureControllerImage(cmd.Context(), dockerClient, gpu, cmd); err != nil {
return fmt.Errorf("unable to pull latest standalone model runner image: %w", err)
}
}
}

// Ensure that we have a model storage volume.
modelStorageVolume, err := standalone.EnsureModelStorageVolume(cmd.Context(), dockerClient, cmd)
if err != nil {
return fmt.Errorf("unable to initialize standalone model storage: %w", err)
var modelStorageVolume string
if opts.ollama {
modelStorageVolume, err = standalone.EnsureOllamaStorageVolume(cmd.Context(), dockerClient, cmd)
if err != nil {
return fmt.Errorf("unable to initialize ollama storage: %w", err)
}
} else {
modelStorageVolume, err = standalone.EnsureModelStorageVolume(cmd.Context(), dockerClient, cmd)
if err != nil {
return fmt.Errorf("unable to initialize standalone model storage: %w", err)
}
}

// Create the model runner container.
if err := standalone.CreateControllerContainer(cmd.Context(), dockerClient, port, opts.host, environment, opts.doNotTrack, gpu, modelStorageVolume, cmd, engineKind); err != nil {
return fmt.Errorf("unable to initialize standalone model runner container: %w", err)
if opts.ollama {
if err := standalone.CreateOllamaControllerContainer(cmd.Context(), dockerClient, port, opts.host, environment, opts.doNotTrack, gpu, gpuVariant, modelStorageVolume, cmd, engineKind); err != nil {
return fmt.Errorf("unable to initialize ollama container: %w", err)
}
} else {
if err := standalone.CreateControllerContainer(cmd.Context(), dockerClient, port, opts.host, environment, opts.doNotTrack, gpu, modelStorageVolume, cmd, engineKind); err != nil {
return fmt.Errorf("unable to initialize standalone model runner container: %w", err)
}
}

// Poll until we get a response from the model runner.
@@ -275,6 +396,7 @@ func newInstallRunner() *cobra.Command {
var host string
var gpuMode string
var doNotTrack bool
var ollama bool
c := &cobra.Command{
Use: "install-runner",
Short: "Install Docker Model Runner (Docker Engine only)",
@@ -286,14 +408,16 @@ func newInstallRunner() *cobra.Command {
doNotTrack: doNotTrack,
pullImage: true,
pruneContainers: false,
ollama: ollama,
})
},
ValidArgsFunction: completion.NoComplete,
}
c.Flags().Uint16Var(&port, "port", 0,
"Docker container port for Docker Model Runner (default: 12434 for Docker Engine, 12435 for Cloud mode)")
"Docker container port for Docker Model Runner (default: 12434 for Docker Engine, 12435 for Cloud mode, 11434 for ollama)")
c.Flags().StringVar(&host, "host", "127.0.0.1", "Host address to bind Docker Model Runner")
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda)")
c.Flags().StringVar(&gpuMode, "gpu", "auto", "Specify GPU support (none|auto|cuda|rocm)")
c.Flags().BoolVar(&doNotTrack, "do-not-track", false, "Do not track models usage in Docker Model Runner")
c.Flags().BoolVar(&ollama, "ollama", false, "Use ollama runner instead of Docker Model Runner")
return c
}