Skip to content

Commit 7f3479b

Browse files
authored
Merge pull request docker#107 from docker/container-start-cleanup-rebase
Clean up standalone container startup using Moby idioms
2 parents c5fea3a + 462ef1a commit 7f3479b

File tree

1 file changed

+44
-52
lines changed

1 file changed

+44
-52
lines changed

cmd/cli/pkg/standalone/containers.go

Lines changed: 44 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ import (
66
"context"
77
"errors"
88
"fmt"
9+
"io"
910
"os"
10-
"regexp"
1111
"strconv"
1212
"strings"
1313
"time"
1414

15+
"github.com/containerd/errdefs"
1516
"github.com/docker/docker/api/types/container"
1617
"github.com/docker/docker/api/types/filters"
1718
"github.com/docker/docker/api/types/mount"
@@ -25,11 +26,6 @@ import (
2526
// controllerContainerName is the name to use for the controller container.
2627
const controllerContainerName = "docker-model-runner"
2728

28-
// concurrentInstallMatcher matches error message that indicate a concurrent
29-
// standalone model runner installation is taking place. It extracts the ID of
30-
// the conflicting container in a capture group.
31-
var concurrentInstallMatcher = regexp.MustCompile(`is already in use by container "([a-z0-9]+)"`)
32-
3329
// copyDockerConfigToContainer copies the Docker config file from the host to the container
3430
// and sets up proper ownership and permissions for the modelrunner user.
3531
// It does nothing for Desktop and Cloud engine kinds.
@@ -177,38 +173,33 @@ func determineBridgeGatewayIP(ctx context.Context, dockerClient client.NetworkAP
177173
return "", nil
178174
}
179175

180-
// waitForContainerToStart waits for a container to start.
181-
func waitForContainerToStart(ctx context.Context, dockerClient client.ContainerAPIClient, containerID string) error {
182-
// Unfortunately the Docker API's /containers/{id}/wait API (and the
183-
// corresponding Client.ContainerWait method) don't allow waiting for
184-
// container startup, so instead we'll take a polling approach.
176+
// ensureContainerStarted ensures that a container has started. It may be called
177+
// concurrently, taking advantage of the fact that ContainerStart is idempotent.
178+
func ensureContainerStarted(ctx context.Context, dockerClient client.ContainerAPIClient, containerID string) error {
185179
for i := 10; i > 0; i-- {
186-
if status, err := dockerClient.ContainerInspect(ctx, containerID); err != nil {
187-
// There is a small gap between the time that a container ID and
188-
// name are registered and the time that the container is actually
189-
// created and shows up in container list and inspect requests:
190-
//
191-
// https://github.com/moby/moby/blob/de24c536b0ea208a09e0fff3fd896c453da6ef2e/daemon/container.go#L138-L156
192-
//
193-
// Given that multiple install operations tend to end up tightly
194-
// synchronized by the preceeding pull operation and that this
195-
// method is specifically designed to work around these race
196-
// conditions, we'll allow 404 errors to pass silently (at least up
197-
// until the polling time out - unfortunately we can't make the 404
198-
// acceptance window any smaller than that because the CUDA-based
199-
// containers are large and can take time to create).
200-
if !strings.Contains(err.Error(), "No such container") {
201-
return fmt.Errorf("unable to inspect container (%s): %w", containerID[:12], err)
202-
}
203-
} else {
204-
switch status.State.Status {
205-
case container.StateRunning:
206-
return nil
207-
case container.StateCreated, container.StateRestarting:
208-
// wait for container to start
209-
default:
210-
return fmt.Errorf("container is in unexpected state %q", status.State.Status)
211-
}
180+
err := dockerClient.ContainerStart(ctx, containerID, container.StartOptions{})
181+
if err == nil {
182+
return nil
183+
}
184+
// There is a small gap between the time that a container ID and
185+
// name are registered and the time that the container is actually
186+
// created and shows up in container list and inspect requests:
187+
//
188+
// https://github.com/moby/moby/blob/de24c536b0ea208a09e0fff3fd896c453da6ef2e/daemon/container.go#L138-L156
189+
//
190+
// Given that multiple install operations tend to end up tightly
191+
// synchronized by the preceeding pull operation and that this
192+
// method is specifically designed to work around these race
193+
// conditions, we'll allow 404 errors to pass silently (at least up
194+
// until the polling time out - unfortunately we can't make the 404
195+
// acceptance window any smaller than that because the CUDA-based
196+
// containers are large and can take time to create).
197+
//
198+
// For some reason, this error case can also manifest as an EOF on the
199+
// request (I'm not sure where this arises in the Moby server), so we'll
200+
// let that pass silently too.
201+
if !(errdefs.IsNotFound(err) || errors.Is(err, io.EOF)) {
202+
return err
212203
}
213204
if i > 1 {
214205
select {
@@ -280,30 +271,31 @@ func CreateControllerContainer(ctx context.Context, dockerClient *client.Client,
280271
}
281272

282273
// Create the container. If we detect that a concurrent installation is in
283-
// progress, then we wait for whichever install process creates the
284-
// container first and then wait for its container to be ready.
274+
// progress (as indicated by a conflicting container name (which should have
275+
// been detected just before installation)), then we'll allow the error to
276+
// pass silently and simply work in conjunction with any concurrent
277+
// installers to start the container.
285278
resp, err := dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, controllerContainerName)
286-
if err != nil {
287-
if match := concurrentInstallMatcher.FindStringSubmatch(err.Error()); match != nil {
288-
if err := waitForContainerToStart(ctx, dockerClient, match[1]); err != nil {
289-
return fmt.Errorf("failed waiting for concurrent installation: %w", err)
290-
}
291-
return nil
292-
}
279+
if err != nil && !errdefs.IsConflict(err) {
293280
return fmt.Errorf("failed to create container %s: %w", controllerContainerName, err)
294281
}
282+
created := err == nil
295283

296284
// Start the container.
297285
printer.Printf("Starting model runner container %s...\n", controllerContainerName)
298-
if err := dockerClient.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
299-
_ = dockerClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
286+
if err := ensureContainerStarted(ctx, dockerClient, controllerContainerName); err != nil {
287+
if created {
288+
_ = dockerClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
289+
}
300290
return fmt.Errorf("failed to start container %s: %w", controllerContainerName, err)
301291
}
302292

303-
// Copy Docker config file if it exists
304-
if err := copyDockerConfigToContainer(ctx, dockerClient, resp.ID, engineKind); err != nil {
305-
// Log warning but continue - don't fail container creation
306-
printer.Printf("Warning: failed to copy Docker config: %v\n", err)
293+
// Copy Docker config file if it exists and we're the container creator.
294+
if created {
295+
if err := copyDockerConfigToContainer(ctx, dockerClient, resp.ID, engineKind); err != nil {
296+
// Log warning but continue - don't fail container creation
297+
printer.Printf("Warning: failed to copy Docker config: %v\n", err)
298+
}
307299
}
308300
return nil
309301
}

0 commit comments

Comments
 (0)