@@ -320,15 +320,35 @@ func (d *Docker) WaitForService(ctx context.Context, serviceID string, timeout t
320320 ctx , cancel := context .WithTimeout (ctx , timeout )
321321 defer cancel ()
322322
323- errChan := make (chan error )
324- go func () {
325- for {
323+ // The Docker Swarm API doesn't give us a reliable way to wait until the
324+ // service spec has been propagated, so calling the inspect endpoint
325+ // immediately after update can return a stale result. This ticker not only
326+ // gives us an elegant way to poll, it also gives us a brief delay before
327+ // the polling starts. That delay should be enough for the service spec to
328+ // propagate.
329+ //
330+ // For comparison, the Docker CLI validates that the service hits a steady
331+ // state for a specified time period, which defaults to 5 seconds:
332+ // https://github.com/docker/cli/blob/8ed44f916fa908a04f03f69369bc2a16e0db7cc9/cli/command/service/progress/progress.go#L143
333+ // We can use a simpler implementation because our service always has a
334+ // health check.
335+ ticker := time .NewTicker (5 * time .Second )
336+ defer ticker .Stop ()
337+
338+ for {
339+ select {
340+ case <- ctx .Done ():
341+ return ctx .Err ()
342+ case <- ticker .C :
326343 service , _ , err := d .client .ServiceInspectWithRaw (ctx , serviceID , types.ServiceInspectOptions {})
327344 if err != nil {
328- errChan <- fmt .Errorf ("failed to inspect service: %w" , errTranslate (err ))
345+ return fmt .Errorf ("failed to inspect service: %w" , errTranslate (err ))
329346 }
330347 if service .Spec .Mode .Replicated == nil {
331- errChan <- fmt .Errorf ("WaitForServiceScale is only usable for replicated services: %w" , err )
348+ return fmt .Errorf ("WaitForService is only usable for replicated services: %w" , err )
349+ }
350+ if service .UpdateStatus != nil && service .UpdateStatus .State != swarm .UpdateStateCompleted {
351+ continue
332352 }
333353 var desired uint64
334354 if r := service .Spec .Mode .Replicated .Replicas ; r != nil {
@@ -341,26 +361,18 @@ func (d *Docker) WaitForService(ctx context.Context, serviceID string, timeout t
341361 }),
342362 })
343363 if err != nil {
344- errChan <- fmt .Errorf ("failed to list tasks for service: %w" , errTranslate (err ))
364+ return fmt .Errorf ("failed to list tasks for service: %w" , errTranslate (err ))
345365 }
346366 var running uint64
347367 for _ , t := range tasks {
348- if t .Status .State == swarm .TaskStateRunning {
368+ if t .DesiredState == swarm . TaskStateRunning && t . Status .State == swarm .TaskStateRunning {
349369 running ++
350370 }
351371 }
352372 if running == desired {
353- errChan <- nil
373+ return nil
354374 }
355- time .Sleep (time .Second )
356375 }
357- }()
358-
359- select {
360- case err := <- errChan :
361- return err
362- case <- ctx .Done ():
363- return fmt .Errorf ("timed out waiting for service %q to scale" , serviceID )
364376 }
365377}
366378
0 commit comments