@@ -4,14 +4,18 @@ package libpod
import (
	"context"
+	"errors"
	"fmt"
	"strings"
+	"sync"

	"github.com/containers/podman/v5/libpod/define"
+	"github.com/containers/podman/v5/pkg/parallel"
	"github.com/sirupsen/logrus"
)

type containerNode struct {
+	lock       sync.Mutex
	id         string
	container  *Container
	dependsOn  []*containerNode
@@ -284,99 +288,241 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError
	}
}

-// Visit a node on the container graph and remove it, or set an error if it
-// failed to remove. Only intended for use in pod removal; do *not* use when
-// removing individual containers.
-// All containers are assumed to be *UNLOCKED* on running this function.
-// Container locks will be acquired as necessary.
-// Pod and infraID are optional. If a pod is given it must be *LOCKED*.
-func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, timeout *uint, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, ctrNamedVolumes map[string]*ContainerNamedVolume) {
+// Contains all details required for traversing the container graph.
+type nodeTraversal struct {
+	// Protects reads and writes to the two maps.
+	lock sync.Mutex
+	// Optional, but *MUST* be locked.
+	// Should NOT be changed once a traversal is started.
+	pod *Pod
+	// Function to execute on the individual container being acted on.
+	// Should NOT be changed once a traversal is started.
+	actionFunc func(ctr *Container, pod *Pod) error
+	// Shared list of errors for all containers currently acted on.
+	ctrErrors map[string]error
+	// Shared list of what containers have been visited.
+	ctrsVisited map[string]bool
+}
+
+// Perform a traversal of the graph in an inwards direction - meaning from nodes
+// with no dependencies, recursing inwards to the nodes they depend on.
+// Safe to run in parallel on multiple nodes.
+func traverseNodeInwards(node *containerNode, nodeDetails *nodeTraversal, setError bool) {
+	node.lock.Lock()
+
	// If we already visited this node, we're done.
-	if ctrsVisited[node.id] {
+	nodeDetails.lock.Lock()
+	visited := nodeDetails.ctrsVisited[node.id]
+	nodeDetails.lock.Unlock()
+	if visited {
+		node.lock.Unlock()
		return
	}

	// Someone who depends on us failed.
	// Mark us as failed and recurse.
	if setError {
-		ctrsVisited[node.id] = true
-		ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be removed: %w", node.id, define.ErrCtrStateInvalid)
+		nodeDetails.lock.Lock()
+		nodeDetails.ctrsVisited[node.id] = true
+		nodeDetails.ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be stopped: %w", node.id, define.ErrCtrStateInvalid)
+		nodeDetails.lock.Unlock()
+
+		node.lock.Unlock()

		// Hit anyone who depends on us, set errors there as well.
		for _, successor := range node.dependsOn {
-			removeNode(ctx, successor, pod, force, timeout, true, ctrErrors, ctrsVisited, ctrNamedVolumes)
+			traverseNodeInwards(successor, nodeDetails, true)
		}
+
+		return
	}

	// Does anyone still depend on us?
-	// Cannot remove if true. Once all our dependencies have been removed,
-	// we will be removed.
+	// Cannot stop if true. Once all our dependencies have been stopped,
+	// we will be stopped.
	for _, dep := range node.dependedOn {
		// The container that depends on us hasn't been removed yet.
		// OK to continue on
-		if ok := ctrsVisited[dep.id]; !ok {
+		nodeDetails.lock.Lock()
+		ok := nodeDetails.ctrsVisited[dep.id]
+		nodeDetails.lock.Unlock()
+		if !ok {
+			node.lock.Unlock()
			return
		}
	}

-	// Going to try to remove the node, mark us as visited
-	ctrsVisited[node.id] = true
-
	ctrErrored := false
+	if err := nodeDetails.actionFunc(node.container, nodeDetails.pod); err != nil {
+		ctrErrored = true
+		nodeDetails.lock.Lock()
+		nodeDetails.ctrErrors[node.id] = err
+		nodeDetails.lock.Unlock()
+	}

-	// Verify that all that depend on us are gone.
-	// Graph traversal should guarantee this is true, but this isn't that
-	// expensive, and it's better to be safe.
-	for _, dep := range node.dependedOn {
-		if _, err := node.container.runtime.GetContainer(dep.id); err == nil {
-			ctrErrored = true
-			ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s still exists: %w", node.id, define.ErrDepExists)
-		}
+	// Mark as visited *only after* the operation has finished.
+	// This ensures that the operation has completed, one way or the other.
+	// If an error was set, only do this after the error state (ctrErrored)
+	// has propagated via traverseNodeInwards below.
+	// Same with the node lock - we don't want to release it until we are
+	// marked as visited.
+	if !ctrErrored {
+		nodeDetails.lock.Lock()
+		nodeDetails.ctrsVisited[node.id] = true
+		nodeDetails.lock.Unlock()
+
+		node.lock.Unlock()
	}

-	// Lock the container
-	node.container.lock.Lock()
+	// Recurse to anyone who we depend on and work on them
+	for _, successor := range node.dependsOn {
+		traverseNodeInwards(successor, nodeDetails, ctrErrored)
+	}

-	// Gate all subsequent bits behind a ctrErrored check - we don't want to
-	// proceed if a previous step failed.
-	if !ctrErrored {
-		if err := node.container.syncContainer(); err != nil {
-			ctrErrored = true
-			ctrErrors[node.id] = err
+	// If we propagated an error, finally mark us as visited here, after
+	// all nodes we traverse to have already been marked failed.
+	// If we don't do this, there is a race condition where a node could try
+	// and perform its operation before it was marked failed by the
+	// traverseNodeInwards triggered by this process.
+	if ctrErrored {
+		nodeDetails.lock.Lock()
+		nodeDetails.ctrsVisited[node.id] = true
+		nodeDetails.lock.Unlock()
+
+		node.lock.Unlock()
+	}
+}
+
+// Stop all containers in the given graph, assumed to be the graph of a pod's containers.
+// Pod is mandatory and should be locked.
+func stopContainerGraph(ctx context.Context, graph *ContainerGraph, pod *Pod, timeout *uint, cleanup bool) (map[string]error, error) {
+	// Are there actually any containers in the graph?
+	// If not, return immediately.
+	if len(graph.nodes) == 0 {
+		return map[string]error{}, nil
+	}
+
+	nodeDetails := new(nodeTraversal)
+	nodeDetails.pod = pod
+	nodeDetails.ctrErrors = make(map[string]error)
+	nodeDetails.ctrsVisited = make(map[string]bool)
+
+	traversalFunc := func(ctr *Container, pod *Pod) error {
+		ctr.lock.Lock()
+		defer ctr.lock.Unlock()
+
+		if err := ctr.syncContainer(); err != nil {
+			return err
+		}
+
+		realTimeout := ctr.config.StopTimeout
+		if timeout != nil {
+			realTimeout = *timeout
		}
+
+		if err := ctr.stop(realTimeout); err != nil && !errors.Is(err, define.ErrCtrStateInvalid) && !errors.Is(err, define.ErrCtrStopped) {
+			return err
+		}
+
+		if cleanup {
+			return ctr.fullCleanup(ctx, false)
+		}
+
+		return nil
	}
+	nodeDetails.actionFunc = traversalFunc

-	if !ctrErrored {
-		for _, vol := range node.container.config.NamedVolumes {
+	doneChans := make([]<-chan error, 0, len(graph.notDependedOnNodes))
+
+	// Parallel enqueue jobs for all our starting nodes.
+	if len(graph.notDependedOnNodes) == 0 {
+		return nil, fmt.Errorf("no containers in pod %s are not dependencies of other containers, unable to stop", pod.ID())
+	}
+	for _, node := range graph.notDependedOnNodes {
+		doneChan := parallel.Enqueue(ctx, func() error {
+			traverseNodeInwards(node, nodeDetails, false)
+			return nil
+		})
+		doneChans = append(doneChans, doneChan)
+	}
+
+	// We don't care about the return values; these functions always return nil.
+	// But we do need all of the parallel jobs to terminate.
+	for _, doneChan := range doneChans {
+		<-doneChan
+	}
+
+	return nodeDetails.ctrErrors, nil
+}
+
+// Remove all containers in the given graph
+// Pod is optional, and must be locked if given.
+func removeContainerGraph(ctx context.Context, graph *ContainerGraph, pod *Pod, timeout *uint, force bool) (map[string]*ContainerNamedVolume, map[string]bool, map[string]error, error) {
+	// Are there actually any containers in the graph?
+	// If not, return immediately.
+	if len(graph.nodes) == 0 {
+		return nil, nil, nil, nil
+	}
+
+	nodeDetails := new(nodeTraversal)
+	nodeDetails.pod = pod
+	nodeDetails.ctrErrors = make(map[string]error)
+	nodeDetails.ctrsVisited = make(map[string]bool)
+
+	ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
+
+	traversalFunc := func(ctr *Container, pod *Pod) error {
+		ctr.lock.Lock()
+		defer ctr.lock.Unlock()
+
+		if err := ctr.syncContainer(); err != nil {
+			return err
+		}
+
+		for _, vol := range ctr.config.NamedVolumes {
			ctrNamedVolumes[vol.Name] = vol
		}

-		if pod != nil && pod.state.InfraContainerID == node.id {
+		if pod != nil && pod.state.InfraContainerID == ctr.ID() {
			pod.state.InfraContainerID = ""
			if err := pod.save(); err != nil {
-				ctrErrored = true
-				ctrErrors[node.id] = fmt.Errorf("error removing infra container %s from pod %s: %w", node.id, pod.ID(), err)
+				return fmt.Errorf("error removing infra container %s from pod %s: %w", ctr.ID(), pod.ID(), err)
			}
		}
-	}

-	if !ctrErrored {
		opts := ctrRmOpts{
			Force:     force,
			RemovePod: true,
			Timeout:   timeout,
		}

-		if _, _, err := node.container.runtime.removeContainer(ctx, node.container, opts); err != nil {
-			ctrErrored = true
-			ctrErrors[node.id] = err
+		if _, _, err := ctr.runtime.removeContainer(ctx, ctr, opts); err != nil {
+			return err
		}
+
+		return nil
	}
+	nodeDetails.actionFunc = traversalFunc

-	node.container.lock.Unlock()
+	doneChans := make([]<-chan error, 0, len(graph.notDependedOnNodes))

-	// Recurse to anyone who we depend on and remove them
-	for _, successor := range node.dependsOn {
-		removeNode(ctx, successor, pod, force, timeout, ctrErrored, ctrErrors, ctrsVisited, ctrNamedVolumes)
+	// Parallel enqueue jobs for all our starting nodes.
+	if len(graph.notDependedOnNodes) == 0 {
+		return nil, nil, nil, fmt.Errorf("no containers in graph are not dependencies of other containers, unable to remove")
	}
+	for _, node := range graph.notDependedOnNodes {
+		doneChan := parallel.Enqueue(ctx, func() error {
+			traverseNodeInwards(node, nodeDetails, false)
+			return nil
+		})
+		doneChans = append(doneChans, doneChan)
+	}
+
+	// We don't care about the return values; these functions always return nil.
+	// But we do need all of the parallel jobs to terminate.
+	for _, doneChan := range doneChans {
+		<-doneChan
+	}
+
+	return ctrNamedVolumes, nodeDetails.ctrsVisited, nodeDetails.ctrErrors, nil
}
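
For context, the traversal pattern this diff introduces can be shown outside of libpod. The sketch below is a minimal, self-contained Go program; the names node, traversal, and traverseInwards are invented for illustration, and plain goroutines with a sync.WaitGroup stand in for podman's pkg/parallel job queue. It mirrors the same idea: fan out from the nodes nothing depends on, run an action on each node while the shared visited/error maps are guarded by one mutex and each node by its own lock, and only recurse inwards once the action has finished. It is a sketch of the technique, not the podman implementation.

package main

import (
	"fmt"
	"sync"
)

// node and traversal are hypothetical stand-ins for containerNode and nodeTraversal.
type node struct {
	lock       sync.Mutex
	id         string
	dependsOn  []*node // nodes we depend on; acted on after us
	dependedOn []*node // nodes that depend on us; acted on before us
}

type traversal struct {
	lock    sync.Mutex // protects visited and errors
	action  func(n *node) error
	visited map[string]bool
	errors  map[string]error
}

func traverseInwards(n *node, t *traversal, setError bool) {
	n.lock.Lock()
	t.lock.Lock()
	seen := t.visited[n.id]
	t.lock.Unlock()
	if seen {
		n.lock.Unlock()
		return
	}
	// A node that depends on us failed: mark us failed and keep propagating.
	if setError {
		t.lock.Lock()
		t.visited[n.id] = true
		t.errors[n.id] = fmt.Errorf("a node depending on %s failed", n.id)
		t.lock.Unlock()
		n.lock.Unlock()
		for _, dep := range n.dependsOn {
			traverseInwards(dep, t, true)
		}
		return
	}
	// Bail out until every node that depends on us has been handled.
	for _, dep := range n.dependedOn {
		t.lock.Lock()
		done := t.visited[dep.id]
		t.lock.Unlock()
		if !done {
			n.lock.Unlock()
			return
		}
	}
	failed := false
	if err := t.action(n); err != nil {
		failed = true
		t.lock.Lock()
		t.errors[n.id] = err
		t.lock.Unlock()
	}
	// On success mark visited before recursing; on failure only after the
	// failure has propagated to the nodes we depend on.
	if !failed {
		t.lock.Lock()
		t.visited[n.id] = true
		t.lock.Unlock()
		n.lock.Unlock()
	}
	for _, dep := range n.dependsOn {
		traverseInwards(dep, t, failed)
	}
	if failed {
		t.lock.Lock()
		t.visited[n.id] = true
		t.lock.Unlock()
		n.lock.Unlock()
	}
}

func main() {
	b := &node{id: "b"}
	a := &node{id: "a", dependsOn: []*node{b}} // a depends on b, so a is acted on first
	b.dependedOn = []*node{a}
	t := &traversal{
		visited: map[string]bool{},
		errors:  map[string]error{},
		action:  func(n *node) error { fmt.Println("acting on", n.id); return nil },
	}
	// Fan out from the nodes nothing depends on; the WaitGroup stands in for
	// collecting the done-channels returned by parallel.Enqueue.
	var wg sync.WaitGroup
	for _, start := range []*node{a} {
		wg.Add(1)
		go func(n *node) {
			defer wg.Done()
			traverseInwards(n, t, false)
		}(start)
	}
	wg.Wait()
}

With a as the only node nothing depends on, the program prints "acting on a" before "acting on b", mirroring how stopContainerGraph stops dependent containers before the containers they depend on.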