
Commit 9983e87

Remove leftover autoremove containers during refresh
During system shutdown, Podman should go down gracefully, meaning that we have time to spawn cleanup processes which remove any containers set to autoremove. Unfortunately, this isn't always the case. If we get a SIGKILL because the system is going down immediately, we can't recover from this, and the autoremove containers are not removed.

However, we can pick up any leftover autoremove containers when we refresh the DB state, which is the first thing Podman does after a reboot. By detecting any autoremove containers that have actually run (a container that was created but never run doesn't need to be removed) at that point and removing them, we keep the fresh boot clean, even if Podman was terminated abnormally.

Fixes #21482

[NO NEW TESTS NEEDED] This requires a reboot to realistically test.

Signed-off-by: Matt Heon <[email protected]>
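In essence, the refresh path gains a per-container check like the one sketched below. This is a minimal, self-contained Go sketch of the idea only; the Container, Runtime, and removeContainer names here are simplified stand-ins rather than libpod's real internals, and the actual change is in the diff that follows.

package main

import (
	"context"
	"fmt"
)

// Simplified stand-ins for libpod's internal types; only the fields this
// sketch needs are modeled. These names are illustrative, not libpod's API.
type ContainerState int

const (
	StateCreated ContainerState = iota
	StateExited
)

type Container struct {
	ID         string
	AutoRemove bool
	State      ContainerState
}

type Runtime struct {
	containers []*Container
}

// removeContainer stands in for libpod's internal removal helper.
func (r *Runtime) removeContainer(ctx context.Context, ctr *Container) error {
	fmt.Printf("removing leftover autoremove container %s\n", ctr.ID)
	return nil
}

// refresh models the post-reboot state refresh: an autoremove container that
// actually ran (and so is in the Exited state) should have been cleaned up
// before shutdown; if it is still in the database, remove it now. Containers
// that were created but never run are left alone.
func (r *Runtime) refresh(ctx context.Context) {
	for _, ctr := range r.containers {
		if ctr.AutoRemove && ctr.State == StateExited {
			if err := r.removeContainer(ctx, ctr); err != nil {
				fmt.Printf("unable to remove container %s: %v\n", ctr.ID, err)
			}
		}
	}
}

func main() {
	rt := &Runtime{containers: []*Container{
		{ID: "aaa", AutoRemove: true, State: StateExited},  // removed on refresh
		{ID: "bbb", AutoRemove: true, State: StateCreated}, // never ran, kept
	}}
	rt.refresh(context.Background())
}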
1 parent 22b1650 commit 9983e87

File tree: 1 file changed (+30, -11 lines)


libpod/runtime.go

Lines changed: 30 additions & 11 deletions
@@ -166,7 +166,7 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
 	if err != nil {
 		return nil, err
 	}
-	return newRuntimeFromConfig(conf, options...)
+	return newRuntimeFromConfig(ctx, conf, options...)
 }
 
 // NewRuntimeFromConfig creates a new container runtime using the given
@@ -175,10 +175,10 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
 // An error will be returned if the configuration file at the given path does
 // not exist or cannot be loaded
 func NewRuntimeFromConfig(ctx context.Context, userConfig *config.Config, options ...RuntimeOption) (*Runtime, error) {
-	return newRuntimeFromConfig(userConfig, options...)
+	return newRuntimeFromConfig(ctx, userConfig, options...)
 }
 
-func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
+func newRuntimeFromConfig(ctx context.Context, conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
 	runtime := new(Runtime)
 
 	if conf.Engine.OCIRuntime == "" {
@@ -223,7 +223,7 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti
 		return nil, fmt.Errorf("starting shutdown signal handler: %w", err)
 	}
 
-	if err := makeRuntime(runtime); err != nil {
+	if err := makeRuntime(ctx, runtime); err != nil {
 		return nil, err
 	}
 
@@ -333,7 +333,7 @@ func getDBState(runtime *Runtime) (State, error) {
 
 // Make a new runtime based on the given configuration
 // Sets up containers/storage, state store, OCI runtime
-func makeRuntime(runtime *Runtime) (retErr error) {
+func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
 	// Find a working conmon binary
 	cPath, err := runtime.config.FindConmon()
 	if err != nil {
@@ -629,6 +629,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
 		return err
 	}
 
+	// Mark the runtime as valid - ready to be used, cannot be modified
+	// further.
+	// Need to do this *before* refresh as we can remove containers there.
+	// Should not be a big deal as we don't return it to users until after
+	// refresh runs.
+	runtime.valid = true
+
 	// If we need to refresh the state, do it now - things are guaranteed to
 	// be set up by now.
 	if doRefresh {
@@ -639,17 +646,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
 			}
 		}
 
-		if err2 := runtime.refresh(runtimeAliveFile); err2 != nil {
+		if err2 := runtime.refresh(ctx, runtimeAliveFile); err2 != nil {
 			return err2
 		}
 	}
 
 	runtime.startWorker()
 
-	// Mark the runtime as valid - ready to be used, cannot be modified
-	// further
-	runtime.valid = true
-
 	return nil
 }
 
@@ -819,7 +822,7 @@ func (r *Runtime) Shutdown(force bool) error {
 // Reconfigures the runtime after a reboot
 // Refreshes the state, recreating temporary files
 // Does not check validity as the runtime is not valid until after this has run
-func (r *Runtime) refresh(alivePath string) error {
+func (r *Runtime) refresh(ctx context.Context, alivePath string) error {
 	logrus.Debugf("Podman detected system restart - performing state refresh")
 
 	// Clear state of database if not running in container
@@ -856,6 +859,22 @@ func (r *Runtime) refresh(alivePath string) error {
 		if err := ctr.refresh(); err != nil {
 			logrus.Errorf("Refreshing container %s: %v", ctr.ID(), err)
 		}
+		// This is the only place it's safe to use ctr.state.State unlocked
+		// We're holding the alive lock, guaranteed to be the only Libpod on the system right now.
+		if ctr.AutoRemove() && ctr.state.State == define.ContainerStateExited {
+			opts := ctrRmOpts{
+				// Don't force-remove, we're supposed to be fresh off a reboot
+				// If we have to force something is seriously wrong
+				Force:        false,
+				RemoveVolume: true,
+			}
+			// This container should have autoremoved before the
+			// reboot but did not.
+			// Get rid of it.
+			if _, _, err := r.removeContainer(ctx, ctr, opts); err != nil {
+				logrus.Errorf("Unable to remove container %s which should have autoremoved: %v", ctr.ID(), err)
+			}
+		}
 	}
 	for _, pod := range pods {
 		if err := pod.refresh(); err != nil {
