Skip to content

Commit 1a55c72

Browse files
authored
Merge pull request containerd#3362 from apostasie/hardening-lifecycle-state-3350
Hardening lifecycle-state-store, name-store, and oci-hooks
2 parents 05c0aa2 + b954c7a commit 1a55c72

File tree

3 files changed

+52
-19
lines changed

3 files changed

+52
-19
lines changed

pkg/namestore/namestore.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import (
2222
"path/filepath"
2323
"strings"
2424

25+
"github.com/containerd/log"
26+
2527
"github.com/containerd/nerdctl/v2/pkg/identifiers"
2628
"github.com/containerd/nerdctl/v2/pkg/lockutil"
2729
)
@@ -56,12 +58,21 @@ func (x *nameStore) Acquire(name, id string) error {
5658
}
5759
fn := func() error {
5860
fileName := filepath.Join(x.dir, name)
59-
// If containerd was bounced, previously running containers that would get restarted will go again through
60-
// onCreateRuntime (unlike in a "normal" stop/start flow).
61-
// As such, we need to allow reacquiring by the same id
62-
// See: https://github.com/containerd/nerdctl/issues/3354
63-
if b, err := os.ReadFile(fileName); err == nil && string(b) != id {
64-
return fmt.Errorf("name %q is already used by ID %q", name, string(b))
61+
if b, err := os.ReadFile(fileName); err == nil {
62+
if strings.TrimSpace(string(b)) == "" {
63+
// currently acquired for an empty id - this obviously should never happen
64+
// this is recoverable, so we do not hard error, but it still indicates that something went wrong
65+
// https://github.com/containerd/nerdctl/issues/3351
66+
log.L.Errorf("current name %q is reserved for a an empty id - please report this is as a bug", name)
67+
} else if string(b) != id {
68+
// if acquired by a different container, we error out here
69+
return fmt.Errorf("name %q is already used by ID %q", name, string(b))
70+
}
71+
// Otherwise, this is just re-acquiring after a restart
72+
// For example, if containerd was bounced, previously running containers that would get restarted will go
73+
// again through onCreateRuntime (unlike in a "normal" stop/start flow).
74+
// As such, we are allowing reacquiring by the same id
75+
// See: https://github.com/containerd/nerdctl/issues/3354
6576
}
6677
return os.WriteFile(fileName, []byte(id), 0600)
6778
}

pkg/ocihook/ocihook.go

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -505,25 +505,37 @@ func onCreateRuntime(opts *handlerOpts) error {
505505
log.L.WithError(err).Error("failed re-acquiring name - see https://github.com/containerd/nerdctl/issues/2992")
506506
}
507507

508+
var netError error
508509
if opts.cni != nil {
509-
if err = applyNetworkSettings(opts); err != nil {
510-
return err
511-
}
510+
netError = applyNetworkSettings(opts)
512511
}
513512

514-
// Set StartedAt
515513
lf := state.NewLifecycleState(opts.state.Annotations[labels.StateDir])
516-
return lf.WithLock(func() error {
517-
err := lf.Load()
518-
if err != nil {
519-
return err
520-
}
514+
515+
return errors.Join(netError, lf.WithLock(func() error {
516+
// Errors are deliberately ignored here, as they should not be fatal.
517+
// The lifecycle struct is also already warning about the issue.
518+
_ = lf.Load()
521519
lf.StartedAt = time.Now()
520+
lf.CreateError = netError != nil
522521
return lf.Save()
523-
})
522+
}))
524523
}
525524

526525
func onPostStop(opts *handlerOpts) error {
526+
// See https://github.com/containerd/nerdctl/issues/3357
527+
// Check if we actually errored during onCreateRuntime
528+
// If that is the case, CreateError is set, and we are in postStop while the container will NOT be deleted (see ticket).
529+
// In that case, do NOT treat this as a deletion, as the container is still there.
530+
// Reset CreateError, and return.
531+
lf := state.NewLifecycleState(opts.state.Annotations[labels.StateDir])
532+
if lf.WithLock(lf.Load) == nil {
533+
if lf.CreateError {
534+
lf.CreateError = false
535+
return lf.WithLock(lf.Save)
536+
}
537+
}
538+
527539
ctx := context.Background()
528540
ns := opts.state.Annotations[labels.Namespace]
529541
if opts.cni != nil {

pkg/ocihook/state/state.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import (
2424
"path/filepath"
2525
"time"
2626

27+
"github.com/containerd/log"
28+
2729
"github.com/containerd/nerdctl/v2/pkg/lockutil"
2830
)
2931

@@ -49,8 +51,9 @@ func NewLifecycleState(stateDir string) *LifecycleState {
4951
}
5052

5153
type LifecycleState struct {
52-
stateDir string
53-
StartedAt time.Time `json:"started_at"`
54+
stateDir string
55+
StartedAt time.Time `json:"started_at"`
56+
CreateError bool `json:"create_error"`
5457
}
5558

5659
func (lf *LifecycleState) WithLock(fun func() error) error {
@@ -71,18 +74,25 @@ func (lf *LifecycleState) Load() error {
7174
} else {
7275
err = json.Unmarshal(data, lf)
7376
if err != nil {
77+
// Logging an error, as Load errors are generally ignored downstream
78+
log.L.Error("unable to unmarshall lifecycle data")
7479
return fmt.Errorf("unable to unmarshall lifecycle data: %w", err)
7580
}
7681
}
7782
return nil
7883
}
7984

8085
func (lf *LifecycleState) Save() error {
86+
// Write atomically (write to a temporary file, then rename) to prevent incomplete writes
8187
data, err := json.Marshal(lf)
8288
if err != nil {
8389
return fmt.Errorf("unable to marshall lifecycle data: %w", err)
8490
}
85-
err = os.WriteFile(filepath.Join(lf.stateDir, lifecycleFile), data, 0600)
91+
err = os.WriteFile(filepath.Join(lf.stateDir, "."+lifecycleFile), data, 0600)
92+
if err != nil {
93+
return fmt.Errorf("unable to write lifecycle file: %w", err)
94+
}
95+
err = os.Rename(filepath.Join(lf.stateDir, "."+lifecycleFile), filepath.Join(lf.stateDir, lifecycleFile))
8696
if err != nil {
8797
return fmt.Errorf("unable to write lifecycle file: %w", err)
8898
}

0 commit comments

Comments
 (0)