Skip to content

Commit a0b73ae

Browse files
committed
sandbox: optimize the lock in PodSandbox
Signed-off-by: Abel Feng <[email protected]>
1 parent 0f1d274 commit a0b73ae

File tree

9 files changed

+138
-71
lines changed

9 files changed

+138
-71
lines changed

internal/cri/server/podsandbox/controller.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,10 @@ func (c *Controller) Wait(ctx context.Context, sandboxID string) (sandbox.ExitSt
158158

159159
}
160160

161-
func (c *Controller) waitSandboxExit(ctx context.Context, p *types.PodSandbox, exitCh <-chan containerd.ExitStatus) (exitStatus uint32, exitedAt time.Time, err error) {
161+
func (c *Controller) waitSandboxExit(ctx context.Context, p *types.PodSandbox, exitCh <-chan containerd.ExitStatus) error {
162162
select {
163163
case e := <-exitCh:
164-
exitStatus, exitedAt, err = e.Result()
164+
exitStatus, exitedAt, err := e.Result()
165165
if err != nil {
166166
log.G(ctx).WithError(err).Errorf("failed to get task exit status for %q", p.ID)
167167
exitStatus = unknownExitCode
@@ -171,12 +171,16 @@ func (c *Controller) waitSandboxExit(ctx context.Context, p *types.PodSandbox, e
171171
dctx, dcancel := context.WithTimeout(dctx, handleEventTimeout)
172172
defer dcancel()
173173
event := &eventtypes.TaskExit{ExitStatus: exitStatus, ExitedAt: protobuf.ToTimestamp(exitedAt)}
174-
if cleanErr := handleSandboxTaskExit(dctx, p, event); cleanErr != nil {
174+
if err := handleSandboxTaskExit(dctx, p, event); err != nil {
175+
// TODO will backoff the event to the controller's own EventMonitor, but not cri's,
176+
// because we should call handleSandboxTaskExit again the next time
177+
// eventMonitor handle this event. but now it goes into cri's EventMonitor,
178+
// the handleSandboxTaskExit will not be called anymore
175179
c.cri.BackOffEvent(p.ID, e)
176180
}
177-
return
181+
return nil
178182
case <-ctx.Done():
179-
return unknownExitCode, time.Now(), ctx.Err()
183+
return ctx.Err()
180184
}
181185
}
182186

@@ -196,5 +200,8 @@ func handleSandboxTaskExit(ctx context.Context, sb *types.PodSandbox, e *eventty
196200
}
197201
}
198202
}
203+
if err := sb.Exit(e.ExitStatus, protobuf.FromTimestamp(e.ExitedAt)); err != nil {
204+
return err
205+
}
199206
return nil
200207
}

internal/cri/server/podsandbox/controller_test.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323

2424
"github.com/stretchr/testify/assert"
2525

26-
containerd "github.com/containerd/containerd/v2/client"
2726
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
2827
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
2928
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
@@ -63,10 +62,7 @@ func Test_Status(t *testing.T) {
6362
CreatedAt: createdAt,
6463
})
6564
sb.Metadata = sandboxstore.Metadata{ID: sandboxID}
66-
err := controller.store.Save(sb)
67-
if err != nil {
68-
t.Fatal(err)
69-
}
65+
assert.NoError(t, controller.store.Save(sb))
7066
s, err := controller.Status(context.Background(), sandboxID, false)
7167
if err != nil {
7268
t.Fatal(err)
@@ -75,7 +71,8 @@ func Test_Status(t *testing.T) {
7571
assert.Equal(t, s.CreatedAt, createdAt)
7672
assert.Equal(t, s.State, sandboxstore.StateReady.String())
7773

78-
sb.Exit(*containerd.NewExitStatus(exitStatus, exitedAt, nil))
74+
assert.NoError(t, sb.Exit(exitStatus, exitedAt))
75+
7976
exit, err := controller.Wait(context.Background(), sandboxID)
8077
if err != nil {
8178
t.Fatal(err)

internal/cri/server/podsandbox/recover.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,9 @@ func (c *Controller) RecoverContainer(ctx context.Context, cntr containerd.Conta
134134
}
135135
if ch != nil {
136136
go func() {
137-
code, exitTime, err := c.waitSandboxExit(ctrdutil.NamespacedContext(), podSandbox, ch)
138-
podSandbox.Exit(*containerd.NewExitStatus(code, exitTime, err))
137+
if err := c.waitSandboxExit(ctrdutil.NamespacedContext(), podSandbox, ch); err != nil {
138+
log.G(context.Background()).Warnf("failed to wait pod sandbox exit %v", err)
139+
}
139140
}()
140141
}
141142

internal/cri/server/podsandbox/recover_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ func TestRecoverContainer(t *testing.T) {
403403

404404
pSb := controller.store.Get(cont.ID())
405405
assert.NotNil(t, pSb)
406-
assert.Equal(t, c.expectedState, pSb.State, "%s state is not expected", cont.ID())
406+
assert.Equal(t, c.expectedState, pSb.Status.Get().State, "%s state is not expected", cont.ID())
407407

408408
if c.expectedExitCode > 0 {
409409
cont.t.waitExitCh <- struct{}{}

internal/cri/server/podsandbox/sandbox_run.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.Controll
216216
if err != nil {
217217
return cin, fmt.Errorf("failed to get sandbox container info: %w", err)
218218
}
219-
podSandbox.CreatedAt = info.CreatedAt
220219

221220
// Create sandbox task in containerd.
222221
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).", id, metadata.Name)
@@ -242,7 +241,6 @@ func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.Controll
242241
}
243242
}
244243
}()
245-
podSandbox.Pid = task.Pid()
246244

247245
// wait is a long running background request, no timeout needed.
248246
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
@@ -267,16 +265,25 @@ func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.Controll
267265
if err := task.Start(ctx); err != nil {
268266
return cin, fmt.Errorf("failed to start sandbox container task %q: %w", id, err)
269267
}
270-
podSandbox.State = sandboxstore.StateReady
268+
pid := task.Pid()
269+
if err := podSandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
270+
status.Pid = pid
271+
status.State = sandboxstore.StateReady
272+
status.CreatedAt = info.CreatedAt
273+
return status, nil
274+
}); err != nil {
275+
return cin, fmt.Errorf("failed to update status of pod sandbox %q: %w", id, err)
276+
}
271277

272278
cin.SandboxID = id
273279
cin.Pid = task.Pid()
274280
cin.CreatedAt = info.CreatedAt
275281
cin.Labels = labels
276282

277283
go func() {
278-
code, exitTime, err := c.waitSandboxExit(ctrdutil.NamespacedContext(), podSandbox, exitCh)
279-
podSandbox.Exit(*containerd.NewExitStatus(code, exitTime, err))
284+
if err := c.waitSandboxExit(ctrdutil.NamespacedContext(), podSandbox, exitCh); err != nil {
285+
log.G(context.Background()).Warnf("failed to wait pod sandbox exit %v", err)
286+
}
280287
}()
281288

282289
return

internal/cri/server/podsandbox/sandbox_status.go

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,15 @@ func (c *Controller) Status(ctx context.Context, sandboxID string, verbose bool)
3636
if sb == nil {
3737
return sandbox.ControllerStatus{}, fmt.Errorf("unable to find sandbox %q: %w", sandboxID, errdefs.ErrNotFound)
3838
}
39-
39+
status := sb.Status.Get()
4040
cstatus := sandbox.ControllerStatus{
4141
SandboxID: sandboxID,
42-
Pid: sb.Pid,
43-
State: sb.State.String(),
44-
CreatedAt: sb.CreatedAt,
42+
Pid: status.Pid,
43+
State: status.State.String(),
44+
CreatedAt: status.CreatedAt,
45+
ExitedAt: status.ExitedAt,
4546
Extra: nil,
4647
}
47-
exitStatus := sb.GetExitStatus()
48-
if exitStatus != nil {
49-
cstatus.ExitedAt = exitStatus.ExitTime()
50-
}
5148

5249
if verbose {
5350
info, err := toCRISandboxInfo(ctx, sb)
@@ -64,7 +61,7 @@ func (c *Controller) Status(ctx context.Context, sandboxID string, verbose bool)
6461
// toCRISandboxInfo converts internal container object information to CRI sandbox status response info map.
6562
func toCRISandboxInfo(ctx context.Context, sb *types.PodSandbox) (map[string]string, error) {
6663
si := &critypes.SandboxInfo{
67-
Pid: sb.Pid,
64+
Pid: sb.Status.Get().Pid,
6865
Config: sb.Metadata.Config,
6966
RuntimeHandler: sb.Metadata.RuntimeHandler,
7067
CNIResult: sb.Metadata.CNIResult,

internal/cri/server/podsandbox/sandbox_stop.go

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,15 @@ import (
2222
"syscall"
2323
"time"
2424

25+
"github.com/containerd/errdefs"
2526
"github.com/containerd/log"
2627

2728
eventtypes "github.com/containerd/containerd/v2/api/events"
28-
containerd "github.com/containerd/containerd/v2/client"
2929
"github.com/containerd/containerd/v2/core/sandbox"
3030
"github.com/containerd/containerd/v2/internal/cri/server/podsandbox/types"
3131
sandboxstore "github.com/containerd/containerd/v2/internal/cri/store/sandbox"
3232
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
3333
"github.com/containerd/containerd/v2/protobuf"
34-
"github.com/containerd/errdefs"
3534
)
3635

3736
func (c *Controller) Stop(ctx context.Context, sandboxID string, _ ...sandbox.StopOpt) error {
@@ -46,7 +45,7 @@ func (c *Controller) Stop(ctx context.Context, sandboxID string, _ ...sandbox.St
4645
if err != nil {
4746
return err
4847
}
49-
state := podSandbox.State
48+
state := podSandbox.Status.Get().State
5049
if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown {
5150
if err := c.stopSandboxContainer(ctx, podSandbox); err != nil {
5251
return fmt.Errorf("failed to stop sandbox container %q in %q state: %w", sandboxID, state, err)
@@ -64,7 +63,7 @@ func (c *Controller) Stop(ctx context.Context, sandboxID string, _ ...sandbox.St
6463
func (c *Controller) stopSandboxContainer(ctx context.Context, podSandbox *types.PodSandbox) error {
6564
id := podSandbox.ID
6665
container := podSandbox.Container
67-
state := podSandbox.State
66+
state := podSandbox.Status.Get().State
6867
task, err := container.Task(ctx, nil)
6968
if err != nil {
7069
if !errdefs.IsNotFound(err) {
@@ -95,12 +94,8 @@ func (c *Controller) stopSandboxContainer(ctx context.Context, podSandbox *types
9594
stopCh := make(chan struct{})
9695
go func() {
9796
defer close(stopCh)
98-
exitStatus, exitedAt, err := c.waitSandboxExit(exitCtx, podSandbox, exitCh)
99-
if err != context.Canceled && err != context.DeadlineExceeded {
100-
// The error of context.Canceled or context.DeadlineExceeded indicates the task.Wait is not finished,
101-
// so we can not set the exit status of the pod sandbox.
102-
podSandbox.Exit(*containerd.NewExitStatus(exitStatus, exitedAt, err))
103-
} else {
97+
err := c.waitSandboxExit(exitCtx, podSandbox, exitCh)
98+
if err != nil {
10499
log.G(ctx).WithError(err).Errorf("Failed to wait pod sandbox exit %+v", err)
105100
}
106101
}()

internal/cri/server/podsandbox/types/podsandbox.go

Lines changed: 23 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package types
1818

1919
import (
2020
"context"
21-
"sync"
2221
"time"
2322

2423
containerd "github.com/containerd/containerd/v2/client"
@@ -28,56 +27,47 @@ import (
2827
)
2928

3029
type PodSandbox struct {
31-
mu sync.Mutex
32-
ID string
33-
Container containerd.Container
34-
State sandboxstore.State
35-
Metadata sandboxstore.Metadata
36-
Runtime sandbox.RuntimeOpts
37-
Pid uint32
38-
CreatedAt time.Time
39-
stopChan *store.StopCh
40-
exitStatus *containerd.ExitStatus
30+
ID string
31+
Container containerd.Container
32+
Metadata sandboxstore.Metadata
33+
Runtime sandbox.RuntimeOpts
34+
Status sandboxstore.StatusStorage
35+
stopChan *store.StopCh
4136
}
4237

4338
func NewPodSandbox(id string, status sandboxstore.Status) *PodSandbox {
4439
podSandbox := &PodSandbox{
4540
ID: id,
4641
Container: nil,
4742
stopChan: store.NewStopCh(),
48-
CreatedAt: status.CreatedAt,
49-
State: status.State,
50-
Pid: status.Pid,
43+
Status: sandboxstore.StoreStatus(status),
5144
}
5245
if status.State == sandboxstore.StateNotReady {
53-
podSandbox.Exit(*containerd.NewExitStatus(status.ExitStatus, status.ExitedAt, nil))
46+
podSandbox.stopChan.Stop()
5447
}
5548
return podSandbox
5649
}
5750

58-
func (p *PodSandbox) Exit(status containerd.ExitStatus) {
59-
p.mu.Lock()
60-
defer p.mu.Unlock()
61-
p.exitStatus = &status
62-
p.State = sandboxstore.StateNotReady
51+
func (p *PodSandbox) Exit(code uint32, exitTime time.Time) error {
52+
if err := p.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
53+
status.State = sandboxstore.StateNotReady
54+
status.ExitStatus = code
55+
status.ExitedAt = exitTime
56+
status.Pid = 0
57+
return status, nil
58+
}); err != nil {
59+
return err
60+
}
6361
p.stopChan.Stop()
62+
return nil
6463
}
6564

66-
func (p *PodSandbox) Wait(ctx context.Context) (*containerd.ExitStatus, error) {
67-
s := p.GetExitStatus()
68-
if s != nil {
69-
return s, nil
70-
}
65+
func (p *PodSandbox) Wait(ctx context.Context) (containerd.ExitStatus, error) {
7166
select {
7267
case <-ctx.Done():
73-
return nil, ctx.Err()
68+
return containerd.ExitStatus{}, ctx.Err()
7469
case <-p.stopChan.Stopped():
75-
return p.GetExitStatus(), nil
70+
status := p.Status.Get()
71+
return *containerd.NewExitStatus(status.ExitStatus, status.ExitedAt, nil), nil
7672
}
7773
}
78-
79-
func (p *PodSandbox) GetExitStatus() *containerd.ExitStatus {
80-
p.mu.Lock()
81-
defer p.mu.Unlock()
82-
return p.exitStatus
83-
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package types
18+
19+
import (
20+
"context"
21+
"sync"
22+
"testing"
23+
"time"
24+
25+
"github.com/stretchr/testify/assert"
26+
27+
"github.com/containerd/containerd/v2/internal/cri/store/sandbox"
28+
)
29+
30+
func Test_PodSandbox(t *testing.T) {
31+
p := NewPodSandbox("test", sandbox.Status{State: sandbox.StateUnknown})
32+
assert.Equal(t, p.Status.Get().State, sandbox.StateUnknown)
33+
assert.Equal(t, p.ID, "test")
34+
p.Metadata = sandbox.Metadata{ID: "test", NetNSPath: "/test"}
35+
createAt := time.Now()
36+
assert.NoError(t, p.Status.Update(func(status sandbox.Status) (sandbox.Status, error) {
37+
status.State = sandbox.StateReady
38+
status.Pid = uint32(100)
39+
status.CreatedAt = createAt
40+
return status, nil
41+
}))
42+
status := p.Status.Get()
43+
assert.Equal(t, status.State, sandbox.StateReady)
44+
assert.Equal(t, status.Pid, uint32(100))
45+
assert.Equal(t, status.CreatedAt, createAt)
46+
47+
exitAt := time.Now().Add(time.Second)
48+
wg := sync.WaitGroup{}
49+
wg.Add(1)
50+
go func() {
51+
defer wg.Done()
52+
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500)
53+
defer cancel()
54+
_, err := p.Wait(ctx)
55+
assert.Equal(t, err, ctx.Err())
56+
}()
57+
58+
wg.Add(1)
59+
go func() {
60+
defer wg.Done()
61+
exitStatus, err := p.Wait(context.Background())
62+
assert.Equal(t, err, nil)
63+
code, exitTime, err := exitStatus.Result()
64+
assert.Equal(t, err, nil)
65+
assert.Equal(t, code, uint32(128))
66+
assert.Equal(t, exitTime, exitAt)
67+
}()
68+
time.Sleep(time.Second)
69+
if err := p.Exit(uint32(128), exitAt); err != nil {
70+
t.Fatalf("failed to set exit of pod sandbox %v", err)
71+
}
72+
wg.Wait()
73+
}

0 commit comments

Comments
 (0)