Skip to content

Commit 3c34e2c

Browse files
authored
Merge pull request containerd#6965 from adrianreber/2022-05-20-checkpoint-cri-rpc
Wire through CRI checkpoint RPC
2 parents e53663c + f25770e commit 3c34e2c

File tree

20 files changed

+4085
-1
lines changed

20 files changed

+4085
-1
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ require (
88
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0
99
github.com/Microsoft/go-winio v0.6.1
1010
github.com/Microsoft/hcsshim v0.12.0
11+
github.com/checkpoint-restore/checkpointctl v1.1.0
12+
github.com/checkpoint-restore/go-criu/v7 v7.0.0
1113
github.com/containerd/btrfs/v2 v2.0.0
1214
github.com/containerd/cgroups/v3 v3.0.3
1315
github.com/containerd/console v1.0.4

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY
6363
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
6464
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
6565
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
66+
github.com/checkpoint-restore/checkpointctl v1.1.0 h1:plS/2zBzbAXO6DH/H+TqD7ZGhz8iQVb+NLgsOJSTWaw=
67+
github.com/checkpoint-restore/checkpointctl v1.1.0/go.mod h1:DtPd9M4bt/jdt+7DodFxm0lrzdevabk3cbni/FL4BY0=
68+
github.com/checkpoint-restore/go-criu/v7 v7.0.0 h1:R4UF/njKOuq8ooG7naFGsCeKsjv5j+rIhgFgSSeC2KY=
69+
github.com/checkpoint-restore/go-criu/v7 v7.0.0/go.mod h1:xD1v3cPww1QYpJR3+XTTdC8hYubPnptIPsT1daXhbr4=
6670
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
6771
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
6872
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=

internal/cri/instrument/instrumented_service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ func (in *instrumentedService) CheckpointContainer(ctx context.Context, r *runti
565565
}
566566
}()
567567

568-
res, err = in.c.CheckpointContainer(ctx, r)
568+
res, err = in.c.CheckpointContainer(ctrdutil.WithNamespace(ctx), r)
569569
return res, errdefs.ToGRPC(err)
570570
}
571571

internal/cri/server/container_checkpoint.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//go:build !linux
2+
13
/*
24
Copyright The containerd Authors.
35
@@ -18,12 +20,15 @@ package server
1820

1921
import (
2022
"context"
23+
"time"
2124

2225
"google.golang.org/grpc/codes"
2326
"google.golang.org/grpc/status"
2427
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2528
)
2629

2730
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) {
31+
// The next line is just needed to make the linter happy.
32+
containerCheckpointTimer.WithValues("no-runtime").UpdateSince(time.Now())
2833
return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
2934
}
Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package server
20+
21+
import (
22+
"archive/tar"
23+
"context"
24+
"encoding/json"
25+
"errors"
26+
"fmt"
27+
"io"
28+
"os"
29+
"path/filepath"
30+
"strings"
31+
"time"
32+
33+
crmetadata "github.com/checkpoint-restore/checkpointctl/lib"
34+
"github.com/checkpoint-restore/go-criu/v7"
35+
"github.com/containerd/containerd/v2/core/content"
36+
"github.com/containerd/containerd/v2/core/images"
37+
"github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
38+
"github.com/containerd/containerd/v2/pkg/archive"
39+
"github.com/containerd/containerd/v2/plugins"
40+
"github.com/containerd/containerd/v2/protobuf/proto"
41+
ptypes "github.com/containerd/containerd/v2/protobuf/types"
42+
"github.com/containerd/log"
43+
v1 "github.com/opencontainers/image-spec/specs-go/v1"
44+
"google.golang.org/grpc/codes"
45+
"google.golang.org/grpc/status"
46+
47+
"github.com/containerd/containerd/v2/client"
48+
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
49+
)
50+
51+
// PodCriuVersion is the version of CRIU needed for
52+
// checkpointing and restoring containers out of and into Pods.
53+
const podCriuVersion = 31600
54+
55+
// CheckForCriu uses CRIU's go bindings to check if the CRIU
56+
// binary exists and if it at least the version Podman needs.
57+
func checkForCriu(version int) error {
58+
c := criu.MakeCriu()
59+
criuVersion, err := c.GetCriuVersion()
60+
if err != nil {
61+
return fmt.Errorf("failed to check for criu version: %w", err)
62+
}
63+
64+
if criuVersion >= version {
65+
return nil
66+
}
67+
return fmt.Errorf("checkpoint/restore requires at least CRIU %d, current version is %d", version, criuVersion)
68+
}
69+
70+
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (*runtime.CheckpointContainerResponse, error) {
71+
start := time.Now()
72+
if err := checkForCriu(podCriuVersion); err != nil {
73+
// This is the wrong error message and needs to be adapted once
74+
// Kubernetes (the e2e_node/checkpoint) test has been changed to
75+
// handle too old or missing CRIU error messages.
76+
log.G(ctx).WithError(err).Errorf("Failed to checkpoint container %q", r.GetContainerId())
77+
return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
78+
}
79+
80+
container, err := c.containerStore.Get(r.GetContainerId())
81+
if err != nil {
82+
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
83+
}
84+
85+
state := container.Status.Get().State()
86+
if state != runtime.ContainerState_CONTAINER_RUNNING {
87+
return nil, fmt.Errorf(
88+
"container %q is in %s state. only %s containers can be checkpointed",
89+
r.GetContainerId(),
90+
criContainerStateToString(state),
91+
criContainerStateToString(runtime.ContainerState_CONTAINER_RUNNING),
92+
)
93+
}
94+
95+
imageRef := container.ImageRef
96+
image, err := c.GetImage(imageRef)
97+
if err != nil {
98+
return nil, fmt.Errorf("getting container image failed: %w", err)
99+
}
100+
101+
i, err := container.Container.Info(ctx)
102+
if err != nil {
103+
return nil, fmt.Errorf("get container info: %w", err)
104+
}
105+
106+
configJSON, err := json.Marshal(&crmetadata.ContainerConfig{
107+
ID: container.ID,
108+
Name: container.Name,
109+
RootfsImageName: func() string {
110+
if len(image.References) > 0 {
111+
return image.References[0]
112+
}
113+
return ""
114+
}(),
115+
RootfsImageRef: imageRef,
116+
OCIRuntime: i.Runtime.Name,
117+
RootfsImage: container.Config.GetImage().UserSpecifiedImage,
118+
CheckpointedAt: time.Now(),
119+
CreatedTime: i.CreatedAt,
120+
})
121+
if err != nil {
122+
return nil, fmt.Errorf("generating container config JSON failed: %w", err)
123+
}
124+
125+
task, err := container.Container.Task(ctx, nil)
126+
if err != nil {
127+
return nil, fmt.Errorf("failed to get task for container %q: %w", r.GetContainerId(), err)
128+
}
129+
img, err := task.Checkpoint(ctx, []client.CheckpointTaskOpts{withCheckpointOpts(i.Runtime.Name, c.getContainerRootDir(r.GetContainerId()))}...)
130+
if err != nil {
131+
return nil, fmt.Errorf("checkpointing container %q failed: %w", r.GetContainerId(), err)
132+
}
133+
134+
// the checkpoint image has been provided as an index with manifests representing the tar of criu data, the rw layer, and the config
135+
var (
136+
index v1.Index
137+
rawIndex []byte
138+
targetDesc = img.Target()
139+
contentStore = img.ContentStore()
140+
)
141+
142+
rawIndex, err = content.ReadBlob(ctx, contentStore, targetDesc)
143+
if err != nil {
144+
return nil, fmt.Errorf("failed to retrieve checkpoint index blob from content store: %w", err)
145+
}
146+
if err = json.Unmarshal(rawIndex, &index); err != nil {
147+
return nil, fmt.Errorf("failed to unmarshall blob into checkpoint data OCI index: %w", err)
148+
}
149+
150+
cpPath := filepath.Join(c.getContainerRootDir(r.GetContainerId()), "ctrd-checkpoint")
151+
if err := os.MkdirAll(cpPath, 0o700); err != nil {
152+
return nil, err
153+
}
154+
defer os.RemoveAll(cpPath)
155+
156+
if err := os.WriteFile(filepath.Join(cpPath, crmetadata.ConfigDumpFile), configJSON, 0o600); err != nil {
157+
return nil, err
158+
}
159+
160+
// walk the manifests and pull out the blobs that we need to save in the checkpoint tarball:
161+
// - the checkpoint criu data
162+
// - the rw diff tarball
163+
// - the spec blob
164+
for _, manifest := range index.Manifests {
165+
switch manifest.MediaType {
166+
case images.MediaTypeContainerd1Checkpoint:
167+
if err := writeCriuCheckpointData(ctx, contentStore, manifest, cpPath); err != nil {
168+
return nil, fmt.Errorf("failed to copy CRIU checkpoint blob to checkpoint dir: %w", err)
169+
}
170+
case v1.MediaTypeImageLayerGzip:
171+
if err := writeRootFsDiffTar(ctx, contentStore, manifest, cpPath); err != nil {
172+
return nil, fmt.Errorf("failed to copy rw filesystem layer blob to checkpoint dir: %w", err)
173+
}
174+
case images.MediaTypeContainerd1CheckpointConfig:
175+
if err := writeSpecDumpFile(ctx, contentStore, manifest, cpPath); err != nil {
176+
return nil, fmt.Errorf("failed to copy container spec blob to checkpoint dir: %w", err)
177+
}
178+
default:
179+
}
180+
}
181+
182+
// write final tarball of all content
183+
tar := archive.Diff(ctx, "", cpPath)
184+
185+
outFile, err := os.OpenFile(r.Location, os.O_RDWR|os.O_CREATE, 0600)
186+
if err != nil {
187+
return nil, err
188+
}
189+
defer outFile.Close()
190+
_, err = io.Copy(outFile, tar)
191+
if err != nil {
192+
return nil, err
193+
}
194+
if err := tar.Close(); err != nil {
195+
return nil, err
196+
}
197+
198+
containerCheckpointTimer.WithValues(i.Runtime.Name).UpdateSince(start)
199+
200+
return &runtime.CheckpointContainerResponse{}, nil
201+
}
202+
203+
func withCheckpointOpts(rt, rootDir string) client.CheckpointTaskOpts {
204+
return func(r *client.CheckpointTaskInfo) error {
205+
// Kubernetes currently supports checkpointing of container
206+
// as part of the Forensic Container Checkpointing KEP.
207+
// This implies that the container is never stopped
208+
leaveRunning := true
209+
210+
switch rt {
211+
case plugins.RuntimeRuncV2:
212+
if r.Options == nil {
213+
r.Options = &options.CheckpointOptions{}
214+
}
215+
opts, _ := r.Options.(*options.CheckpointOptions)
216+
217+
opts.Exit = !leaveRunning
218+
opts.WorkPath = rootDir
219+
}
220+
return nil
221+
}
222+
}
223+
224+
func writeCriuCheckpointData(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
225+
ra, err := store.ReaderAt(ctx, desc)
226+
if err != nil {
227+
return err
228+
}
229+
defer ra.Close()
230+
231+
checkpointDirectory := filepath.Join(cpPath, crmetadata.CheckpointDirectory)
232+
// This is the criu data tarball. Let's unpack it
233+
// and put it into the crmetadata.CheckpointDirectory directory.
234+
if err := os.MkdirAll(checkpointDirectory, 0o700); err != nil {
235+
return err
236+
}
237+
tr := tar.NewReader(content.NewReader(ra))
238+
for {
239+
header, err := tr.Next()
240+
if err != nil {
241+
if errors.Is(err, io.EOF) {
242+
break
243+
}
244+
return err
245+
}
246+
if strings.Contains(header.Name, "..") {
247+
return fmt.Errorf("found illegal string '..' in checkpoint archive")
248+
}
249+
destFile, err := os.Create(filepath.Join(checkpointDirectory, header.Name))
250+
if err != nil {
251+
return err
252+
}
253+
defer destFile.Close()
254+
255+
_, err = io.CopyN(destFile, tr, header.Size)
256+
if err != nil {
257+
return err
258+
}
259+
}
260+
return nil
261+
}
262+
263+
func writeRootFsDiffTar(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
264+
ra, err := store.ReaderAt(ctx, desc)
265+
if err != nil {
266+
return err
267+
}
268+
defer ra.Close()
269+
270+
// the rw layer tarball
271+
f, err := os.Create(filepath.Join(cpPath, crmetadata.RootFsDiffTar))
272+
if err != nil {
273+
return err
274+
}
275+
defer f.Close()
276+
277+
_, err = io.Copy(f, content.NewReader(ra))
278+
if err != nil {
279+
return err
280+
}
281+
282+
return nil
283+
}
284+
285+
func writeSpecDumpFile(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
286+
// this is the container spec
287+
f, err := os.Create(filepath.Join(cpPath, crmetadata.SpecDumpFile))
288+
if err != nil {
289+
return err
290+
}
291+
defer f.Close()
292+
data, err := content.ReadBlob(ctx, store, desc)
293+
if err != nil {
294+
return err
295+
}
296+
var any ptypes.Any
297+
if err := proto.Unmarshal(data, &any); err != nil {
298+
return err
299+
}
300+
_, err = f.Write(any.Value)
301+
if err != nil {
302+
return err
303+
}
304+
305+
return nil
306+
}

internal/cri/server/metrics.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ var (
3535
containerStopTimer metrics.LabeledTimer
3636
containerStartTimer metrics.LabeledTimer
3737
containerEventsDroppedCount metrics.Counter
38+
containerCheckpointTimer metrics.LabeledTimer
3839

3940
networkPluginOperations metrics.LabeledCounter
4041
networkPluginOperationsErrors metrics.LabeledCounter
@@ -59,6 +60,7 @@ func init() {
5960
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
6061
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
6162
containerEventsDroppedCount = ns.NewCounter("container_events_dropped", "count container discarding event total from server start")
63+
containerCheckpointTimer = ns.NewLabeledTimer("container_checkpoint", "time to checkpoint a container", "runtime")
6264

6365
networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type")
6466
networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type")

0 commit comments

Comments
 (0)