Skip to content

Commit f25770e

Browse files
committed
Wire through CRI ContainerCheckpoint RPC
This connects the new CRI ContainerCheckpoint RPC to the existing internal checkpoint functions. With this commit it is possible to checkpoint a container in Kubernetes using the Forensic Container Checkpointing KEP (containerd#2008): # curl X POST "https://localhost:10250/checkpoint/namespace/podId/container" Which will result in containerd creating a checkpoint in the location specified by Kubernetes (usually /var/lib/kubelet/checkpoints). This is a Linux only feature because CRIU only exists on Linux. Rewritten with the help of Phil Estes. Signed-off-by: Phil Estes <[email protected]> Signed-off-by: Adrian Reber <[email protected]>
1 parent e53663c commit f25770e

File tree

20 files changed

+4085
-1
lines changed

20 files changed

+4085
-1
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ require (
88
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0
99
github.com/Microsoft/go-winio v0.6.1
1010
github.com/Microsoft/hcsshim v0.12.0
11+
github.com/checkpoint-restore/checkpointctl v1.1.0
12+
github.com/checkpoint-restore/go-criu/v7 v7.0.0
1113
github.com/containerd/btrfs/v2 v2.0.0
1214
github.com/containerd/cgroups/v3 v3.0.3
1315
github.com/containerd/console v1.0.4

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY
6363
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
6464
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
6565
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
66+
github.com/checkpoint-restore/checkpointctl v1.1.0 h1:plS/2zBzbAXO6DH/H+TqD7ZGhz8iQVb+NLgsOJSTWaw=
67+
github.com/checkpoint-restore/checkpointctl v1.1.0/go.mod h1:DtPd9M4bt/jdt+7DodFxm0lrzdevabk3cbni/FL4BY0=
68+
github.com/checkpoint-restore/go-criu/v7 v7.0.0 h1:R4UF/njKOuq8ooG7naFGsCeKsjv5j+rIhgFgSSeC2KY=
69+
github.com/checkpoint-restore/go-criu/v7 v7.0.0/go.mod h1:xD1v3cPww1QYpJR3+XTTdC8hYubPnptIPsT1daXhbr4=
6670
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
6771
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
6872
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=

internal/cri/instrument/instrumented_service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ func (in *instrumentedService) CheckpointContainer(ctx context.Context, r *runti
565565
}
566566
}()
567567

568-
res, err = in.c.CheckpointContainer(ctx, r)
568+
res, err = in.c.CheckpointContainer(ctrdutil.WithNamespace(ctx), r)
569569
return res, errdefs.ToGRPC(err)
570570
}
571571

internal/cri/server/container_checkpoint.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//go:build !linux
2+
13
/*
24
Copyright The containerd Authors.
35
@@ -18,12 +20,15 @@ package server
1820

1921
import (
2022
"context"
23+
"time"
2124

2225
"google.golang.org/grpc/codes"
2326
"google.golang.org/grpc/status"
2427
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2528
)
2629

2730
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) {
31+
// The next line is just needed to make the linter happy.
32+
containerCheckpointTimer.WithValues("no-runtime").UpdateSince(time.Now())
2833
return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
2934
}
Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package server
20+
21+
import (
22+
"archive/tar"
23+
"context"
24+
"encoding/json"
25+
"errors"
26+
"fmt"
27+
"io"
28+
"os"
29+
"path/filepath"
30+
"strings"
31+
"time"
32+
33+
crmetadata "github.com/checkpoint-restore/checkpointctl/lib"
34+
"github.com/checkpoint-restore/go-criu/v7"
35+
"github.com/containerd/containerd/v2/core/content"
36+
"github.com/containerd/containerd/v2/core/images"
37+
"github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
38+
"github.com/containerd/containerd/v2/pkg/archive"
39+
"github.com/containerd/containerd/v2/plugins"
40+
"github.com/containerd/containerd/v2/protobuf/proto"
41+
ptypes "github.com/containerd/containerd/v2/protobuf/types"
42+
"github.com/containerd/log"
43+
v1 "github.com/opencontainers/image-spec/specs-go/v1"
44+
"google.golang.org/grpc/codes"
45+
"google.golang.org/grpc/status"
46+
47+
"github.com/containerd/containerd/v2/client"
48+
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
49+
)
50+
51+
// PodCriuVersion is the version of CRIU needed for
52+
// checkpointing and restoring containers out of and into Pods.
53+
const podCriuVersion = 31600
54+
55+
// CheckForCriu uses CRIU's go bindings to check if the CRIU
56+
// binary exists and if it at least the version Podman needs.
57+
func checkForCriu(version int) error {
58+
c := criu.MakeCriu()
59+
criuVersion, err := c.GetCriuVersion()
60+
if err != nil {
61+
return fmt.Errorf("failed to check for criu version: %w", err)
62+
}
63+
64+
if criuVersion >= version {
65+
return nil
66+
}
67+
return fmt.Errorf("checkpoint/restore requires at least CRIU %d, current version is %d", version, criuVersion)
68+
}
69+
70+
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (*runtime.CheckpointContainerResponse, error) {
71+
start := time.Now()
72+
if err := checkForCriu(podCriuVersion); err != nil {
73+
// This is the wrong error message and needs to be adapted once
74+
// Kubernetes (the e2e_node/checkpoint) test has been changed to
75+
// handle too old or missing CRIU error messages.
76+
log.G(ctx).WithError(err).Errorf("Failed to checkpoint container %q", r.GetContainerId())
77+
return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
78+
}
79+
80+
container, err := c.containerStore.Get(r.GetContainerId())
81+
if err != nil {
82+
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
83+
}
84+
85+
state := container.Status.Get().State()
86+
if state != runtime.ContainerState_CONTAINER_RUNNING {
87+
return nil, fmt.Errorf(
88+
"container %q is in %s state. only %s containers can be checkpointed",
89+
r.GetContainerId(),
90+
criContainerStateToString(state),
91+
criContainerStateToString(runtime.ContainerState_CONTAINER_RUNNING),
92+
)
93+
}
94+
95+
imageRef := container.ImageRef
96+
image, err := c.GetImage(imageRef)
97+
if err != nil {
98+
return nil, fmt.Errorf("getting container image failed: %w", err)
99+
}
100+
101+
i, err := container.Container.Info(ctx)
102+
if err != nil {
103+
return nil, fmt.Errorf("get container info: %w", err)
104+
}
105+
106+
configJSON, err := json.Marshal(&crmetadata.ContainerConfig{
107+
ID: container.ID,
108+
Name: container.Name,
109+
RootfsImageName: func() string {
110+
if len(image.References) > 0 {
111+
return image.References[0]
112+
}
113+
return ""
114+
}(),
115+
RootfsImageRef: imageRef,
116+
OCIRuntime: i.Runtime.Name,
117+
RootfsImage: container.Config.GetImage().UserSpecifiedImage,
118+
CheckpointedAt: time.Now(),
119+
CreatedTime: i.CreatedAt,
120+
})
121+
if err != nil {
122+
return nil, fmt.Errorf("generating container config JSON failed: %w", err)
123+
}
124+
125+
task, err := container.Container.Task(ctx, nil)
126+
if err != nil {
127+
return nil, fmt.Errorf("failed to get task for container %q: %w", r.GetContainerId(), err)
128+
}
129+
img, err := task.Checkpoint(ctx, []client.CheckpointTaskOpts{withCheckpointOpts(i.Runtime.Name, c.getContainerRootDir(r.GetContainerId()))}...)
130+
if err != nil {
131+
return nil, fmt.Errorf("checkpointing container %q failed: %w", r.GetContainerId(), err)
132+
}
133+
134+
// the checkpoint image has been provided as an index with manifests representing the tar of criu data, the rw layer, and the config
135+
var (
136+
index v1.Index
137+
rawIndex []byte
138+
targetDesc = img.Target()
139+
contentStore = img.ContentStore()
140+
)
141+
142+
rawIndex, err = content.ReadBlob(ctx, contentStore, targetDesc)
143+
if err != nil {
144+
return nil, fmt.Errorf("failed to retrieve checkpoint index blob from content store: %w", err)
145+
}
146+
if err = json.Unmarshal(rawIndex, &index); err != nil {
147+
return nil, fmt.Errorf("failed to unmarshall blob into checkpoint data OCI index: %w", err)
148+
}
149+
150+
cpPath := filepath.Join(c.getContainerRootDir(r.GetContainerId()), "ctrd-checkpoint")
151+
if err := os.MkdirAll(cpPath, 0o700); err != nil {
152+
return nil, err
153+
}
154+
defer os.RemoveAll(cpPath)
155+
156+
if err := os.WriteFile(filepath.Join(cpPath, crmetadata.ConfigDumpFile), configJSON, 0o600); err != nil {
157+
return nil, err
158+
}
159+
160+
// walk the manifests and pull out the blobs that we need to save in the checkpoint tarball:
161+
// - the checkpoint criu data
162+
// - the rw diff tarball
163+
// - the spec blob
164+
for _, manifest := range index.Manifests {
165+
switch manifest.MediaType {
166+
case images.MediaTypeContainerd1Checkpoint:
167+
if err := writeCriuCheckpointData(ctx, contentStore, manifest, cpPath); err != nil {
168+
return nil, fmt.Errorf("failed to copy CRIU checkpoint blob to checkpoint dir: %w", err)
169+
}
170+
case v1.MediaTypeImageLayerGzip:
171+
if err := writeRootFsDiffTar(ctx, contentStore, manifest, cpPath); err != nil {
172+
return nil, fmt.Errorf("failed to copy rw filesystem layer blob to checkpoint dir: %w", err)
173+
}
174+
case images.MediaTypeContainerd1CheckpointConfig:
175+
if err := writeSpecDumpFile(ctx, contentStore, manifest, cpPath); err != nil {
176+
return nil, fmt.Errorf("failed to copy container spec blob to checkpoint dir: %w", err)
177+
}
178+
default:
179+
}
180+
}
181+
182+
// write final tarball of all content
183+
tar := archive.Diff(ctx, "", cpPath)
184+
185+
outFile, err := os.OpenFile(r.Location, os.O_RDWR|os.O_CREATE, 0600)
186+
if err != nil {
187+
return nil, err
188+
}
189+
defer outFile.Close()
190+
_, err = io.Copy(outFile, tar)
191+
if err != nil {
192+
return nil, err
193+
}
194+
if err := tar.Close(); err != nil {
195+
return nil, err
196+
}
197+
198+
containerCheckpointTimer.WithValues(i.Runtime.Name).UpdateSince(start)
199+
200+
return &runtime.CheckpointContainerResponse{}, nil
201+
}
202+
203+
func withCheckpointOpts(rt, rootDir string) client.CheckpointTaskOpts {
204+
return func(r *client.CheckpointTaskInfo) error {
205+
// Kubernetes currently supports checkpointing of container
206+
// as part of the Forensic Container Checkpointing KEP.
207+
// This implies that the container is never stopped
208+
leaveRunning := true
209+
210+
switch rt {
211+
case plugins.RuntimeRuncV2:
212+
if r.Options == nil {
213+
r.Options = &options.CheckpointOptions{}
214+
}
215+
opts, _ := r.Options.(*options.CheckpointOptions)
216+
217+
opts.Exit = !leaveRunning
218+
opts.WorkPath = rootDir
219+
}
220+
return nil
221+
}
222+
}
223+
224+
func writeCriuCheckpointData(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
225+
ra, err := store.ReaderAt(ctx, desc)
226+
if err != nil {
227+
return err
228+
}
229+
defer ra.Close()
230+
231+
checkpointDirectory := filepath.Join(cpPath, crmetadata.CheckpointDirectory)
232+
// This is the criu data tarball. Let's unpack it
233+
// and put it into the crmetadata.CheckpointDirectory directory.
234+
if err := os.MkdirAll(checkpointDirectory, 0o700); err != nil {
235+
return err
236+
}
237+
tr := tar.NewReader(content.NewReader(ra))
238+
for {
239+
header, err := tr.Next()
240+
if err != nil {
241+
if errors.Is(err, io.EOF) {
242+
break
243+
}
244+
return err
245+
}
246+
if strings.Contains(header.Name, "..") {
247+
return fmt.Errorf("found illegal string '..' in checkpoint archive")
248+
}
249+
destFile, err := os.Create(filepath.Join(checkpointDirectory, header.Name))
250+
if err != nil {
251+
return err
252+
}
253+
defer destFile.Close()
254+
255+
_, err = io.CopyN(destFile, tr, header.Size)
256+
if err != nil {
257+
return err
258+
}
259+
}
260+
return nil
261+
}
262+
263+
func writeRootFsDiffTar(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
264+
ra, err := store.ReaderAt(ctx, desc)
265+
if err != nil {
266+
return err
267+
}
268+
defer ra.Close()
269+
270+
// the rw layer tarball
271+
f, err := os.Create(filepath.Join(cpPath, crmetadata.RootFsDiffTar))
272+
if err != nil {
273+
return err
274+
}
275+
defer f.Close()
276+
277+
_, err = io.Copy(f, content.NewReader(ra))
278+
if err != nil {
279+
return err
280+
}
281+
282+
return nil
283+
}
284+
285+
func writeSpecDumpFile(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
286+
// this is the container spec
287+
f, err := os.Create(filepath.Join(cpPath, crmetadata.SpecDumpFile))
288+
if err != nil {
289+
return err
290+
}
291+
defer f.Close()
292+
data, err := content.ReadBlob(ctx, store, desc)
293+
if err != nil {
294+
return err
295+
}
296+
var any ptypes.Any
297+
if err := proto.Unmarshal(data, &any); err != nil {
298+
return err
299+
}
300+
_, err = f.Write(any.Value)
301+
if err != nil {
302+
return err
303+
}
304+
305+
return nil
306+
}

internal/cri/server/metrics.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ var (
3535
containerStopTimer metrics.LabeledTimer
3636
containerStartTimer metrics.LabeledTimer
3737
containerEventsDroppedCount metrics.Counter
38+
containerCheckpointTimer metrics.LabeledTimer
3839

3940
networkPluginOperations metrics.LabeledCounter
4041
networkPluginOperationsErrors metrics.LabeledCounter
@@ -59,6 +60,7 @@ func init() {
5960
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
6061
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
6162
containerEventsDroppedCount = ns.NewCounter("container_events_dropped", "count container discarding event total from server start")
63+
containerCheckpointTimer = ns.NewLabeledTimer("container_checkpoint", "time to checkpoint a container", "runtime")
6264

6365
networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type")
6466
networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type")

0 commit comments

Comments
 (0)