Skip to content

Commit 60ae709

Browse files
committed
checkpoint: support lazy migration
With the help of userfaultfd CRIU supports lazy migration. Lazy migration means that memory pages are only transferred from the migration source to the migration destination on page fault. This makes it possible to reduce the downtime during process or container migration to a minimum, as the memory does not need to be transferred during migration. Lazy migration currently depends on userfaultfd being available in the running Linux kernel and on the CRIU version in use supporting lazy migration. Both dependencies can be checked by querying CRIU via RPC whether the lazy migration feature is available. Using feature checking instead of version comparison enables runC to use CRIU features from the criu-dev branch. This way the user can decide if lazy migration should be available by choosing the right kernel and CRIU branch. To use lazy migration the CRIU process during dump needs to dump everything besides the memory pages and then it opens a network port waiting for remote page fault requests: # runc checkpoint httpd --lazy-pages --page-server 0.0.0.0:27 \ --status-fd /tmp/postcopy-pipe In this example CRIU will hang/wait once it has opened the network port and wait for a network connection. As runC waits for CRIU to finish it will also hang until the lazy migration has finished. To know when the restore on the destination side can start the '--status-fd' parameter is used: # runc checkpoint --help | grep status --status-fd value criu writes \0 to this FD once lazy-pages is ready The parameter '--status-fd' is directly from CRIU and this way the process outside of runC which controls the migration knows exactly when to transfer the checkpoint (without memory pages) to the destination and that the restore can be started. 
On the destination side it is necessary to start CRIU in 'lazy-pages' mode like this: # criu lazy-pages --page-server --address 192.168.122.3 --port 27 \ -D checkpoint and tell runC to do a lazy restore: # runc restore -d --image-path checkpoint --work-path checkpoint \ --lazy-pages httpd If both processes on the restore side have the same working directory 'criu lazy-pages' creates a unix domain socket where it waits for requests from the actual restore. runC starts CRIU restore in lazy restore mode and tells 'criu lazy-pages' that it wants to restore memory pages on demand. CRIU continues to restore the process and once the process is running and accesses the first non-existing memory page the 'criu lazy-pages' server will request the page from the source system. Thus all pages from the source system will be transferred to the destination system. Once all pages have been transferred runC on the source system will end and the container will have finished migration. This can also be combined with CRIU's pre-copy support. The combination of pre-copy and post-copy (lazy migration) provides the possibility to migrate containers with minimal downtime. Some additional background about post-copy migration can be found in these articles: https://lisas.de/~adrian/?p=1253 https://lisas.de/~adrian/?p=1183 Signed-off-by: Adrian Reber <[email protected]>
1 parent a3a632a commit 60ae709

File tree

4 files changed

+49
-0
lines changed

4 files changed

+49
-0
lines changed

checkpoint.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ checkpointed.`,
3030
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
3131
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
3232
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
33+
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
34+
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
3335
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
3436
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
3537
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},

libcontainer/container_linux.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,25 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
773773
}
774774
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
775775
}
776+
return nil
777+
}
778+
779+
// waitForCriuLazyServer blocks until a single byte can be read from r and
// then forwards that byte to the file named by status.
//
// r is the read end of the pipe whose write end is handed to CRIU as its
// status fd during a lazy-pages checkpoint; the byte arriving on it is the
// readiness notification. status must name an already existing file (or
// FIFO): it is opened write-only with truncation and is not created.
//
// The first error from reading, opening, writing or closing is returned.
// r is not closed here; the caller remains responsible for it.
func waitForCriuLazyServer(r *os.File, status string) error {
	data := make([]byte, 1)
	if _, err := r.Read(data); err != nil {
		return err
	}

	// No O_CREATE is requested, so the permission argument is unused.
	fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, 0)
	if err != nil {
		return err
	}

	if _, err := fd.Write(data); err != nil {
		// Release the descriptor even when the write fails; the write
		// error is the one worth reporting.
		fd.Close()
		return err
	}

	// Surface close failures: the notification only counts as delivered
	// once the descriptor has been closed cleanly.
	return fd.Close()
}
@@ -840,6 +859,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
840859
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
841860
OrphanPtsMaster: proto.Bool(true),
842861
AutoDedup: proto.Bool(criuOpts.AutoDedup),
862+
LazyPages: proto.Bool(criuOpts.LazyPages),
843863
}
844864

845865
fcg := c.cgroupManager.GetPaths()["freezer"]
@@ -890,6 +910,24 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
890910
Opts: &rpcOpts,
891911
}
892912

913+
if criuOpts.LazyPages {
914+
// lazy migration requested; check if criu supports it
915+
feat := criurpc.CriuFeatures{
916+
LazyPages: proto.Bool(true),
917+
}
918+
919+
if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
920+
return err
921+
}
922+
923+
statusRead, statusWrite, err := os.Pipe()
924+
if err != nil {
925+
return err
926+
}
927+
rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
928+
go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
929+
}
930+
893931
//no need to dump these information in pre-dump
894932
if !criuOpts.PreDump {
895933
for _, m := range c.config.Mounts {
@@ -1042,6 +1080,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
10421080
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
10431081
OrphanPtsMaster: proto.Bool(true),
10441082
AutoDedup: proto.Bool(criuOpts.AutoDedup),
1083+
LazyPages: proto.Bool(criuOpts.LazyPages),
10451084
},
10461085
}
10471086

libcontainer/criu_opts_linux.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,6 @@ type CriuOpts struct {
3535
ManageCgroupsMode cgMode // dump or restore cgroup mode
3636
EmptyNs uint32 // don't c/r properties for namespace from this mask
3737
AutoDedup bool // auto deduplication for incremental dumps
38+
LazyPages bool // restore memory pages lazily using userfaultfd
39+
StatusFd string // fd for feedback when lazy server is ready
3840
}

restore.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ using the runc checkpoint command.`,
8686
Name: "auto-dedup",
8787
Usage: "enable auto deduplication of memory images",
8888
},
89+
cli.BoolFlag{
90+
Name: "lazy-pages",
91+
Usage: "use userfaultfd to lazily restore memory pages",
92+
},
8993
},
9094
Action: func(context *cli.Context) error {
9195
if err := checkArgs(context, 1, exactArgs); err != nil {
@@ -128,5 +132,7 @@ func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
128132
FileLocks: context.Bool("file-locks"),
129133
PreDump: context.Bool("pre-dump"),
130134
AutoDedup: context.Bool("auto-dedup"),
135+
LazyPages: context.Bool("lazy-pages"),
136+
StatusFd: context.String("status-fd"),
131137
}
132138
}

0 commit comments

Comments
 (0)