Skip to content

Commit 1c43d09

Browse files
committed
checkpoint: add support for containers with terminals
CRIU was extended to report orphaned master ptys via RPC. Signed-off-by: Andrei Vagin <[email protected]>
1 parent a4fcbfb commit 1c43d09

File tree

2 files changed

+80
-54
lines changed

2 files changed

+80
-54
lines changed

libcontainer/container_linux.go

Lines changed: 75 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"fmt"
99
"io"
1010
"io/ioutil"
11+
"net"
1112
"os"
1213
"os/exec"
1314
"path/filepath"
@@ -17,6 +18,8 @@ import (
1718
"syscall"
1819
"time"
1920

21+
"golang.org/x/sys/unix"
22+
2023
"github.com/Sirupsen/logrus"
2124
"github.com/golang/protobuf/proto"
2225
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -727,20 +730,21 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
727730
defer imageDir.Close()
728731

729732
rpcOpts := criurpc.CriuOpts{
730-
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
731-
WorkDirFd: proto.Int32(int32(workDir.Fd())),
732-
LogLevel: proto.Int32(4),
733-
LogFile: proto.String("dump.log"),
734-
Root: proto.String(c.config.Rootfs),
735-
ManageCgroups: proto.Bool(true),
736-
NotifyScripts: proto.Bool(true),
737-
Pid: proto.Int32(int32(c.initProcess.pid())),
738-
ShellJob: proto.Bool(criuOpts.ShellJob),
739-
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
740-
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
741-
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
742-
FileLocks: proto.Bool(criuOpts.FileLocks),
743-
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
733+
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
734+
WorkDirFd: proto.Int32(int32(workDir.Fd())),
735+
LogLevel: proto.Int32(4),
736+
LogFile: proto.String("dump.log"),
737+
Root: proto.String(c.config.Rootfs),
738+
ManageCgroups: proto.Bool(true),
739+
NotifyScripts: proto.Bool(true),
740+
Pid: proto.Int32(int32(c.initProcess.pid())),
741+
ShellJob: proto.Bool(criuOpts.ShellJob),
742+
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
743+
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
744+
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
745+
FileLocks: proto.Bool(criuOpts.FileLocks),
746+
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
747+
OrphanPtsMaster: proto.Bool(true),
744748
}
745749

746750
// append optional criu opts, e.g., page-server and port
@@ -923,20 +927,21 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
923927
req := &criurpc.CriuReq{
924928
Type: &t,
925929
Opts: &criurpc.CriuOpts{
926-
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
927-
WorkDirFd: proto.Int32(int32(workDir.Fd())),
928-
EvasiveDevices: proto.Bool(true),
929-
LogLevel: proto.Int32(4),
930-
LogFile: proto.String("restore.log"),
931-
RstSibling: proto.Bool(true),
932-
Root: proto.String(root),
933-
ManageCgroups: proto.Bool(true),
934-
NotifyScripts: proto.Bool(true),
935-
ShellJob: proto.Bool(criuOpts.ShellJob),
936-
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
937-
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
938-
FileLocks: proto.Bool(criuOpts.FileLocks),
939-
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
930+
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
931+
WorkDirFd: proto.Int32(int32(workDir.Fd())),
932+
EvasiveDevices: proto.Bool(true),
933+
LogLevel: proto.Int32(4),
934+
LogFile: proto.String("restore.log"),
935+
RstSibling: proto.Bool(true),
936+
Root: proto.String(root),
937+
ManageCgroups: proto.Bool(true),
938+
NotifyScripts: proto.Bool(true),
939+
ShellJob: proto.Bool(criuOpts.ShellJob),
940+
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
941+
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
942+
FileLocks: proto.Bool(criuOpts.FileLocks),
943+
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
944+
OrphanPtsMaster: proto.Bool(true),
940945
},
941946
}
942947

@@ -1030,15 +1035,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
10301035
}
10311036

10321037
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
1033-
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
1038+
fds, err := unix.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
10341039
if err != nil {
10351040
return err
10361041
}
10371042

10381043
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
10391044
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
1045+
criuClientFileCon, err := net.FileConn(criuClient)
1046+
criuClient.Close()
1047+
if err != nil {
1048+
return err
1049+
}
1050+
1051+
criuClientCon := criuClientFileCon.(*net.UnixConn)
1052+
defer criuClientCon.Close()
1053+
10401054
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
1041-
defer criuClient.Close()
10421055
defer criuServer.Close()
10431056

10441057
args := []string{"swrk", "3"}
@@ -1058,7 +1071,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
10581071
criuServer.Close()
10591072

10601073
defer func() {
1061-
criuClient.Close()
1074+
criuClientCon.Close()
10621075
_, err := cmd.Process.Wait()
10631076
if err != nil {
10641077
return
@@ -1101,14 +1114,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11011114
if err != nil {
11021115
return err
11031116
}
1104-
_, err = criuClient.Write(data)
1117+
_, err = criuClientCon.Write(data)
11051118
if err != nil {
11061119
return err
11071120
}
11081121

11091122
buf := make([]byte, 10*4096)
1123+
oob := make([]byte, 4096)
11101124
for true {
1111-
n, err := criuClient.Read(buf)
1125+
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
11121126
if err != nil {
11131127
return err
11141128
}
@@ -1136,7 +1150,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11361150
criuFeatures = resp.GetFeatures()
11371151
break
11381152
case t == criurpc.CriuReqType_NOTIFY:
1139-
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
1153+
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
11401154
return err
11411155
}
11421156
t = criurpc.CriuReqType_NOTIFY
@@ -1148,45 +1162,37 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11481162
if err != nil {
11491163
return err
11501164
}
1151-
_, err = criuClient.Write(data)
1165+
_, err = criuClientCon.Write(data)
11521166
if err != nil {
11531167
return err
11541168
}
11551169
continue
11561170
case t == criurpc.CriuReqType_RESTORE:
11571171
case t == criurpc.CriuReqType_DUMP:
1158-
break
11591172
case t == criurpc.CriuReqType_PRE_DUMP:
1160-
// In pre-dump mode CRIU is in a loop and waits for
1161-
// the final DUMP command.
1162-
// The current runc pre-dump approach, however, is
1163-
// start criu in PRE_DUMP once for a single pre-dump
1164-
// and not the whole series of pre-dump, pre-dump, ...m, dump
1165-
// If we got the message CriuReqType_PRE_DUMP it means
1166-
// CRIU was successful and we need to forcefully stop CRIU
1167-
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
1168-
criuClient.Close()
1169-
// Process status won't be success, because one end of sockets is closed
1170-
_, err := cmd.Process.Wait()
1171-
if err != nil {
1172-
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
1173-
return err
1174-
}
1175-
return nil
11761173
default:
11771174
return fmt.Errorf("unable to parse the response %s", resp.String())
11781175
}
11791176

11801177
break
11811178
}
11821179

1180+
criuClientCon.CloseWrite()
11831181
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
11841182
// Here we want to wait only the CRIU process.
11851183
st, err := cmd.Process.Wait()
11861184
if err != nil {
11871185
return err
11881186
}
1189-
if !st.Success() {
1187+
1188+
// In pre-dump mode CRIU is in a loop and waits for
1189+
// the final DUMP command.
1190+
// The current runc pre-dump approach, however, is
1191+
// to start CRIU in PRE_DUMP once for a single pre-dump
1192+
// and not the whole series of pre-dump, pre-dump, ..., dump
1193+
// If we got the message CriuReqType_PRE_DUMP it means
1194+
// CRIU was successful and we need to forcefully stop CRIU
1195+
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
11901196
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
11911197
}
11921198
return nil
@@ -1220,11 +1226,12 @@ func unlockNetwork(config *configs.Config) error {
12201226
return nil
12211227
}
12221228

1223-
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
1229+
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
12241230
notify := resp.GetNotify()
12251231
if notify == nil {
12261232
return fmt.Errorf("invalid response: %s", resp.String())
12271233
}
1234+
logrus.Debugf("notify: %s\n", notify.GetScript())
12281235
switch {
12291236
case notify.GetScript() == "post-dump":
12301237
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
@@ -1277,6 +1284,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
12771284
logrus.Error(err)
12781285
}
12791286
}
1287+
case notify.GetScript() == "orphan-pts-master":
1288+
scm, err := syscall.ParseSocketControlMessage(oob)
1289+
if err != nil {
1290+
return err
1291+
}
1292+
fds, err := syscall.ParseUnixRights(&scm[0])
1293+
1294+
master := os.NewFile(uintptr(fds[0]), "orphan-pts-master")
1295+
defer master.Close()
1296+
1297+
// While we can access console.master, using the API is a good idea.
1298+
if err := utils.SendFd(process.ConsoleSocket, master); err != nil {
1299+
return err
1300+
}
12801301
}
12811302
return nil
12821303
}

restore.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ restored.`,
2020
Description: `Restores the saved state of the container instance that was previously saved
2121
using the runc checkpoint command.`,
2222
Flags: []cli.Flag{
23+
cli.StringFlag{
24+
Name: "console-socket",
25+
Value: "",
26+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
27+
},
2328
cli.StringFlag{
2429
Name: "image-path",
2530
Value: "",

0 commit comments

Comments
 (0)