Skip to content

Commit 6394544

Browse files
author
Mrunal Patel
authored
Merge pull request #1355 from avagin/cr-console
Dump and restore containers with external terminals
2 parents 9a827e9 + 459a17b commit 6394544

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+12234
-1068
lines changed

Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ RUN apt-get update && apt-get install -y \
1515
libcap-dev \
1616
libprotobuf-dev \
1717
libprotobuf-c0-dev \
18+
libnl-3-dev \
19+
libnet-dev \
1820
libseccomp2/jessie-backports \
1921
libseccomp-dev/jessie-backports \
2022
protobuf-c-compiler \
@@ -38,7 +40,7 @@ RUN cd /tmp \
3840
&& rm -rf /tmp/bats
3941

4042
# install criu
41-
ENV CRIU_VERSION 1.7
43+
ENV CRIU_VERSION 2.12
4244
RUN mkdir -p /usr/src/criu \
4345
&& curl -sSL https://github.com/xemul/criu/archive/v${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
4446
&& cd /usr/src/criu \

create.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ command(s) that get executed on start, edit the args parameter of the spec. See
6262
if err != nil {
6363
return err
6464
}
65-
status, err := startContainer(context, spec, true)
65+
status, err := startContainer(context, spec, CT_ACT_CREATE, nil)
6666
if err != nil {
6767
return err
6868
}

exec.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ func execProcess(context *cli.Context) (int, error) {
135135
consoleSocket: context.String("console-socket"),
136136
detach: detach,
137137
pidFile: context.String("pid-file"),
138+
action: CT_ACT_RUN,
138139
}
139140
return r.run(p)
140141
}

libcontainer/container_linux.go

Lines changed: 81 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"fmt"
99
"io"
1010
"io/ioutil"
11+
"net"
1112
"os"
1213
"os/exec"
1314
"path/filepath"
@@ -17,6 +18,8 @@ import (
1718
"syscall"
1819
"time"
1920

21+
"golang.org/x/sys/unix"
22+
2023
"github.com/Sirupsen/logrus"
2124
"github.com/golang/protobuf/proto"
2225
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -637,7 +640,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion string) error {
637640
c.criuVersion = x*10000 + y*100 + z
638641

639642
if c.criuVersion < versionReq {
640-
return fmt.Errorf("CRIU version must be %s or higher", minVersion)
643+
return fmt.Errorf("CRIU version %d must be %d or higher", c.criuVersion, versionReq)
641644
}
642645

643646
return nil
@@ -727,20 +730,26 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
727730
defer imageDir.Close()
728731

729732
rpcOpts := criurpc.CriuOpts{
730-
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
731-
WorkDirFd: proto.Int32(int32(workDir.Fd())),
732-
LogLevel: proto.Int32(4),
733-
LogFile: proto.String("dump.log"),
734-
Root: proto.String(c.config.Rootfs),
735-
ManageCgroups: proto.Bool(true),
736-
NotifyScripts: proto.Bool(true),
737-
Pid: proto.Int32(int32(c.initProcess.pid())),
738-
ShellJob: proto.Bool(criuOpts.ShellJob),
739-
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
740-
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
741-
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
742-
FileLocks: proto.Bool(criuOpts.FileLocks),
743-
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
733+
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
734+
WorkDirFd: proto.Int32(int32(workDir.Fd())),
735+
LogLevel: proto.Int32(4),
736+
LogFile: proto.String("dump.log"),
737+
Root: proto.String(c.config.Rootfs),
738+
ManageCgroups: proto.Bool(true),
739+
NotifyScripts: proto.Bool(true),
740+
Pid: proto.Int32(int32(c.initProcess.pid())),
741+
ShellJob: proto.Bool(criuOpts.ShellJob),
742+
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
743+
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
744+
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
745+
FileLocks: proto.Bool(criuOpts.FileLocks),
746+
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
747+
OrphanPtsMaster: proto.Bool(true),
748+
}
749+
750+
fcg := c.cgroupManager.GetPaths()["freezer"]
751+
if fcg != "" {
752+
rpcOpts.FreezeCgroup = proto.String(fcg)
744753
}
745754

746755
// append optional criu opts, e.g., page-server and port
@@ -923,20 +932,21 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
923932
req := &criurpc.CriuReq{
924933
Type: &t,
925934
Opts: &criurpc.CriuOpts{
926-
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
927-
WorkDirFd: proto.Int32(int32(workDir.Fd())),
928-
EvasiveDevices: proto.Bool(true),
929-
LogLevel: proto.Int32(4),
930-
LogFile: proto.String("restore.log"),
931-
RstSibling: proto.Bool(true),
932-
Root: proto.String(root),
933-
ManageCgroups: proto.Bool(true),
934-
NotifyScripts: proto.Bool(true),
935-
ShellJob: proto.Bool(criuOpts.ShellJob),
936-
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
937-
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
938-
FileLocks: proto.Bool(criuOpts.FileLocks),
939-
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
935+
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
936+
WorkDirFd: proto.Int32(int32(workDir.Fd())),
937+
EvasiveDevices: proto.Bool(true),
938+
LogLevel: proto.Int32(4),
939+
LogFile: proto.String("restore.log"),
940+
RstSibling: proto.Bool(true),
941+
Root: proto.String(root),
942+
ManageCgroups: proto.Bool(true),
943+
NotifyScripts: proto.Bool(true),
944+
ShellJob: proto.Bool(criuOpts.ShellJob),
945+
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
946+
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
947+
FileLocks: proto.Bool(criuOpts.FileLocks),
948+
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
949+
OrphanPtsMaster: proto.Bool(true),
940950
},
941951
}
942952

@@ -1030,15 +1040,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
10301040
}
10311041

10321042
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
1033-
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
1043+
fds, err := unix.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
10341044
if err != nil {
10351045
return err
10361046
}
10371047

10381048
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
10391049
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
1050+
criuClientFileCon, err := net.FileConn(criuClient)
1051+
criuClient.Close()
1052+
if err != nil {
1053+
return err
1054+
}
1055+
1056+
criuClientCon := criuClientFileCon.(*net.UnixConn)
1057+
defer criuClientCon.Close()
1058+
10401059
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
1041-
defer criuClient.Close()
10421060
defer criuServer.Close()
10431061

10441062
args := []string{"swrk", "3"}
@@ -1058,7 +1076,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
10581076
criuServer.Close()
10591077

10601078
defer func() {
1061-
criuClient.Close()
1079+
criuClientCon.Close()
10621080
_, err := cmd.Process.Wait()
10631081
if err != nil {
10641082
return
@@ -1101,14 +1119,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11011119
if err != nil {
11021120
return err
11031121
}
1104-
_, err = criuClient.Write(data)
1122+
_, err = criuClientCon.Write(data)
11051123
if err != nil {
11061124
return err
11071125
}
11081126

11091127
buf := make([]byte, 10*4096)
1128+
oob := make([]byte, 4096)
11101129
for true {
1111-
n, err := criuClient.Read(buf)
1130+
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
11121131
if err != nil {
11131132
return err
11141133
}
@@ -1136,7 +1155,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11361155
criuFeatures = resp.GetFeatures()
11371156
break
11381157
case t == criurpc.CriuReqType_NOTIFY:
1139-
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
1158+
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
11401159
return err
11411160
}
11421161
t = criurpc.CriuReqType_NOTIFY
@@ -1148,45 +1167,37 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11481167
if err != nil {
11491168
return err
11501169
}
1151-
_, err = criuClient.Write(data)
1170+
_, err = criuClientCon.Write(data)
11521171
if err != nil {
11531172
return err
11541173
}
11551174
continue
11561175
case t == criurpc.CriuReqType_RESTORE:
11571176
case t == criurpc.CriuReqType_DUMP:
1158-
break
11591177
case t == criurpc.CriuReqType_PRE_DUMP:
1160-
// In pre-dump mode CRIU is in a loop and waits for
1161-
// the final DUMP command.
1162-
// The current runc pre-dump approach, however, is
1163-
// start criu in PRE_DUMP once for a single pre-dump
1164-
// and not the whole series of pre-dump, pre-dump, ...m, dump
1165-
// If we got the message CriuReqType_PRE_DUMP it means
1166-
// CRIU was successful and we need to forcefully stop CRIU
1167-
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
1168-
criuClient.Close()
1169-
// Process status won't be success, because one end of sockets is closed
1170-
_, err := cmd.Process.Wait()
1171-
if err != nil {
1172-
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
1173-
return err
1174-
}
1175-
return nil
11761178
default:
11771179
return fmt.Errorf("unable to parse the response %s", resp.String())
11781180
}
11791181

11801182
break
11811183
}
11821184

1185+
criuClientCon.CloseWrite()
11831186
// cmd.Wait() waits for cmd.goroutines, which are used for proxying file descriptors.
11851188
// Here we want to wait only for the CRIU process.
11851188
st, err := cmd.Process.Wait()
11861189
if err != nil {
11871190
return err
11881191
}
1189-
if !st.Success() {
1192+
1193+
// In pre-dump mode CRIU is in a loop and waits for
1194+
// the final DUMP command.
1195+
// The current runc pre-dump approach, however, is to
1196+
// start CRIU in PRE_DUMP once for a single pre-dump
1197+
// and not the whole series of pre-dump, pre-dump, ..., dump.
1198+
// If we got the message CriuReqType_PRE_DUMP it means
1199+
// CRIU was successful and we need to forcefully stop CRIU
1200+
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
11901201
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
11911202
}
11921203
return nil
@@ -1220,11 +1231,12 @@ func unlockNetwork(config *configs.Config) error {
12201231
return nil
12211232
}
12221233

1223-
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
1234+
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
12241235
notify := resp.GetNotify()
12251236
if notify == nil {
12261237
return fmt.Errorf("invalid response: %s", resp.String())
12271238
}
1239+
logrus.Debugf("notify: %s\n", notify.GetScript())
12281240
switch {
12291241
case notify.GetScript() == "post-dump":
12301242
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
@@ -1277,6 +1289,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
12771289
logrus.Error(err)
12781290
}
12791291
}
1292+
case notify.GetScript() == "orphan-pts-master":
1293+
scm, err := syscall.ParseSocketControlMessage(oob)
1294+
if err != nil {
1295+
return err
1296+
}
1297+
fds, err := syscall.ParseUnixRights(&scm[0])
1298+
1299+
master := os.NewFile(uintptr(fds[0]), "orphan-pts-master")
1300+
defer master.Close()
1301+
1302+
// While we can access console.master, using the API is a good idea.
1303+
if err := utils.SendFd(process.ConsoleSocket, master); err != nil {
1304+
return err
1305+
}
12801306
}
12811307
return nil
12821308
}

0 commit comments

Comments
 (0)