88 "fmt"
99 "io"
1010 "io/ioutil"
11+ "net"
1112 "os"
1213 "os/exec"
1314 "path/filepath"
@@ -17,6 +18,8 @@ import (
1718 "syscall"
1819 "time"
1920
21+ "golang.org/x/sys/unix"
22+
2023 "github.com/Sirupsen/logrus"
2124 "github.com/golang/protobuf/proto"
2225 "github.com/opencontainers/runc/libcontainer/cgroups"
@@ -727,20 +730,21 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
727730 defer imageDir .Close ()
728731
729732 rpcOpts := criurpc.CriuOpts {
730- ImagesDirFd : proto .Int32 (int32 (imageDir .Fd ())),
731- WorkDirFd : proto .Int32 (int32 (workDir .Fd ())),
732- LogLevel : proto .Int32 (4 ),
733- LogFile : proto .String ("dump.log" ),
734- Root : proto .String (c .config .Rootfs ),
735- ManageCgroups : proto .Bool (true ),
736- NotifyScripts : proto .Bool (true ),
737- Pid : proto .Int32 (int32 (c .initProcess .pid ())),
738- ShellJob : proto .Bool (criuOpts .ShellJob ),
739- LeaveRunning : proto .Bool (criuOpts .LeaveRunning ),
740- TcpEstablished : proto .Bool (criuOpts .TcpEstablished ),
741- ExtUnixSk : proto .Bool (criuOpts .ExternalUnixConnections ),
742- FileLocks : proto .Bool (criuOpts .FileLocks ),
743- EmptyNs : proto .Uint32 (criuOpts .EmptyNs ),
733+ ImagesDirFd : proto .Int32 (int32 (imageDir .Fd ())),
734+ WorkDirFd : proto .Int32 (int32 (workDir .Fd ())),
735+ LogLevel : proto .Int32 (4 ),
736+ LogFile : proto .String ("dump.log" ),
737+ Root : proto .String (c .config .Rootfs ),
738+ ManageCgroups : proto .Bool (true ),
739+ NotifyScripts : proto .Bool (true ),
740+ Pid : proto .Int32 (int32 (c .initProcess .pid ())),
741+ ShellJob : proto .Bool (criuOpts .ShellJob ),
742+ LeaveRunning : proto .Bool (criuOpts .LeaveRunning ),
743+ TcpEstablished : proto .Bool (criuOpts .TcpEstablished ),
744+ ExtUnixSk : proto .Bool (criuOpts .ExternalUnixConnections ),
745+ FileLocks : proto .Bool (criuOpts .FileLocks ),
746+ EmptyNs : proto .Uint32 (criuOpts .EmptyNs ),
747+ OrphanPtsMaster : proto .Bool (true ),
744748 }
745749
746750 // append optional criu opts, e.g., page-server and port
@@ -923,20 +927,21 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
923927 req := & criurpc.CriuReq {
924928 Type : & t ,
925929 Opts : & criurpc.CriuOpts {
926- ImagesDirFd : proto .Int32 (int32 (imageDir .Fd ())),
927- WorkDirFd : proto .Int32 (int32 (workDir .Fd ())),
928- EvasiveDevices : proto .Bool (true ),
929- LogLevel : proto .Int32 (4 ),
930- LogFile : proto .String ("restore.log" ),
931- RstSibling : proto .Bool (true ),
932- Root : proto .String (root ),
933- ManageCgroups : proto .Bool (true ),
934- NotifyScripts : proto .Bool (true ),
935- ShellJob : proto .Bool (criuOpts .ShellJob ),
936- ExtUnixSk : proto .Bool (criuOpts .ExternalUnixConnections ),
937- TcpEstablished : proto .Bool (criuOpts .TcpEstablished ),
938- FileLocks : proto .Bool (criuOpts .FileLocks ),
939- EmptyNs : proto .Uint32 (criuOpts .EmptyNs ),
930+ ImagesDirFd : proto .Int32 (int32 (imageDir .Fd ())),
931+ WorkDirFd : proto .Int32 (int32 (workDir .Fd ())),
932+ EvasiveDevices : proto .Bool (true ),
933+ LogLevel : proto .Int32 (4 ),
934+ LogFile : proto .String ("restore.log" ),
935+ RstSibling : proto .Bool (true ),
936+ Root : proto .String (root ),
937+ ManageCgroups : proto .Bool (true ),
938+ NotifyScripts : proto .Bool (true ),
939+ ShellJob : proto .Bool (criuOpts .ShellJob ),
940+ ExtUnixSk : proto .Bool (criuOpts .ExternalUnixConnections ),
941+ TcpEstablished : proto .Bool (criuOpts .TcpEstablished ),
942+ FileLocks : proto .Bool (criuOpts .FileLocks ),
943+ EmptyNs : proto .Uint32 (criuOpts .EmptyNs ),
944+ OrphanPtsMaster : proto .Bool (true ),
940945 },
941946 }
942947
@@ -1030,15 +1035,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
10301035}
10311036
10321037func (c * linuxContainer ) criuSwrk (process * Process , req * criurpc.CriuReq , opts * CriuOpts , applyCgroups bool ) error {
1033- fds , err := syscall .Socketpair (syscall .AF_LOCAL , syscall .SOCK_SEQPACKET | syscall .SOCK_CLOEXEC , 0 )
1038+ fds , err := unix .Socketpair (syscall .AF_LOCAL , syscall .SOCK_SEQPACKET | syscall .SOCK_CLOEXEC , 0 )
10341039 if err != nil {
10351040 return err
10361041 }
10371042
10381043 logPath := filepath .Join (opts .WorkDirectory , req .GetOpts ().GetLogFile ())
10391044 criuClient := os .NewFile (uintptr (fds [0 ]), "criu-transport-client" )
1045+ criuClientFileCon , err := net .FileConn (criuClient )
1046+ criuClient .Close ()
1047+ if err != nil {
1048+ return err
1049+ }
1050+
1051+ criuClientCon := criuClientFileCon .(* net.UnixConn )
1052+ defer criuClientCon .Close ()
1053+
10401054 criuServer := os .NewFile (uintptr (fds [1 ]), "criu-transport-server" )
1041- defer criuClient .Close ()
10421055 defer criuServer .Close ()
10431056
10441057 args := []string {"swrk" , "3" }
@@ -1058,7 +1071,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
10581071 criuServer .Close ()
10591072
10601073 defer func () {
1061- criuClient .Close ()
1074+ criuClientCon .Close ()
10621075 _ , err := cmd .Process .Wait ()
10631076 if err != nil {
10641077 return
@@ -1101,14 +1114,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11011114 if err != nil {
11021115 return err
11031116 }
1104- _ , err = criuClient .Write (data )
1117+ _ , err = criuClientCon .Write (data )
11051118 if err != nil {
11061119 return err
11071120 }
11081121
11091122 buf := make ([]byte , 10 * 4096 )
1123+ oob := make ([]byte , 4096 )
11101124 for true {
1111- n , err := criuClient . Read (buf )
1125+ n , oobn , _ , _ , err := criuClientCon . ReadMsgUnix (buf , oob )
11121126 if err != nil {
11131127 return err
11141128 }
@@ -1136,7 +1150,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11361150 criuFeatures = resp .GetFeatures ()
11371151 break
11381152 case t == criurpc .CriuReqType_NOTIFY :
1139- if err := c .criuNotifications (resp , process , opts , extFds ); err != nil {
1153+ if err := c .criuNotifications (resp , process , opts , extFds , oob [: oobn ] ); err != nil {
11401154 return err
11411155 }
11421156 t = criurpc .CriuReqType_NOTIFY
@@ -1148,45 +1162,37 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
11481162 if err != nil {
11491163 return err
11501164 }
1151- _ , err = criuClient .Write (data )
1165+ _ , err = criuClientCon .Write (data )
11521166 if err != nil {
11531167 return err
11541168 }
11551169 continue
11561170 case t == criurpc .CriuReqType_RESTORE :
11571171 case t == criurpc .CriuReqType_DUMP :
1158- break
11591172 case t == criurpc .CriuReqType_PRE_DUMP :
1160- // In pre-dump mode CRIU is in a loop and waits for
1161- // the final DUMP command.
1162- // The current runc pre-dump approach, however, is
1163- // start criu in PRE_DUMP once for a single pre-dump
1164- // and not the whole series of pre-dump, pre-dump, ...m, dump
1165- // If we got the message CriuReqType_PRE_DUMP it means
1166- // CRIU was successful and we need to forcefully stop CRIU
1167- logrus .Debugf ("PRE_DUMP finished. Send close signal to CRIU service" )
1168- criuClient .Close ()
1169- // Process status won't be success, because one end of sockets is closed
1170- _ , err := cmd .Process .Wait ()
1171- if err != nil {
1172- logrus .Debugf ("After PRE_DUMP CRIU exiting failed" )
1173- return err
1174- }
1175- return nil
11761173 default :
11771174 return fmt .Errorf ("unable to parse the response %s" , resp .String ())
11781175 }
11791176
11801177 break
11811178 }
11821179
1180+ criuClientCon .CloseWrite ()
11831181 // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
11841182 // Here we want to wait only the CRIU process.
11851183 st , err := cmd .Process .Wait ()
11861184 if err != nil {
11871185 return err
11881186 }
1189- if ! st .Success () {
1187+
1188+ // In pre-dump mode CRIU is in a loop and waits for
1189+ // the final DUMP command.
1190+ // The current runc pre-dump approach, however, is to
1191+ // start CRIU in PRE_DUMP once for a single pre-dump,
1192+ // not for the whole series of pre-dump, pre-dump, ..., dump.
1193+ // If we got the message CriuReqType_PRE_DUMP it means CRIU was
1194+ // successful; we forcefully stop it, so a non-success exit status is not an error.
1195+ if ! st .Success () && * req .Type != criurpc .CriuReqType_PRE_DUMP {
11901196 return fmt .Errorf ("criu failed: %s\n log file: %s" , st .String (), logPath )
11911197 }
11921198 return nil
@@ -1220,11 +1226,12 @@ func unlockNetwork(config *configs.Config) error {
12201226 return nil
12211227}
12221228
1223- func (c * linuxContainer ) criuNotifications (resp * criurpc.CriuResp , process * Process , opts * CriuOpts , fds []string ) error {
1229+ func (c * linuxContainer ) criuNotifications (resp * criurpc.CriuResp , process * Process , opts * CriuOpts , fds []string , oob [] byte ) error {
12241230 notify := resp .GetNotify ()
12251231 if notify == nil {
12261232 return fmt .Errorf ("invalid response: %s" , resp .String ())
12271233 }
1234+ logrus .Debugf ("notify: %s\n " , notify .GetScript ())
12281235 switch {
12291236 case notify .GetScript () == "post-dump" :
12301237 f , err := os .Create (filepath .Join (c .root , "checkpoint" ))
@@ -1277,6 +1284,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
12771284 logrus .Error (err )
12781285 }
12791286 }
1287+ case notify .GetScript () == "orphan-pts-master" :
1288+ scm , err := syscall .ParseSocketControlMessage (oob )
1289+ if err != nil {
1290+ return err
1291+ }
1292+ fds , err := syscall .ParseUnixRights (& scm [0 ])
1293+
1294+ master := os .NewFile (uintptr (fds [0 ]), "orphan-pts-master" )
1295+ defer master .Close ()
1296+
1297+ // While we can access console.master, using the API is a good idea.
1298+ if err := utils .SendFd (process .ConsoleSocket , master ); err != nil {
1299+ return err
1300+ }
12801301 }
12811302 return nil
12821303}
0 commit comments