8
8
"context"
9
9
"fmt"
10
10
"io/ioutil"
11
+ "net"
11
12
"os"
12
13
"os/exec"
13
14
"os/signal"
@@ -17,11 +18,13 @@ import (
17
18
"time"
18
19
19
20
"github.com/gitpod-io/gitpod/common-go/log"
21
+ "github.com/gitpod-io/gitpod/workspacekit/pkg/seccomp"
20
22
daemonapi "github.com/gitpod-io/gitpod/ws-daemon/api"
21
23
22
24
"github.com/rootless-containers/rootlesskit/pkg/msgutil"
23
25
"github.com/rootless-containers/rootlesskit/pkg/sigproxy"
24
26
sigproxysignal "github.com/rootless-containers/rootlesskit/pkg/sigproxy/signal"
27
+ libseccomp "github.com/seccomp/libseccomp-golang"
25
28
"github.com/spf13/cobra"
26
29
"golang.org/x/sys/unix"
27
30
"google.golang.org/grpc"
@@ -36,6 +39,10 @@ const (
36
39
// This time must give ring1 enough time to shut down (see time budgets in supervisor.go),
37
40
// and to talk to ws-daemon within the terminationGracePeriod of the workspace pod.
38
41
ring1ShutdownTimeout = 20 * time .Second
42
+
43
+ // ring2StartupTimeout is the maximum time we wait between starting ring2 and its
44
+ // attempt to connect to the parent socket.
45
+ ring2StartupTimeout = 5 * time .Second
39
46
)
40
47
41
48
var ring0Cmd = & cobra.Command {
@@ -260,20 +267,20 @@ var ring1Cmd = &cobra.Command{
260
267
}
261
268
}
262
269
263
- pipeR , pipeW , err := os .Pipe ()
270
+ socketFN := filepath .Join (os .TempDir (), fmt .Sprintf ("workspacekit-ring1-%d.unix" , time .Now ().UnixNano ()))
271
+ skt , err := net .Listen ("unix" , socketFN )
264
272
if err != nil {
265
- log .WithError (err ).Error ("cannot mount create pipe " )
273
+ log .WithError (err ).Error ("cannot create socket for ring2 " )
266
274
failed = true
267
275
return
268
276
}
269
- defer pipeW .Close ()
277
+ defer skt .Close ()
270
278
271
- cmd := exec .Command ("/proc/self/exe" , "ring2" )
279
+ cmd := exec .Command ("/proc/self/exe" , "ring2" , socketFN )
272
280
cmd .SysProcAttr = & syscall.SysProcAttr {
273
281
Pdeathsig : syscall .SIGKILL ,
274
282
Cloneflags : syscall .CLONE_NEWNS | syscall .CLONE_NEWPID ,
275
283
}
276
- cmd .ExtraFiles = []* os.File {pipeR }
277
284
cmd .Dir = tmpdir
278
285
cmd .Stdin = os .Stdin
279
286
cmd .Stdout = os .Stdout
@@ -294,27 +301,107 @@ var ring1Cmd = &cobra.Command{
294
301
failed = true
295
302
return
296
303
}
297
- _ , err = client .MountProc (ctx , & daemonapi.MountProcRequest {
298
- Target : procLoc ,
299
- Pid : int64 (cmd .Process .Pid ),
304
+ resp , err := client .MountProc (ctx , & daemonapi.MountProcRequest {
305
+ Pid : int64 (cmd .Process .Pid ),
300
306
})
301
307
if err != nil {
302
308
log .WithError (err ).Error ("cannot mount proc" )
303
309
failed = true
304
310
return
305
311
}
306
312
313
+ // TODO(cw): this mount doesn't work because we need to be in the ring2 mount namespace.
314
+ // Use nsenter/mount handler to do this.
315
+ err = unix .Mount (resp .Location , procLoc , "" , unix .MS_MOVE , "" )
316
+ if err != nil {
317
+ log .WithError (err ).WithFields (map [string ]interface {}{"loc" : resp .Location , "dest" : procLoc }).Error ("cannot move proc mount" )
318
+ failed = true
319
+ return
320
+ }
321
+
322
+ incoming := make (chan net.Conn , 1 )
323
+ errc := make (chan error , 1 )
324
+ go func () {
325
+ defer close (incoming )
326
+ defer close (errc )
327
+
328
+ // Accept stops the latest when we close the socket.
329
+ c , err := skt .Accept ()
330
+ if err != nil {
331
+ errc <- err
332
+ return
333
+ }
334
+ incoming <- c
335
+ }()
336
+ var ring2Conn * net.UnixConn
337
+ for {
338
+ var brek bool
339
+ select {
340
+ case err = <- errc :
341
+ if err != nil {
342
+ brek = true
343
+ }
344
+ case c := <- incoming :
345
+ if c == nil {
346
+ continue
347
+ }
348
+ ring2Conn = c .(* net.UnixConn )
349
+ brek = true
350
+ case <- time .After (ring2StartupTimeout ):
351
+ err = fmt .Errorf ("ring2 did not connect in time" )
352
+ brek = true
353
+ }
354
+ if brek {
355
+ break
356
+ }
357
+ }
358
+ if err != nil {
359
+ log .WithError (err ).Error ("ring2 did not connect successfully" )
360
+ failed = true
361
+ return
362
+ }
363
+
307
364
log .Info ("signaling to child process" )
308
- _ , err = msgutil .MarshalToWriter (pipeW , ringSyncMsg {
365
+ _ , err = msgutil .MarshalToWriter (ring2Conn , ringSyncMsg {
309
366
Stage : 1 ,
310
367
Rootfs : tmpdir ,
311
368
})
312
369
if err != nil {
313
- log .WithError (err ).Error ("cannot send signal to child process" )
370
+ log .WithError (err ).Error ("cannot send ring sync msg to ring2" )
371
+ failed = true
372
+ return
373
+ }
374
+
375
+ log .Info ("awaiting seccomp fd" )
376
+ scmpfd , err := receiveSeccmpFd (ring2Conn )
377
+ if err != nil {
378
+ log .WithError (err ).Error ("did not receive seccomp fd from ring2" )
314
379
failed = true
315
380
return
316
381
}
317
382
383
+ if scmpfd == 0 {
384
+ log .Warn ("received 0 as ring2 seccomp fd - syscall handling is broken" )
385
+ } else {
386
+ stp , errchan := seccomp .Handle (scmpfd , cmd .Process .Pid , client )
387
+ defer close (stp )
388
+ go func () {
389
+ t := time .NewTicker (10 * time .Millisecond )
390
+ defer t .Stop ()
391
+ for {
392
+ // We use the ticker to rate-limit the errors from the syscall handler.
393
+ // We're only handling low-frequency syscalls (e.g. mount), and don't want
394
+ // the handler to hog the CPU because it fails on its fd.
395
+ <- t .C
396
+ err := <- errchan
397
+ if err == nil {
398
+ return
399
+ }
400
+ log .WithError (err ).Warn ("syscall handler error" )
401
+ }
402
+ }()
403
+ }
404
+
318
405
err = cmd .Wait ()
319
406
if err != nil {
320
407
log .WithError (err ).Error ("unexpected exit" )
@@ -324,12 +411,52 @@ var ring1Cmd = &cobra.Command{
324
411
},
325
412
}
326
413
414
+ func receiveSeccmpFd (conn * net.UnixConn ) (libseccomp.ScmpFd , error ) {
415
+ buf := make ([]byte , unix .CmsgSpace (4 ))
416
+
417
+ err := conn .SetDeadline (time .Now ().Add (5 * time .Second ))
418
+ if err != nil {
419
+ return 0 , err
420
+ }
421
+
422
+ f , err := conn .File ()
423
+ if err != nil {
424
+ return 0 , err
425
+ }
426
+ defer f .Close ()
427
+ connfd := int (f .Fd ())
428
+
429
+ _ , _ , _ , _ , err = unix .Recvmsg (connfd , nil , buf , 0 )
430
+ if err != nil {
431
+ return 0 , err
432
+ }
433
+
434
+ msgs , err := unix .ParseSocketControlMessage (buf )
435
+ if err != nil {
436
+ return 0 , err
437
+ }
438
+ if len (msgs ) != 1 {
439
+ return 0 , fmt .Errorf ("expected a single socket control message" )
440
+ }
441
+
442
+ fds , err := unix .ParseUnixRights (& msgs [0 ])
443
+ if err != nil {
444
+ return 0 , err
445
+ }
446
+ if len (fds ) == 0 {
447
+ return 0 , fmt .Errorf ("expected a single socket FD" )
448
+ }
449
+
450
+ return libseccomp .ScmpFd (fds [0 ]), nil
451
+ }
452
+
327
453
// ring2Opts holds the settings used by the ring2 command.
var ring2Opts struct {
328
454
// SupervisorPath is the path of the executable that ring2 replaces itself
// with via unix.Exec (invoked as "supervisor run --inns").
SupervisorPath string
329
455
}
330
456
var ring2Cmd = & cobra.Command {
331
- Use : "ring2" ,
457
+ Use : "ring2 <ring1Socket> " ,
332
458
Short : "starts ring2" ,
459
+ Args : cobra .ExactArgs (1 ),
333
460
Run : func (_cmd * cobra.Command , args []string ) {
334
461
log .Init (ServiceName , Version , true , true )
335
462
log := log .WithField ("ring" , 2 )
@@ -343,13 +470,21 @@ var ring2Cmd = &cobra.Command{
343
470
sleepForDebugging ()
344
471
}()
345
472
346
- // wait for /proc by listening on the parent's pipe.
347
- // fd=3 is the pipe's FD passed in from the parent via extraFiles.
348
- pipeR := os .NewFile (uintptr (3 ), "" )
473
+ // we talk to ring1 using a Unix socket, so that we can send the seccomp fd across.
474
+ rconn , err := net .Dial ("unix" , args [0 ])
475
+ if err != nil {
476
+ log .WithError (err ).Error ("cannot connect to parent" )
477
+ failed = true
478
+ return
479
+ }
480
+ conn := rconn .(* net.UnixConn )
481
+ log .Info ("connected to parent socket" )
482
+
483
+ // Before we do anything, we wait for the parent to make /proc available to us.
349
484
var msg ringSyncMsg
350
- _ , err : = msgutil .UnmarshalFromReader (pipeR , & msg )
485
+ _ , err = msgutil .UnmarshalFromReader (conn , & msg )
351
486
if err != nil {
352
- log .WithError (err ).Error ("cannot read from parent pipe " )
487
+ log .WithError (err ).Error ("cannot read parent message " )
353
488
failed = true
354
489
return
355
490
}
@@ -366,6 +501,27 @@ var ring2Cmd = &cobra.Command{
366
501
return
367
502
}
368
503
504
+ // Now that we're in our new root filesystem, including proc and all, we can load
505
+ // our seccomp filter, and tell our parent about it.
506
+ scmpFd , err := seccomp .LoadFilter ()
507
+ if err != nil {
508
+ log .WithError (err ).Warn ("cannot load seccomp filter - syscall handling will be broken" )
509
+ }
510
+ connf , err := conn .File ()
511
+ if err != nil {
512
+ log .WithError (err ).Error ("cannot get parent socket fd" )
513
+ failed = true
514
+ return
515
+ }
516
+ sktfd := int (connf .Fd ())
517
+ err = unix .Sendmsg (sktfd , nil , unix .UnixRights (int (scmpFd )), nil , 0 )
518
+ connf .Close ()
519
+ if err != nil {
520
+ log .WithError (err ).Error ("cannot send seccomp fd" )
521
+ failed = true
522
+ return
523
+ }
524
+
369
525
err = cap .SetGroups (33333 )
370
526
if err != nil {
371
527
log .WithError (err ).Error ("cannot setgid" )
@@ -380,7 +536,7 @@ var ring2Cmd = &cobra.Command{
380
536
}
381
537
err = unix .Exec (ring2Opts .SupervisorPath , []string {"supervisor" , "run" , "--inns" }, os .Environ ())
382
538
if err != nil {
383
- log .WithError (err ).Error ("cannot exec" )
539
+ log .WithError (err ).WithField ( "cmd" , ring2Opts . SupervisorPath ). Error ("cannot exec" )
384
540
failed = true
385
541
return
386
542
}
@@ -497,11 +653,12 @@ func init() {
497
653
498
654
supervisorPath := os .Getenv ("GITPOD_WORKSPACEKIT_SUPERVISOR_PATH" )
499
655
if supervisorPath == "" {
500
- wd , err := os .Getwd ()
656
+ wd , err := os .Executable ()
501
657
if err == nil {
502
- supervisorPath = "supervisor"
503
- } else {
658
+ wd = filepath .Dir (wd )
504
659
supervisorPath = filepath .Join (wd , "supervisor" )
660
+ } else {
661
+ supervisorPath = "/.supervisor/supervisor"
505
662
}
506
663
}
507
664
0 commit comments