7
7
"os"
8
8
"os/exec"
9
9
"path/filepath"
10
+ "strconv"
10
11
"sync"
11
12
"syscall"
12
13
"time"
@@ -429,25 +430,129 @@ func (s *forwardIO) Stderr() io.ReadCloser {
429
430
return nil
430
431
}
431
432
432
- // procHandle is to track the os process so we can send signals to it.
433
+ // newRuncProcKiller returns an abstraction for sending SIGKILL to the
434
+ // process inside the container initiated from `runc run`.
435
+ func newRunProcKiller (runC * runc.Runc , id string ) procKiller {
436
+ return procKiller {runC : runC , id : id }
437
+ }
438
+
439
+ // newExecProcKiller returns an abstraction for sending SIGKILL to the
440
+ // process inside the container initiated from `runc exec`.
441
+ func newExecProcKiller (runC * runc.Runc , id string ) (procKiller , error ) {
442
+ // for `runc exec` we need to create a pidfile and read it later to kill
443
+ // the process
444
+ tdir , err := os .MkdirTemp ("" , "runc" )
445
+ if err != nil {
446
+ return procKiller {}, errors .Wrap (err , "failed to create directory for runc pidfile" )
447
+ }
448
+
449
+ return procKiller {
450
+ runC : runC ,
451
+ id : id ,
452
+ pidfile : filepath .Join (tdir , "pidfile" ),
453
+ cleanup : func () {
454
+ os .RemoveAll (tdir )
455
+ },
456
+ }, nil
457
+ }
458
+
459
+ type procKiller struct {
460
+ runC * runc.Runc
461
+ id string
462
+ pidfile string
463
+ cleanup func ()
464
+ }
465
+
466
+ // Cleanup will delete any tmp files created for the pidfile allocation
467
+ // if this killer was for a `runc exec` process.
468
+ func (k procKiller ) Cleanup () {
469
+ if k .cleanup != nil {
470
+ k .cleanup ()
471
+ }
472
+ }
473
+
474
+ // Kill will send SIGKILL to the process running inside the container.
475
+ // If the process was created by `runc run` then we will use `runc kill`,
476
+ // otherwise for `runc exec` we will read the pid from a pidfile and then
477
+ // send the signal directly that process.
478
+ func (k procKiller ) Kill (ctx context.Context ) (err error ) {
479
+ bklog .G (ctx ).Debugf ("sending sigkill to process in container %s" , k .id )
480
+ defer func () {
481
+ if err != nil {
482
+ bklog .G (ctx ).Errorf ("failed to kill process in container id %s: %+v" , k .id , err )
483
+ }
484
+ }()
485
+
486
+ // this timeout is generally a no-op, the Kill ctx should already have a
487
+ // shorter timeout but here as a fail-safe for future refactoring.
488
+ ctx , timeout := context .WithTimeout (ctx , 10 * time .Second )
489
+ defer timeout ()
490
+
491
+ if k .pidfile == "" {
492
+ // for `runc run` process we use `runc kill` to terminate the process
493
+ return k .runC .Kill (ctx , k .id , int (syscall .SIGKILL ), nil )
494
+ }
495
+
496
+ // `runc exec` will write the pidfile a few milliseconds after we
497
+ // get the runc pid via the startedCh, so we might need to retry until
498
+ // it appears in the edge case where we want to kill a process
499
+ // immediately after it was created.
500
+ var pidData []byte
501
+ for {
502
+ pidData , err = os .ReadFile (k .pidfile )
503
+ if err != nil {
504
+ if os .IsNotExist (err ) {
505
+ select {
506
+ case <- ctx .Done ():
507
+ return errors .New ("context cancelled before runc wrote pidfile" )
508
+ case <- time .After (10 * time .Millisecond ):
509
+ continue
510
+ }
511
+ }
512
+ return errors .Wrap (err , "failed to read pidfile from runc" )
513
+ }
514
+ break
515
+ }
516
+ pid , err := strconv .Atoi (string (pidData ))
517
+ if err != nil {
518
+ return errors .Wrap (err , "read invalid pid from pidfile" )
519
+ }
520
+ process , err := os .FindProcess (pid )
521
+ if err != nil {
522
+ // error only possible on non-unix hosts
523
+ return errors .Wrapf (err , "failed to find process for pid %d from pidfile" , pid )
524
+ }
525
+ defer process .Release ()
526
+ return process .Signal (syscall .SIGKILL )
527
+ }
528
+
529
+ // procHandle is to track the process so we can send signals to it
530
+ // and handle graceful shutdown.
433
531
type procHandle struct {
434
- Process * os.Process
435
- ready chan struct {}
436
- ended chan struct {}
437
- shutdown func ()
532
+ // this is for the runc process (not the process in-container)
533
+ monitorProcess * os.Process
534
+ ready chan struct {}
535
+ ended chan struct {}
536
+ shutdown func ()
537
+ // this this only used when the request context is canceled and we need
538
+ // to kill the in-container process.
539
+ killer procKiller
438
540
}
439
541
440
542
// runcProcessHandle will create a procHandle that will be monitored, where
441
- // on ctx.Done the process will be killed. If the kill fails, then the cancel
442
- // will be called. This is to allow for runc to go through its normal shutdown
443
- // procedure if the ctx is canceled and to ensure there are no zombie processes
444
- // left by runc.
445
- func runcProcessHandle (ctx context.Context , id string ) (* procHandle , context.Context ) {
543
+ // on ctx.Done the in-container process will receive a SIGKILL. The returned
544
+ // context should be used for the go-runc.(Run|Exec) invocations. The returned
545
+ // context will only be canceled in the case where the request context is
546
+ // canceled and we are unable to send the SIGKILL to the in-container process.
547
+ // The goal is to allow for runc to gracefully shutdown when the request context
548
+ // is cancelled.
549
+ func runcProcessHandle (ctx context.Context , killer procKiller ) (* procHandle , context.Context ) {
446
550
runcCtx , cancel := context .WithCancel (context .Background ())
447
551
p := & procHandle {
448
552
ready : make (chan struct {}),
449
553
ended : make (chan struct {}),
450
554
shutdown : cancel ,
555
+ killer : killer ,
451
556
}
452
557
// preserve the logger on the context used for the runc process handling
453
558
runcCtx = bklog .WithLogger (runcCtx , bklog .G (ctx ))
@@ -464,8 +569,7 @@ func runcProcessHandle(ctx context.Context, id string) (*procHandle, context.Con
464
569
select {
465
570
case <- ctx .Done ():
466
571
killCtx , timeout := context .WithTimeout (context .Background (), 7 * time .Second )
467
- if err := p .Process .Kill (); err != nil {
468
- bklog .G (ctx ).Errorf ("failed to kill runc %s: %+v" , id , err )
572
+ if err := p .killer .Kill (killCtx ); err != nil {
469
573
select {
470
574
case <- killCtx .Done ():
471
575
timeout ()
@@ -492,8 +596,8 @@ func runcProcessHandle(ctx context.Context, id string) (*procHandle, context.Con
492
596
// Release will free resources with a procHandle.
493
597
func (p * procHandle ) Release () {
494
598
close (p .ended )
495
- if p .Process != nil {
496
- p .Process .Release ()
599
+ if p .monitorProcess != nil {
600
+ p .monitorProcess .Release ()
497
601
}
498
602
}
499
603
@@ -506,9 +610,9 @@ func (p *procHandle) Shutdown() {
506
610
}
507
611
}
508
612
509
- // WaitForReady will wait until the Process has been populated or the
510
- // provided context was cancelled. This should be called before using
511
- // the Process field .
613
+ // WaitForReady will wait until we have received the runc pid via the go-runc
614
+ // Started channel, or until the request context is canceled. This should
615
+ // return without errors before attempting to send signals to the runc process .
512
616
func (p * procHandle ) WaitForReady (ctx context.Context ) error {
513
617
select {
514
618
case <- ctx .Done ():
@@ -518,34 +622,36 @@ func (p *procHandle) WaitForReady(ctx context.Context) error {
518
622
}
519
623
}
520
624
521
- // WaitForStart will record the pid reported by Runc via the channel.
522
- // We wait for up to 10s for the runc process to start . If the started
625
+ // WaitForStart will record the runc pid reported by go-runc via the channel.
626
+ // We wait for up to 10s for the runc pid to be reported . If the started
523
627
// callback is non-nil it will be called after receiving the pid.
524
628
func (p * procHandle ) WaitForStart (ctx context.Context , startedCh <- chan int , started func ()) error {
525
629
startedCtx , timeout := context .WithTimeout (ctx , 10 * time .Second )
526
630
defer timeout ()
527
- var err error
528
631
select {
529
632
case <- startedCtx .Done ():
530
- return errors .New ("runc started message never received" )
531
- case pid , ok := <- startedCh :
633
+ return errors .New ("go- runc started message never received" )
634
+ case runcPid , ok := <- startedCh :
532
635
if ! ok {
533
- return errors .New ("runc process failed to send pid" )
636
+ return errors .New ("go- runc failed to send pid" )
534
637
}
535
638
if started != nil {
536
639
started ()
537
640
}
538
- p .Process , err = os .FindProcess (pid )
641
+ var err error
642
+ p .monitorProcess , err = os .FindProcess (runcPid )
539
643
if err != nil {
540
- return errors .Wrapf (err , "unable to find runc process for pid %d" , pid )
644
+ // error only possible on non-unix hosts
645
+ return errors .Wrapf (err , "failed to find runc process %d" , runcPid )
541
646
}
542
647
close (p .ready )
543
648
}
544
649
return nil
545
650
}
546
651
547
- // handleSignals will wait until the runcProcess is ready then will
548
- // send each signal received on the channel to the process.
652
+ // handleSignals will wait until the procHandle is ready then will
653
+ // send each signal received on the channel to the runc process (not directly
654
+ // to the in-container process)
549
655
func handleSignals (ctx context.Context , runcProcess * procHandle , signals <- chan syscall.Signal ) error {
550
656
if signals == nil {
551
657
return nil
@@ -559,8 +665,15 @@ func handleSignals(ctx context.Context, runcProcess *procHandle, signals <-chan
559
665
case <- ctx .Done ():
560
666
return nil
561
667
case sig := <- signals :
562
- err := runcProcess .Process .Signal (sig )
563
- if err != nil {
668
+ if sig == syscall .SIGKILL {
669
+ // never send SIGKILL directly to runc, it needs to go to the
670
+ // process in-container
671
+ if err := runcProcess .killer .Kill (ctx ); err != nil {
672
+ return err
673
+ }
674
+ continue
675
+ }
676
+ if err := runcProcess .monitorProcess .Signal (sig ); err != nil {
564
677
bklog .G (ctx ).Errorf ("failed to signal %s to process: %s" , sig , err )
565
678
return err
566
679
}
0 commit comments