@@ -10,7 +10,6 @@ import (
1010 "io/fs"
1111 "os"
1212 "os/exec"
13- "path/filepath"
1413 "runtime"
1514 "sync"
1615 "syscall"
@@ -348,12 +347,7 @@ func (m *Manager) createDefaultRunner(ctx context.Context) (*Runner, error) {
348347 tmpDir : tmpDir ,
349348 uploader : uploader ,
350349 }
351- // Only enable forced shutdown for procedure mode
352- var forceShutdown * config.ForceShutdownSignal
353- if m .cfg .UseProcedureMode {
354- forceShutdown = m .cfg .ForceShutdown
355- }
356- runner , err := NewRunner (runtimeContext , runtimeCancel , runnerCtx , cmd , cogYaml .Concurrency .Max , m .cfg .CleanupTimeout , forceShutdown , m .baseLogger )
350+ runner , err := NewRunner (runtimeContext , runtimeCancel , runnerCtx , cmd , cogYaml .Concurrency .Max , m .cfg , m .baseLogger )
357351 if err != nil {
358352 return nil , err
359353 }
@@ -419,6 +413,36 @@ func (m *Manager) allocatePrediction(runner *Runner, req PredictionRequest) { //
419413 delete (runner .pending , req .ID )
420414 runner .mu .Unlock ()
421415
416+ // In one-shot mode, stop runner after prediction completes to trigger cleanup
417+ if m .cfg .OneShot && finalResponse .Status .IsCompleted () {
418+ go func () {
419+ logger := m .logger .Sugar ()
420+ logger .Infow ("one-shot mode: stopping runner after prediction completion" , "prediction_id" , req .ID , "runner_id" , runner .runnerCtx .id )
421+
422+ // Try graceful stop with timeout
423+ stopDone := make (chan error , 1 )
424+ go func () {
425+ stopDone <- runner .Stop ()
426+ }()
427+
428+ timeout := m .cfg .CleanupTimeout
429+ if timeout == 0 {
430+ timeout = 10 * time .Second // Default timeout
431+ }
432+
433+ select {
434+ case err := <- stopDone :
435+ if err != nil {
436+ logger .Errorw ("failed to stop runner in one-shot mode" , "error" , err , "runner_id" , runner .runnerCtx .id )
437+ }
438+ runner .ForceKill ()
439+ case <- time .After (timeout ):
440+ logger .Warnw ("stop timeout exceeded in one-shot mode, falling back to force kill" , "timeout" , timeout , "runner_id" , runner .runnerCtx .id )
441+ runner .ForceKill ()
442+ }
443+ }()
444+ }
445+
422446 if cancel != nil {
423447 cancel ()
424448 }
@@ -626,20 +650,28 @@ func (m *Manager) createProcedureRunner(runnerName, procedureHash string) (*Runn
626650 env = append (env , "TMPDIR=" + tmpDir )
627651 cmd .Env = env
628652
629- // Apply setUID isolation for procedure runners if needed
653+ var allocatedUID * int
630654 if m .shouldUseSetUID () {
631655 uid , err := AllocateUID ()
632656 if err != nil {
633657 runtimeCancel ()
634658 return nil , fmt .Errorf ("failed to allocate UID: %w" , err )
635659 }
660+ allocatedUID = & uid
661+
662+ // Use os.Root for secure ownership changes
663+ workingRoot , err := os .OpenRoot (workingDir )
664+ if err != nil {
665+ runtimeCancel ()
666+ return nil , fmt .Errorf ("failed to open working directory root: %w" , err )
667+ }
668+ defer func () { _ = workingRoot .Close () }()
636669
637- // Change ownership of source directory (workingDir)
638- err = filepath .WalkDir (workingDir , func (path string , d fs.DirEntry , err error ) error {
670+ err = fs .WalkDir (workingRoot .FS (), "." , func (path string , d fs.DirEntry , err error ) error {
639671 if err != nil {
640672 return err
641673 }
642- if lchownErr := os .Lchown (path , uid , NoGroupGID ); lchownErr != nil {
674+ if lchownErr := workingRoot .Lchown (path , uid , NoGroupGID ); lchownErr != nil {
643675 log .Errorw ("failed to change ownership" , "path" , path , "uid" , uid , "error" , lchownErr )
644676 return lchownErr
645677 }
@@ -650,19 +682,24 @@ func (m *Manager) createProcedureRunner(runnerName, procedureHash string) (*Runn
650682 return nil , fmt .Errorf ("failed to change ownership of source directory: %w" , err )
651683 }
652684
653- // Make working dir writable by unprivileged Python process
654- if err := os .Lchown (workingDir , uid , NoGroupGID ); err != nil {
685+ if err := workingRoot .Lchown ("." , uid , NoGroupGID ); err != nil {
655686 log .Errorw ("failed to change ownership of working directory" , "path" , workingDir , "uid" , uid , "error" , err )
656687 runtimeCancel ()
657688 return nil , fmt .Errorf ("failed to change ownership of working directory: %w" , err )
658689 }
659- // Change ownership of temp directory
660- if err := os .Lchown (tmpDir , uid , NoGroupGID ); err != nil {
690+
691+ tmpRoot , err := os .OpenRoot (tmpDir )
692+ if err != nil {
693+ runtimeCancel ()
694+ return nil , fmt .Errorf ("failed to open temp directory root: %w" , err )
695+ }
696+ defer func () { _ = tmpRoot .Close () }()
697+
698+ if err := tmpRoot .Lchown ("." , uid , NoGroupGID ); err != nil {
661699 log .Errorw ("failed to change ownership of temp directory" , "path" , tmpDir , "uid" , uid , "error" , err )
662700 runtimeCancel ()
663701 return nil , fmt .Errorf ("failed to change ownership of temp directory: %w" , err )
664702 }
665- // Use syscall.Credential to run process as unprivileged user from start
666703 cmd .SysProcAttr .Credential = & syscall.Credential {
667704 Uid : uint32 (uid ), //nolint:gosec // this is guarded in isolation .allocate, cannot exceed const MaxUID
668705 Gid : uint32 (NoGroupGID ),
@@ -675,19 +712,17 @@ func (m *Manager) createProcedureRunner(runnerName, procedureHash string) (*Runn
675712 if m .cfg .UploadURL != "" {
676713 uploader = newUploader (m .cfg .UploadURL )
677714 }
715+
678716 runnerCtx := RunnerContext {
679- id : runnerName ,
680- workingdir : workingDir ,
681- tmpDir : tmpDir ,
682- uploader : uploader ,
717+ id : runnerName ,
718+ workingdir : workingDir ,
719+ tmpDir : tmpDir ,
720+ uploader : uploader ,
721+ uid : allocatedUID ,
722+ cleanupDirectories : m .cfg .CleanupDirectories ,
683723 }
684724
685- // Only enable forced shutdown for procedure mode
686- var forceShutdown * config.ForceShutdownSignal
687- if m .cfg .UseProcedureMode {
688- forceShutdown = m .cfg .ForceShutdown
689- }
690- runner , err := NewRunner (runtimeContext , runtimeCancel , runnerCtx , cmd , 1 , m .cfg .CleanupTimeout , forceShutdown , m .baseLogger )
725+ runner , err := NewRunner (runtimeContext , runtimeCancel , runnerCtx , cmd , 1 , m .cfg , m .baseLogger )
691726 if err != nil {
692727 return nil , fmt .Errorf ("failed to create runner: %w" , err )
693728 }
0 commit comments