@@ -469,17 +469,6 @@ func canSendSignal(childProcessName string, noSignalExes []string) (canSendSigna
469469 return canSendSignal , err
470470}
471471
472- var processHandleStartTimes = make (map [int ]uint64 )
473-
474- func getProcessStartTimeFromHandle (processHandle int ) (startTime uint64 , err error ) {
475- startTime , keyPresent := processHandleStartTimes [processHandle ]
476- if ! keyPresent {
477- startTime , err = _getProcessStartTime (processHandle )
478- processHandleStartTimes [processHandle ] = startTime
479- }
480- return
481- }
482-
483472type ProcessRecord struct {
484473 pid int
485474 processHandle int
@@ -490,7 +479,8 @@ type ProcessRecord struct {
490479
491480var pidRecords = make (map [int ]ProcessRecord )
492481
493- func getProcessRecordFromPid (pid int ) (processRecord ProcessRecord , err error ) {
482+ func getProcessRecordFromPid (pidRecordsLock * sync.Mutex , pid int ) (processRecord ProcessRecord , err error ) {
483+ pidRecordsLock .Lock ()
494484 processRecord , keyPresent := pidRecords [pid ]
495485 if ! keyPresent {
496486 processHandle , processExists , accessGranted , err := getProcess (pid )
@@ -504,67 +494,30 @@ func getProcessRecordFromPid(pid int) (processRecord ProcessRecord, err error) {
504494 pidRecords [pid ] = processRecord
505495 }
506496 }
497+ pidRecordsLock .Unlock ()
507498 return
508499}
509500
510- func getProcessStartTimeFromPid (pid int ) (startTime uint64 , err error ) {
511- startTime = 0
512- processRecord , err := getProcessRecordFromPid (pid )
513- if processRecord .processHandle != INVALID_HANDLE && err == nil {
514- startTime , err = getProcessStartTimeFromHandle (processRecord .processHandle )
515- }
516- return
517- }
518-
519- type ChildCheckRecord struct {
520- process * psutil.Process
521- knownChild bool
522- }
523-
524- func checkChildProcess (childCheckRecord ChildCheckRecord , noStopExes []string , noKillExes []string ) (
501+ func checkChildProcess (pidRecordsLock * sync.Mutex , childCheckProcess * psutil.Process , noStopExes []string , noKillExes []string ) (
525502 processHandle int ,
526503 considerChild bool ,
527504 signalsToSend int ,
528505 err error ) {
529506 considerChild = false // first assume false
530507 signalsToSend = SEND_NO_SIGNAL // first assume no signal
531508 // use err2 for getProcessRecordFromPid(), since child may no longer exist
532- processRecord , err2 := getProcessRecordFromPid (int (childCheckRecord . process .Pid ))
509+ processRecord , err2 := getProcessRecordFromPid (pidRecordsLock , int (childCheckProcess .Pid ))
533510 processHandle = processRecord .processHandle
534511 if err2 == nil {
535512 if processHandle != INVALID_HANDLE {
536- doEnvCheck := true // first assume true
537- if childCheckRecord .knownChild {
538- // use err3 for the next operations, since
539- // we may fallback to env check below
540- processStartTime , err3 := getProcessStartTimeFromHandle (processHandle )
541- if err3 == nil {
542- parentPid , err3 := childCheckRecord .process .Ppid () // get parent pid
543- if err3 == nil {
544- // use err3 for getProcessStartTimeFromPid(), since parent may no longer exist
545- parentStartTime , err3 := getProcessStartTimeFromPid (int (parentPid ))
546- if err3 == nil {
547- if processStartTime >= parentStartTime {
548- // this our child and
549- // not from a previous parent with reused pid,
550- // so we can skip the env check
551- doEnvCheck = false
552- considerChild = true
553- }
554- }
555- }
556- }
557- }
558- if doEnvCheck {
559- // handle the orphaned child case
560- // by looking at the env
561- considerChild , err = doesChildHaveMarker (processHandle )
562- }
513+ // Don't look at parent-child relationships, since children, grandchildren, etc.
514+ // could become orphaned at any time. Just look for the child marker to know.
515+ considerChild , err = doesChildHaveMarker (childCheckProcess , processHandle )
563516 if err == nil {
564517 if considerChild {
565518 var childProcessName string = ""
566519 if len (noStopExes ) > 0 || len (noKillExes ) > 0 {
567- childProcessName , err = childCheckRecord . process .Name ()
520+ childProcessName , err = childCheckProcess .Name ()
568521 if err == nil {
569522 if len (childProcessName ) == 0 {
570523 err = errors .New ("childProcessName is empty" )
@@ -608,16 +561,18 @@ func checkChildProcess(childCheckRecord ChildCheckRecord, noStopExes []string, n
608561
609562// get process tree in bottom up order
610563func getProcessTreeRecursive (
611- childCheckRecord ChildCheckRecord ,
564+ pidRecordsLock * sync.Mutex ,
565+ childCheckProcess * psutil.Process ,
612566 noStopExes []string ,
613567 noKillExes []string ,
614568 pidsVisited * IntSet ,
615569) (processRecords []ProcessRecord ) {
616- if considerPid (int (childCheckRecord . process .Pid )) {
570+ if considerPid (int (childCheckProcess .Pid )) {
617571 // avoid cycles in pid tree by looking at visited set
618- if pidsVisited .Add (int (childCheckRecord . process .Pid )) {
572+ if pidsVisited .Add (int (childCheckProcess .Pid )) {
619573 processHandle , considerChild , signalsToSend , err := checkChildProcess (
620- childCheckRecord ,
574+ pidRecordsLock ,
575+ childCheckProcess ,
621576 noStopExes ,
622577 noKillExes )
623578 if err != nil {
@@ -627,7 +582,7 @@ func getProcessTreeRecursive(
627582 }
628583 if processHandle != INVALID_HANDLE {
629584 if considerChild {
630- grandChildren , err := childCheckRecord . process .Children ()
585+ grandChildren , err := childCheckProcess .Children ()
631586 if err != nil {
632587 if err == psutil .ErrorNoChildren {
633588 // ignore this error
@@ -640,10 +595,9 @@ func getProcessTreeRecursive(
640595 } else {
641596 if grandChildren != nil {
642597 for _ , grandChildProcess := range grandChildren {
643- var grandChildCheckRecord = ChildCheckRecord {
644- process : grandChildProcess , knownChild : childCheckRecord .knownChild }
645598 subProcessRecords := getProcessTreeRecursive (
646- grandChildCheckRecord ,
599+ pidRecordsLock ,
600+ grandChildProcess ,
647601 noStopExes ,
648602 noKillExes ,
649603 pidsVisited )
@@ -654,66 +608,23 @@ func getProcessTreeRecursive(
654608 }
655609 }
656610 var processRecord = ProcessRecord {
657- processHandle : processHandle , pid : int (childCheckRecord . process .Pid ), signalsToSend : signalsToSend }
611+ processHandle : processHandle , pid : int (childCheckProcess .Pid ), signalsToSend : signalsToSend }
658612 processRecords = append (processRecords , processRecord )
659613 } else {
660- releaseProcessByPid (int (childCheckRecord .process .Pid ))
614+ pidRecordsLock .Lock ()
615+ releaseProcessByPid (int (childCheckProcess .Pid ))
616+ pidRecordsLock .Unlock ()
661617 }
662618 }
663619 }
664620 }
665621 return processRecords
666622}
667623
668- func getChildProcesses (noStopExes []string , noKillExes []string ) (processRecords []ProcessRecord ) {
669- var childCheckRecords []ChildCheckRecord
670-
671- // to handle non-orphaned children, get our immediate children
672- thisProcess , err := psutil .NewProcess (int32 (os .Getpid ()))
673- if err == nil {
674- immediateChildren , err := thisProcess .Children ()
675- if err == nil {
676- if immediateChildren != nil {
677- for _ , process := range immediateChildren {
678- var childCheckRecord = ChildCheckRecord {
679- process : process , knownChild : true } // process is known to be our child
680- childCheckRecords = append (childCheckRecords , childCheckRecord )
681- }
682- }
683- } else {
684- if err == psutil .ErrorNoChildren {
685- // ignore this error
686- err = nil
687- } else {
688- if Verbose {
689- Log .Error (err )
690- }
691- }
692- }
693- } else {
694- if Verbose {
695- Log .Error (err )
696- }
697- }
698-
699- if err == nil {
700- // to handle orphaned children, get all processes
701- // we'll check for duplicates later
702- allProcesses , err := psutil .Processes ()
703- if err == nil {
704- if allProcesses != nil {
705- for _ , process := range allProcesses {
706- var childCheckRecord = ChildCheckRecord {
707- process : process , knownChild : false } // process may or may not be our child
708- childCheckRecords = append (childCheckRecords , childCheckRecord )
709- }
710- }
711- } else {
712- if Verbose {
713- Log .Error (err )
714- }
715- }
716- }
624+ func getChildProcesses (pidRecordsLock * sync.Mutex , noStopExes []string , noKillExes []string ) (processRecords []ProcessRecord ) {
625+ // handle normal and orphaned children by getting all processes
626+ // we'll check for duplicates later
627+ allProcesses , err := psutil .Processes ()
717628
718629 if err == nil {
719630 // pidsVisited := IntSet{set: make(map[int]bool)}
@@ -732,13 +643,14 @@ func getChildProcesses(noStopExes []string, noKillExes []string) (processRecords
732643 tokens := make (chan int , threads )
733644 var wg sync.WaitGroup
734645
735- for _ , childCheckRecord := range childCheckRecords {
646+ for _ , childCheckProcess := range allProcesses {
736647
737648 wg .Add (1 )
738649 tokens <- 1
739- go func (childCheckRecord ChildCheckRecord ) {
650+ go func (childCheckProcess * psutil. Process ) {
740651 subProcessRecords := getProcessTreeRecursive (
741- childCheckRecord ,
652+ pidRecordsLock ,
653+ childCheckProcess ,
742654 noStopExes ,
743655 noKillExes ,
744656 & pidsVisited )
@@ -749,7 +661,7 @@ func getChildProcesses(noStopExes []string, noKillExes []string) (processRecords
749661
750662 wg .Done ()
751663 <- tokens
752- }(childCheckRecord )
664+ }(childCheckProcess )
753665 }
754666
755667 wg .Wait ()
@@ -877,12 +789,12 @@ func pollKillProcess(processRecord ProcessRecord) (err error) {
877789}
878790
879791// ensure our child processes are stopped
880- func stopChildProcesses (noStopExes []string , noKillExes []string , cleanupTime time.Duration ) (err error ) {
792+ func stopChildProcesses (pidRecordsLock * sync. Mutex , noStopExes []string , noKillExes []string , cleanupTime time.Duration ) (err error ) {
881793 err = nil // first assume no error
882794 anyRemaining := true // first assume some children
883795 totalNumSignaled := 0
884796 if canStopChildProcesses () {
885- processRecords := getChildProcesses (noStopExes , noKillExes )
797+ processRecords := getChildProcesses (pidRecordsLock , noStopExes , noKillExes )
886798 // progress from most graceful to most invasive stop signal
887799 // if no matching children, then call is a noop
888800 numSignaled := signalChildProcesses (processRecords , CTRL_C_SIGNAL )
@@ -908,7 +820,7 @@ func stopChildProcesses(noStopExes []string, noKillExes []string, cleanupTime ti
908820 anyRemaining = pollRemainingChildren (processRecords , 0 ) // wait zero time, since already waited above
909821 // release process handles only after descending into all processes,
910822 // to ensure pids do not get reused while descending
911- releaseProcesses ()
823+ releaseProcesses (pidRecordsLock )
912824 }
913825 if anyRemaining && totalNumSignaled == 0 {
914826 msg := "No child processes stopped or killed\n "
@@ -920,17 +832,20 @@ func stopChildProcesses(noStopExes []string, noKillExes []string, cleanupTime ti
920832}
921833
922834func releaseProcessByPid (pid int ) {
835+ // no pidRecordsLock here, rely on caller to do it
923836 processRecord , keyPresent := pidRecords [pid ]
924837 if ! keyPresent {
925838 delete (pidRecords , processRecord .pid )
926839 releaseProcessByHandle (processRecord .processHandle )
927840 }
928841}
929842
930- func releaseProcesses () {
843+ func releaseProcesses (pidRecordsLock * sync.Mutex ) {
844+ pidRecordsLock .Lock ()
931845 for _ , processRecord := range pidRecords {
932846 releaseProcessByPid (processRecord .pid )
933847 }
848+ pidRecordsLock .Unlock ()
934849}
935850
936851func getChildMarkerKey () string {
@@ -1037,7 +952,7 @@ func (c *Command) run(opts *Options, tryNumber int) error {
1037952 // ensure we only initiate the stop attempt once,
1038953 // from all our command threads
1039954 stopOnce .Do (func () {
1040- err = stopChildProcesses (opts .NoStopExes , opts .NoKillExes , opts .CleanupTime )
955+ err = stopChildProcesses (& opts . PidRecordsLock , opts .NoStopExes , opts .NoKillExes , opts .CleanupTime )
1041956 if err != nil {
1042957 if Verbose {
1043958 Log .Error (err )
@@ -1203,6 +1118,7 @@ type Options struct {
12031118 PrintRetryOutput bool // print output from retries
12041119 Timeout time.Duration // timeout
12051120 StopOnErr bool // stop on any error
1121+ PidRecordsLock sync.Mutex // make stop on error thread-safe
12061122 NoStopExes []string // exe names to exclude from stop signal
12071123 NoKillExes []string // exe names to exclude from kill signal
12081124 CleanupTime time.Duration // time to allow children to clean up
@@ -1452,7 +1368,7 @@ func Run(opts *Options, cancel chan struct{}, chCmdStr chan string) (chan *Comma
14521368 if opts .StopOnErr {
14531369 Log .Error ("stop on first error" )
14541370 }
1455- err = stopChildProcesses (opts .NoStopExes , opts .NoKillExes , opts .CleanupTime )
1371+ err = stopChildProcesses (& opts . PidRecordsLock , opts .NoStopExes , opts .NoKillExes , opts .CleanupTime )
14561372 if err != nil {
14571373 if Verbose {
14581374 Log .Error (err )
0 commit comments