Skip to content

Commit 825fb44

Browse files
authored
Reapply "[supervisor] set pod failure reason when supervisor is reaped" (#20327)
* Revert "[supervisor] revert recent changes to reduce frequency of `supervisor run error with unexpected exit code` (#20325)"
  This reverts commit 3f066ce.
* Ignore expected case: pod kill / process kill by supervisor
* Debug commit
* fix
* Revert "Debug commit"
  This reverts commit 1473e3d.
1 parent 12277cc commit 825fb44

File tree

3 files changed: 55 additions (+55) and 11 deletions (-11).

components/supervisor/cmd/init.go

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"os/signal"
1616
"strings"
1717
"sync"
18+
"sync/atomic"
1819
"syscall"
1920
"time"
2021

@@ -77,31 +78,71 @@ var initCmd = &cobra.Command{
7778
}
7879

7980
supervisorDone := make(chan struct{})
81+
handledByReaper := make(chan int)
82+
// supervisor is expected to be killed when receiving signals
83+
ignoreUnexpectedExitCode := atomic.Bool{}
84+
handleSupervisorExit := func(exitCode int) {
85+
if exitCode == 0 {
86+
return
87+
}
88+
logs := extractFailureFromRun()
89+
if shared.IsExpectedShutdown(exitCode) {
90+
log.Fatal(logs)
91+
} else {
92+
if ignoreUnexpectedExitCode.Load() {
93+
return
94+
}
95+
log.WithError(fmt.Errorf(logs)).Fatal("supervisor run error with unexpected exit code")
96+
}
97+
}
8098
go func() {
8199
defer close(supervisorDone)
82100

83101
err := runCommand.Wait()
84-
if err != nil && !(strings.Contains(err.Error(), "signal: ") || strings.Contains(err.Error(), "no child processes")) {
102+
if err == nil {
103+
return
104+
}
105+
// exited by reaper
106+
if strings.Contains(err.Error(), "no child processes") {
107+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
108+
defer cancel()
109+
select {
110+
case <-ctx.Done(): // timeout
111+
case exitCode := <-handledByReaper:
112+
handleSupervisorExit(exitCode)
113+
}
114+
} else if !(strings.Contains(err.Error(), "signal: ")) {
85115
if eerr, ok := err.(*exec.ExitError); ok && eerr.ExitCode() != 0 {
86-
logs := extractFailureFromRun()
87-
if shared.IsExpectedShutdown(eerr.ExitCode()) {
88-
log.Fatal(logs)
89-
} else {
90-
log.WithError(fmt.Errorf(logs)).Fatal("supervisor run error with unexpected exit code")
91-
}
116+
handleSupervisorExit(eerr.ExitCode())
92117
}
93118
log.WithError(err).Error("supervisor run error")
94119
return
95120
}
96121
}()
97122
// start the reaper to clean up zombie processes
98-
reaper.Reap()
123+
reaperChan := make(chan reaper.Status, 10)
124+
reaper.Start(reaper.Config{
125+
Pid: -1,
126+
Options: 0,
127+
DisablePid1Check: false,
128+
StatusChannel: reaperChan,
129+
})
130+
go func() {
131+
for status := range reaperChan {
132+
if status.Pid != runCommand.Process.Pid {
133+
continue
134+
}
135+
exitCode := status.WaitStatus.ExitStatus()
136+
handledByReaper <- exitCode
137+
}
138+
}()
99139

100140
select {
101141
case <-supervisorDone:
102142
// supervisor has ended - we're all done here
103143
return
104144
case <-sigInput:
145+
ignoreUnexpectedExitCode.Store(true)
105146
// we received a terminating signal - pass on to supervisor and wait for it to finish
106147
ctx, cancel := context.WithTimeout(context.Background(), cfg.GetTerminationGracePeriod())
107148
defer cancel()

components/supervisor/go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ require (
1616
github.com/gitpod-io/gitpod/ide-metrics-api v0.0.0-00010101000000-000000000000
1717
github.com/gitpod-io/gitpod/supervisor/api v0.0.0-00010101000000-000000000000
1818
github.com/gitpod-io/gitpod/ws-daemon/api v0.0.0-00010101000000-000000000000
19+
github.com/gitpod-io/go-reaper v0.0.0-20241024192051-78d04cc2e25f
1920
github.com/golang/mock v1.6.0
2021
github.com/google/go-cmp v0.6.0
2122
github.com/google/uuid v1.6.0
@@ -29,7 +30,7 @@ require (
2930
github.com/prometheus/common v0.42.0
3031
github.com/prometheus/procfs v0.10.1
3132
github.com/prometheus/pushgateway v1.5.1
32-
github.com/ramr/go-reaper v0.2.1
33+
github.com/ramr/go-reaper v0.2.2
3334
github.com/sirupsen/logrus v1.9.3
3435
github.com/soheilhy/cmux v0.1.5
3536
github.com/spf13/cobra v1.4.0

components/supervisor/go.sum

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)