Skip to content

Commit 7ace0d3

Browse files
Merge pull request #477 from wking/abandon-child-goroutines-after-metrics-shutdown-context
Bug 1891143: pkg/cvo/metrics: Abandon child goroutines after shutdownContext expires
2 parents 964a46f + 4f16ee5 commit 7ace0d3

File tree

1 file changed

+34
-19
lines changed

1 file changed

+34
-19
lines changed

pkg/cvo/metrics.go

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ version for 'cluster', or empty for 'initial'.
100100
}
101101
}
102102

103+
type asyncResult struct {
104+
name string
105+
error error
106+
}
107+
103108
// RunMetrics launches an server bound to listenAddress serving
104109
// Prometheus metrics at /metrics over HTTP, and, if tlsConfig is
105110
// non-nil, also over HTTPS. Continues serving until runContext.Done()
@@ -123,50 +128,60 @@ func RunMetrics(runContext context.Context, shutdownContext context.Context, lis
123128
// HTTP, needed during upgrade, while still allowing TLS certs and end to end metrics protection.
124129
mux := cmux.New(tcpListener)
125130

126-
errorChannel := make(chan error, 1)
127-
errorChannelCount := 1
131+
resultChannel := make(chan asyncResult, 1)
132+
resultChannelCount := 1
128133

129134
go func() {
130135
// match HTTP first
131136
httpListener := mux.Match(cmux.HTTP1())
132137
klog.Infof("Metrics port listening for HTTP on %v", listenAddress)
133-
errorChannel <- server.Serve(httpListener)
138+
err := server.Serve(httpListener)
139+
resultChannel <- asyncResult{name: "HTTP server", error: err}
134140
}()
135141

136142
if tlsConfig != nil {
137-
errorChannelCount++
143+
resultChannelCount++
138144
go func() {
139145
tlsListener := tls.NewListener(mux.Match(cmux.Any()), tlsConfig)
140146
klog.Infof("Metrics port listening for HTTPS on %v", listenAddress)
141-
errorChannel <- server.Serve(tlsListener)
147+
err := server.Serve(tlsListener)
148+
resultChannel <- asyncResult{name: "HTTPS server", error: err}
142149
}()
143150
}
144151

145-
errorChannelCount++
152+
resultChannelCount++
146153
go func() {
147-
errorChannel <- mux.Serve()
154+
err := mux.Serve()
155+
resultChannel <- asyncResult{name: "TCP muxer", error: err}
148156
}()
149157

150158
shutdown := false
151159
var loopError error
152-
for errorChannelCount > 0 {
160+
for resultChannelCount > 0 {
153161
if shutdown {
154-
err := <-errorChannel
155-
errorChannelCount--
156-
if err != nil && err != http.ErrServerClosed && err != cmux.ErrListenerClosed {
157-
if loopError == nil {
158-
loopError = err
159-
} else if err != nil { // log the error we are discarding
160-
klog.Errorf("Failed to gracefully shut down metrics server: %s", err)
162+
select {
163+
case result := <-resultChannel:
164+
resultChannelCount--
165+
if result.error == nil {
166+
klog.Infof("Collected metrics %s goroutine.", result.name)
167+
} else {
168+
klog.Errorf("Collected metrics %s goroutine: %v", result.name, result.error)
169+
loopError = result.error
161170
}
171+
case <-shutdownContext.Done(): // out of time
172+
klog.Errorf("Abandoning %d uncollected metrics goroutines", resultChannelCount)
173+
return shutdownContext.Err()
162174
}
163175
} else {
164176
select {
165177
case <-runContext.Done(): // clean shutdown
166-
case err := <-errorChannel: // crashed before a shutdown was requested
167-
errorChannelCount--
168-
if err != nil && err != http.ErrServerClosed && err != cmux.ErrListenerClosed {
169-
loopError = err
178+
case result := <-resultChannel: // crashed before a shutdown was requested
179+
resultChannelCount--
180+
if result.error == nil {
181+
klog.Infof("Collected metrics %s goroutine.", result.name)
182+
} else {
183+
klog.Errorf("Collected metrics %s goroutine: %v", result.name, result.error)
184+
loopError = result.error
170185
}
171186
}
172187
shutdown = true

0 commit comments

Comments
 (0)