Skip to content

Commit c87f7c3

Browse files
authored
Use errgroup for all go routines (#601)
Instead of using log.Fatal in some of them

Signed-off-by: Richard Wall <[email protected]>
1 parent 415d434 commit c87f7c3

File tree

1 file changed

+33
-18
lines changed

1 file changed

+33
-18
lines changed

pkg/agent/run.go

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,16 @@ func Run(cmd *cobra.Command, args []string) {
7474
logs.Log.Fatalf("While evaluating configuration: %v", err)
7575
}
7676

77-
go func() {
77+
group, gctx := errgroup.WithContext(ctx)
78+
defer func() {
79+
// TODO: replace Fatalf log calls with Errorf and return the error
80+
cancel()
81+
if err := group.Wait(); err != nil {
82+
logs.Log.Fatalf("failed to wait for controller-runtime component to stop: %v", err)
83+
}
84+
}()
85+
86+
group.Go(func() error {
7887
server := http.NewServeMux()
7988

8089
if Flags.Profiling {
@@ -105,21 +114,25 @@ func Run(cmd *cobra.Command, args []string) {
105114

106115
err := http.ListenAndServe(":8081", server)
107116
if err != nil && !errors.Is(err, http.ErrServerClosed) {
108-
logs.Log.Fatalf("failed to run the health check server: %s", err)
117+
return fmt.Errorf("failed to run the health check server: %s", err)
109118
}
110-
}()
119+
// The agent must stop if the management server stops
120+
cancel()
121+
return nil
122+
})
111123

112124
_, isVenConn := preflightClient.(*client.VenConnClient)
113125
if isVenConn {
114-
go func() {
115-
err := preflightClient.(manager.Runnable).Start(ctx)
126+
group.Go(func() error {
127+
err := preflightClient.(manager.Runnable).Start(gctx)
116128
if err != nil {
117-
logs.Log.Fatalf("failed to start a controller-runtime component: %v", err)
129+
return fmt.Errorf("failed to start a controller-runtime component: %v", err)
118130
}
119131

120132
// The agent must stop if the controller-runtime component stops.
121133
cancel()
122-
}()
134+
return nil
135+
})
123136
}
124137

125138
// To help users notice issues with the agent, we show the error messages in
@@ -130,15 +143,6 @@ func Run(cmd *cobra.Command, args []string) {
130143
}
131144

132145
dataGatherers := map[string]datagatherer.DataGatherer{}
133-
group, gctx := errgroup.WithContext(ctx)
134-
135-
defer func() {
136-
// TODO: replace Fatalf log calls with Errorf and return the error
137-
cancel()
138-
if err := group.Wait(); err != nil {
139-
logs.Log.Fatalf("failed to wait for controller-runtime component to stop: %v", err)
140-
}
141-
}()
142146

143147
// load datagatherer config and boot each one
144148
for _, dgConfig := range config.DataGatherers {
@@ -160,6 +164,8 @@ func Run(cmd *cobra.Command, args []string) {
160164
if err := newDg.Run(gctx.Done()); err != nil {
161165
return fmt.Errorf("failed to start %q data gatherer %q: %v", kind, dgConfig.Name, err)
162166
}
167+
// The agent must stop if any of the data gatherers stops
168+
cancel()
163169
return nil
164170
})
165171

@@ -192,15 +198,24 @@ func Run(cmd *cobra.Command, args []string) {
192198

193199
// begin the datagathering loop, periodically sending data to the
194200
// configured output using data in datagatherer caches or refreshing from
195-
// APIs each cycle depending on datagatherer implementation
201+
// APIs each cycle depending on datagatherer implementation.
202+
// If any of the goroutines exit (with nil or error) the main context will
203+
// be cancelled, which will cause this blocking loop to exit
204+
// instead of waiting for the time period.
205+
// TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't
206+
// have to wait for it to finish before exiting the process.
196207
for {
197208
gatherAndOutputData(eventf, config, preflightClient, dataGatherers)
198209

199210
if config.OneShot {
200211
break
201212
}
202213

203-
time.Sleep(config.Period)
214+
select {
215+
case <-gctx.Done():
216+
return
217+
case <-time.After(config.Period):
218+
}
204219
}
205220
}
206221

0 commit comments

Comments
 (0)