@@ -74,7 +74,16 @@ func Run(cmd *cobra.Command, args []string) {
74
74
logs .Log .Fatalf ("While evaluating configuration: %v" , err )
75
75
}
76
76
77
- go func () {
77
+ group , gctx := errgroup .WithContext (ctx )
78
+ defer func () {
79
+ // TODO: replace Fatalf log calls with Errorf and return the error
80
+ cancel ()
81
+ if err := group .Wait (); err != nil {
82
+ logs .Log .Fatalf ("failed to wait for controller-runtime component to stop: %v" , err )
83
+ }
84
+ }()
85
+
86
+ group .Go (func () error {
78
87
server := http .NewServeMux ()
79
88
80
89
if Flags .Profiling {
@@ -105,21 +114,25 @@ func Run(cmd *cobra.Command, args []string) {
105
114
106
115
err := http .ListenAndServe (":8081" , server )
107
116
if err != nil && ! errors .Is (err , http .ErrServerClosed ) {
108
- logs . Log . Fatalf ("failed to run the health check server: %s" , err )
117
+ return fmt . Errorf ("failed to run the health check server: %s" , err )
109
118
}
110
- }()
119
+ // The agent must stop if the management server stops
120
+ cancel ()
121
+ return nil
122
+ })
111
123
112
124
_ , isVenConn := preflightClient .(* client.VenConnClient )
113
125
if isVenConn {
114
- go func () {
115
- err := preflightClient .(manager.Runnable ).Start (ctx )
126
+ group . Go ( func () error {
127
+ err := preflightClient .(manager.Runnable ).Start (gctx )
116
128
if err != nil {
117
- logs . Log . Fatalf ("failed to start a controller-runtime component: %v" , err )
129
+ return fmt . Errorf ("failed to start a controller-runtime component: %v" , err )
118
130
}
119
131
120
132
// The agent must stop if the controller-runtime component stops.
121
133
cancel ()
122
- }()
134
+ return nil
135
+ })
123
136
}
124
137
125
138
// To help users notice issues with the agent, we show the error messages in
@@ -130,15 +143,6 @@ func Run(cmd *cobra.Command, args []string) {
130
143
}
131
144
132
145
dataGatherers := map [string ]datagatherer.DataGatherer {}
133
- group , gctx := errgroup .WithContext (ctx )
134
-
135
- defer func () {
136
- // TODO: replace Fatalf log calls with Errorf and return the error
137
- cancel ()
138
- if err := group .Wait (); err != nil {
139
- logs .Log .Fatalf ("failed to wait for controller-runtime component to stop: %v" , err )
140
- }
141
- }()
142
146
143
147
// load datagatherer config and boot each one
144
148
for _ , dgConfig := range config .DataGatherers {
@@ -160,6 +164,8 @@ func Run(cmd *cobra.Command, args []string) {
160
164
if err := newDg .Run (gctx .Done ()); err != nil {
161
165
return fmt .Errorf ("failed to start %q data gatherer %q: %v" , kind , dgConfig .Name , err )
162
166
}
167
+ // The agent must stop if any of the data gatherers stops
168
+ cancel ()
163
169
return nil
164
170
})
165
171
@@ -192,15 +198,24 @@ func Run(cmd *cobra.Command, args []string) {
192
198
193
199
// begin the datagathering loop, periodically sending data to the
194
200
// configured output using data in datagatherer caches or refreshing from
195
- // APIs each cycle depending on datagatherer implementation
201
+ // APIs each cycle depending on datagatherer implementation.
202
+ // If any of the goroutines exit (with nil or error) the main context will
203
+ // be cancelled, which will cause this blocking loop to exit
204
+ // instead of waiting for the time period.
205
+ // TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't
206
+ // have to wait for it to finish before exiting the process.
196
207
for {
197
208
gatherAndOutputData (eventf , config , preflightClient , dataGatherers )
198
209
199
210
if config .OneShot {
200
211
break
201
212
}
202
213
203
- time .Sleep (config .Period )
214
+ select {
215
+ case <- gctx .Done ():
216
+ return
217
+ case <- time .After (config .Period ):
218
+ }
204
219
}
205
220
}
206
221
0 commit comments