Skip to content

Commit d5b413d

Browse files
committed
Debug
Signed-off-by: Richard Wall <[email protected]>
1 parent 6b406fb commit d5b413d

File tree

2 files changed

+50
-23
lines changed

2 files changed

+50
-23
lines changed

hack/e2e/values.venafi-kubernetes-agent.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,3 +9,4 @@ authentication:
99

1010
extraArgs:
1111
- --logging-format=json
12+
- --log-level=1

pkg/agent/run.go

Lines changed: 49 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ var Flags AgentCmdFlags
5050
const schemaVersion string = "v2.0.0"
5151

5252
// Run starts the agent process
53-
func Run(cmd *cobra.Command, args []string) error {
53+
func Run(cmd *cobra.Command, args []string) (returnErr error) {
5454
ctx, cancel := context.WithCancel(cmd.Context())
5555
defer cancel()
5656
log := klog.FromContext(ctx).WithName("Run")
@@ -82,12 +82,7 @@ func Run(cmd *cobra.Command, args []string) error {
8282
group, gctx := errgroup.WithContext(ctx)
8383
defer func() {
8484
cancel()
85-
if groupErr := group.Wait(); groupErr != nil {
86-
err = multierror.Append(
87-
err,
88-
fmt.Errorf("failed to wait for controller-runtime component to stop: %v", groupErr),
89-
)
90-
}
85+
returnErr = errors.Join(returnErr, group.Wait())
9186
}()
9287

9388
{
@@ -123,8 +118,12 @@ func Run(cmd *cobra.Command, args []string) error {
123118
w.WriteHeader(http.StatusOK)
124119
})
125120

126-
group.Go(func() error {
127-
err := listenAndServe(
121+
group.Go(func() (err error) {
122+
log.Info("Starting")
123+
defer func() {
124+
log.Info("Stopped", "reason", err)
125+
}()
126+
err = listenAndServe(
128127
klog.NewContext(gctx, log),
129128
&http.Server{
130129
Addr: serverAddress,
@@ -137,20 +136,28 @@ func Run(cmd *cobra.Command, args []string) error {
137136
if err != nil {
138137
return fmt.Errorf("APIServer: %s", err)
139138
}
139+
if err := context.Cause(gctx); err == nil {
140+
return fmt.Errorf("APIServer exited unexpectedly")
141+
}
140142
return nil
141143
})
142144
}
143145

144146
_, isVenConn := preflightClient.(*client.VenConnClient)
145147
if isVenConn {
146-
group.Go(func() error {
147-
err := preflightClient.(manager.Runnable).Start(gctx)
148+
group.Go(func() (err error) {
149+
log := log.WithName("VenConnClient")
150+
log.Info("Starting")
151+
defer func() {
152+
log.Info("Stopped", "reason", err)
153+
}()
154+
err = preflightClient.(manager.Runnable).Start(gctx)
148155
if err != nil {
149156
return fmt.Errorf("failed to start a controller-runtime component: %v", err)
150157
}
151-
152-
// The agent must stop if the controller-runtime component stops.
153-
cancel()
158+
if err := context.Cause(gctx); err == nil {
159+
return fmt.Errorf("VenConnClient exited unexpectedly")
160+
}
154161
return nil
155162
})
156163
}
@@ -177,16 +184,17 @@ func Run(cmd *cobra.Command, args []string) error {
177184
return fmt.Errorf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err)
178185
}
179186

180-
log.Info("Starting datagatherer", "gatherer", dgConfig.Name)
181-
182187
// start the data gatherers and wait for the cache sync
183-
group.Go(func() error {
188+
group.Go(func() (err error) {
189+
log := log.WithName("DataGatherer.Run").WithValues("DataGatherer.name", dgConfig.Name)
190+
log.V(1).Info("Starting")
191+
defer func() {
192+
log.V(1).Info("Stopped", "reason", err)
193+
}()
184194
if err := newDg.Run(gctx.Done()); err != nil {
185195
return fmt.Errorf("failed to start %q data gatherer %q: %v", kind, dgConfig.Name, err)
186196
}
187-
// The agent must stop if any of the data gatherers stops
188-
cancel()
189-
return nil
197+
return context.Cause(gctx)
190198
})
191199

192200
// regardless of success, this dataGatherers has been given a
@@ -225,10 +233,28 @@ func Run(cmd *cobra.Command, args []string) error {
225233
// TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't
226234
// have to wait for it to finish before exiting the process.
227235
for {
228-
if err := gatherAndOutputData(ctx, eventf, config, preflightClient, dataGatherers); err != nil {
229-
return err
236+
timeLimit := time.Second * 5
237+
timeoutCTX, cancelTimeout := context.WithTimeoutCause(gctx, time.Second*5, fmt.Errorf("timeout after %s", timeLimit))
238+
defer cancelTimeout()
239+
240+
cancelCTX, cancelCause := context.WithCancelCause(timeoutCTX)
241+
go func() {
242+
err := gatherAndOutputData(cancelCTX, eventf, config, preflightClient, dataGatherers)
243+
cancelCause(err)
244+
}()
245+
246+
select {
247+
case <-cancelCTX.Done():
248+
err := context.Cause(cancelCTX)
249+
if err != nil && !errors.Is(err, context.Canceled) {
250+
return fmt.Errorf("gatherAndOutputData: %s", err)
251+
}
252+
case <-timeoutCTX.Done():
253+
return fmt.Errorf("gatherAndOutputData: %s", context.Cause(timeoutCTX))
230254
}
231255

256+
cancelTimeout()
257+
232258
if config.OneShot {
233259
break
234260
}
@@ -346,7 +372,7 @@ func gatherData(ctx context.Context, config CombinedConfig, dataGatherers map[st
346372
continue
347373
}
348374

349-
log.Info("Successfully gathered data", "gatherer", k, "count", count)
375+
log.V(1).Info("Successfully gathered data", "gatherer", k, "count", count)
350376

351377
readings = append(readings, &api.DataReading{
352378
ClusterID: config.ClusterID,

0 commit comments

Comments
 (0)