Skip to content

Commit 61d64e4

Browse files
authored
[VC-35738] Bubble up the errors from sub-commands and handle errors centrally instead of using log.Fatal at multiple sites (#599)
* Bubble up the errors from sub-commands and handle errors centrally instead of using log.Fatal at multiple sites * Log the bubbled up error instead of printing it * Turn on SilenceErrors and SilenceUsage to prevent Cobra from messing up the stderr output * Remove obsolete translations for converting log.printf to error messages
1 parent 41553d4 commit 61d64e4

File tree

7 files changed

+73
-89
lines changed

7 files changed

+73
-89
lines changed

cmd/agent.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"github.com/spf13/cobra"
88

99
"github.com/jetstack/preflight/pkg/agent"
10-
"github.com/jetstack/preflight/pkg/logs"
1110
"github.com/jetstack/preflight/pkg/permissions"
1211
)
1312

@@ -16,7 +15,7 @@ var agentCmd = &cobra.Command{
1615
Short: "start the preflight agent",
1716
Long: `The agent will periodically gather data for the configured data
1817
gatherers and send it to a remote backend for evaluation`,
19-
Run: agent.Run,
18+
RunE: agent.Run,
2019
}
2120

2221
var agentInfoCmd = &cobra.Command{
@@ -34,24 +33,25 @@ var agentRBACCmd = &cobra.Command{
3433
Use: "rbac",
3534
Short: "print the agent's minimal RBAC manifest",
3635
Long: `Print RBAC string by reading GVRs`,
37-
Run: func(cmd *cobra.Command, args []string) {
36+
RunE: func(cmd *cobra.Command, args []string) error {
3837

3938
b, err := os.ReadFile(agent.Flags.ConfigFilePath)
4039
if err != nil {
41-
logs.Log.Fatalf("Failed to read config file: %s", err)
40+
return fmt.Errorf("Failed to read config file: %s", err)
4241
}
4342
cfg, err := agent.ParseConfig(b)
4443
if err != nil {
45-
logs.Log.Fatalf("Failed to parse config file: %s", err)
44+
return fmt.Errorf("Failed to parse config file: %s", err)
4645
}
4746

4847
err = agent.ValidateDataGatherers(cfg.DataGatherers)
4948
if err != nil {
50-
logs.Log.Fatalf("Failed to validate data gatherers: %s", err)
49+
return fmt.Errorf("Failed to validate data gatherers: %s", err)
5150
}
5251

5352
out := permissions.GenerateFullManifest(cfg.DataGatherers)
5453
fmt.Print(out)
54+
return nil
5555
},
5656
}
5757

cmd/echo.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ var echoCmd = &cobra.Command{
1111
Short: "starts an echo server to test the agent",
1212
Long: `The agent sends data to a server. This echo server
1313
can be used to act as the server part and echo the data received by the agent.`,
14-
Run: echo.Echo,
14+
RunE: echo.Echo,
1515
}
1616

1717
func init() {

cmd/root.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ import (
55
"os"
66
"strings"
77

8-
"github.com/jetstack/preflight/pkg/logs"
98
"github.com/spf13/cobra"
109
"github.com/spf13/pflag"
10+
"k8s.io/klog/v2"
11+
12+
"github.com/jetstack/preflight/pkg/logs"
1113
)
1214

1315
// rootCmd represents the base command when called without any subcommands
@@ -21,6 +23,14 @@ Preflight checks are bundled into Packages`,
2123
PersistentPreRun: func(cmd *cobra.Command, args []string) {
2224
logs.Initialize()
2325
},
26+
// SilenceErrors and SilenceUsage prevents this command or any sub-command
27+
// from printing arbitrary text to stderr.
28+
// Why? To ensure that each line of output can be parsed as a single message
29+
// for consumption by logging agents such as fluentd.
30+
// Usage information is still available on stdout with the `-h` and `--help`
31+
// flags.
32+
SilenceErrors: true,
33+
SilenceUsage: true,
2434
}
2535

2636
func init() {
@@ -31,13 +41,16 @@ func init() {
3141

3242
// Execute adds all child commands to the root command and sets flags appropriately.
3343
// This is called by main.main(). It only needs to happen once to the rootCmd.
44+
// If the root command or sub-command returns an error, the error message will
45+
// will be logged and the process will exit with status 1.
3446
func Execute() {
3547
logs.AddFlags(rootCmd.PersistentFlags())
36-
48+
var exitCode int
3749
if err := rootCmd.Execute(); err != nil {
38-
fmt.Println(err)
39-
os.Exit(1)
50+
exitCode = 1
51+
klog.ErrorS(err, "Exiting due to error", "exit-code", exitCode)
4052
}
53+
klog.FlushAndExit(klog.ExitFlushTimeout, exitCode)
4154
}
4255

4356
func setFlagsFromEnv(prefix string, fs *pflag.FlagSet) {

pkg/agent/run.go

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ var Flags AgentCmdFlags
5050
const schemaVersion string = "v2.0.0"
5151

5252
// Run starts the agent process
53-
func Run(cmd *cobra.Command, args []string) {
53+
func Run(cmd *cobra.Command, args []string) error {
5454
logs.Log.Printf("Preflight agent version: %s (%s)", version.PreflightVersion, version.Commit)
5555
ctx, cancel := context.WithCancel(
5656
klog.NewContext(
@@ -62,31 +62,33 @@ func Run(cmd *cobra.Command, args []string) {
6262

6363
file, err := os.Open(Flags.ConfigFilePath)
6464
if err != nil {
65-
logs.Log.Fatalf("Failed to load config file for agent from: %s", Flags.ConfigFilePath)
65+
return fmt.Errorf("Failed to load config file for agent from: %s", Flags.ConfigFilePath)
6666
}
6767
defer file.Close()
6868

6969
b, err := io.ReadAll(file)
7070
if err != nil {
71-
logs.Log.Fatalf("Failed to read config file: %s", err)
71+
return fmt.Errorf("Failed to read config file: %s", err)
7272
}
7373

7474
cfg, err := ParseConfig(b)
7575
if err != nil {
76-
logs.Log.Fatalf("Failed to parse config file: %s", err)
76+
return fmt.Errorf("Failed to parse config file: %s", err)
7777
}
7878

7979
config, preflightClient, err := ValidateAndCombineConfig(logs.Log, cfg, Flags)
8080
if err != nil {
81-
logs.Log.Fatalf("While evaluating configuration: %v", err)
81+
return fmt.Errorf("While evaluating configuration: %v", err)
8282
}
8383

8484
group, gctx := errgroup.WithContext(ctx)
8585
defer func() {
86-
// TODO: replace Fatalf log calls with Errorf and return the error
8786
cancel()
88-
if err := group.Wait(); err != nil {
89-
logs.Log.Fatalf("failed to wait for controller-runtime component to stop: %v", err)
87+
if groupErr := group.Wait(); groupErr != nil {
88+
err = multierror.Append(
89+
err,
90+
fmt.Errorf("failed to wait for controller-runtime component to stop: %v", groupErr),
91+
)
9092
}
9193
}()
9294

@@ -159,7 +161,7 @@ func Run(cmd *cobra.Command, args []string) {
159161
// the agent pod's events.
160162
eventf, err := newEventf(config.InstallNS)
161163
if err != nil {
162-
logs.Log.Fatalf("failed to create event recorder: %v", err)
164+
return fmt.Errorf("failed to create event recorder: %v", err)
163165
}
164166

165167
dataGatherers := map[string]datagatherer.DataGatherer{}
@@ -169,12 +171,12 @@ func Run(cmd *cobra.Command, args []string) {
169171
kind := dgConfig.Kind
170172
if dgConfig.DataPath != "" {
171173
kind = "local"
172-
logs.Log.Fatalf("running data gatherer %s of type %s as Local, data-path override present: %s", dgConfig.Name, dgConfig.Kind, dgConfig.DataPath)
174+
return fmt.Errorf("running data gatherer %s of type %s as Local, data-path override present: %s", dgConfig.Name, dgConfig.Kind, dgConfig.DataPath)
173175
}
174176

175177
newDg, err := dgConfig.Config.NewDataGatherer(gctx)
176178
if err != nil {
177-
logs.Log.Fatalf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err)
179+
return fmt.Errorf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err)
178180
}
179181

180182
logs.Log.Printf("starting %q datagatherer", dgConfig.Name)
@@ -225,18 +227,21 @@ func Run(cmd *cobra.Command, args []string) {
225227
// TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't
226228
// have to wait for it to finish before exiting the process.
227229
for {
228-
gatherAndOutputData(eventf, config, preflightClient, dataGatherers)
230+
if err := gatherAndOutputData(eventf, config, preflightClient, dataGatherers); err != nil {
231+
return err
232+
}
229233

230234
if config.OneShot {
231235
break
232236
}
233237

234238
select {
235239
case <-gctx.Done():
236-
return
240+
return nil
237241
case <-time.After(config.Period):
238242
}
239243
}
244+
return nil
240245
}
241246

242247
// Creates an event recorder for the agent's Pod object. Expects the env var
@@ -246,7 +251,7 @@ func Run(cmd *cobra.Command, args []string) {
246251
func newEventf(installNS string) (Eventf, error) {
247252
restcfg, err := kubeconfig.LoadRESTConfig("")
248253
if err != nil {
249-
logs.Log.Fatalf("failed to load kubeconfig: %v", err)
254+
return nil, fmt.Errorf("failed to load kubeconfig: %v", err)
250255
}
251256
scheme := runtime.NewScheme()
252257
_ = corev1.AddToScheme(scheme)
@@ -276,31 +281,35 @@ func newEventf(installNS string) (Eventf, error) {
276281
// Like Printf but for sending events to the agent's Pod object.
277282
type Eventf func(eventType, reason, msg string, args ...interface{})
278283

279-
func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) {
284+
func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) error {
280285
var readings []*api.DataReading
281286

282287
if config.InputPath != "" {
283288
logs.Log.Printf("Reading data from local file: %s", config.InputPath)
284289
data, err := os.ReadFile(config.InputPath)
285290
if err != nil {
286-
logs.Log.Fatalf("failed to read local data file: %s", err)
291+
return fmt.Errorf("failed to read local data file: %s", err)
287292
}
288293
err = json.Unmarshal(data, &readings)
289294
if err != nil {
290-
logs.Log.Fatalf("failed to unmarshal local data file: %s", err)
295+
return fmt.Errorf("failed to unmarshal local data file: %s", err)
291296
}
292297
} else {
293-
readings = gatherData(config, dataGatherers)
298+
var err error
299+
readings, err = gatherData(config, dataGatherers)
300+
if err != nil {
301+
return err
302+
}
294303
}
295304

296305
if config.OutputPath != "" {
297306
data, err := json.MarshalIndent(readings, "", " ")
298307
if err != nil {
299-
logs.Log.Fatal("failed to marshal JSON")
308+
return fmt.Errorf("failed to marshal JSON: %s", err)
300309
}
301310
err = os.WriteFile(config.OutputPath, data, 0644)
302311
if err != nil {
303-
logs.Log.Fatalf("failed to output to local file: %s", err)
312+
return fmt.Errorf("failed to output to local file: %s", err)
304313
}
305314
logs.Log.Printf("Data saved to local file: %s", config.OutputPath)
306315
} else {
@@ -316,12 +325,13 @@ func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient c
316325
logs.Log.Printf("retrying in %v after error: %s", t, err)
317326
})
318327
if err != nil {
319-
logs.Log.Fatalf("Exiting due to fatal error uploading: %v", err)
328+
return fmt.Errorf("Exiting due to fatal error uploading: %v", err)
320329
}
321330
}
331+
return nil
322332
}
323333

324-
func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) []*api.DataReading {
334+
func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) ([]*api.DataReading, error) {
325335
var readings []*api.DataReading
326336

327337
var dgError *multierror.Error
@@ -360,10 +370,10 @@ func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.Dat
360370
}
361371

362372
if config.StrictMode && dgError.ErrorOrNil() != nil {
363-
logs.Log.Fatalf("halting datagathering in strict mode due to error: %s", dgError.ErrorOrNil())
373+
return nil, fmt.Errorf("halting datagathering in strict mode due to error: %s", dgError.ErrorOrNil())
364374
}
365375

366-
return readings
376+
return readings, nil
367377
}
368378

369379
func postData(config CombinedConfig, preflightClient client.Client, readings []*api.DataReading) error {
@@ -388,7 +398,7 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []*
388398
if config.OrganizationID == "" {
389399
data, err := json.Marshal(readings)
390400
if err != nil {
391-
logs.Log.Fatalf("Cannot marshal readings: %+v", err)
401+
return fmt.Errorf("Cannot marshal readings: %+v", err)
392402
}
393403

394404
// log and collect metrics about the upload size

pkg/echo/echo.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,16 @@ import (
99
"github.com/spf13/cobra"
1010

1111
"github.com/jetstack/preflight/api"
12-
"github.com/jetstack/preflight/pkg/logs"
1312
)
1413

1514
var EchoListen string
1615

1716
var Compact bool
1817

19-
func Echo(cmd *cobra.Command, args []string) {
18+
func Echo(cmd *cobra.Command, args []string) error {
2019
http.HandleFunc("/", echoHandler)
2120
fmt.Println("Listening to requests at ", EchoListen)
22-
err := http.ListenAndServe(EchoListen, nil)
23-
if err != nil {
24-
logs.Log.Fatal(err)
25-
}
21+
return http.ListenAndServe(EchoListen, nil)
2622
}
2723

2824
func echoHandler(w http.ResponseWriter, r *http.Request) {

pkg/logs/logs.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,7 @@ func (w LogToSlogWriter) Write(p []byte) (n int, err error) {
147147

148148
message := string(p)
149149
if strings.Contains(message, "error") ||
150-
strings.Contains(message, "failed") ||
151-
strings.Contains(message, "fatal") ||
152-
strings.Contains(message, "Failed") ||
153-
strings.Contains(message, "While evaluating configuration") ||
154-
strings.Contains(message, "data-path override present") ||
155-
strings.Contains(message, "Cannot marshal readings") {
150+
strings.Contains(message, "failed") {
156151
w.Slog.With("source", w.Source).Error(message)
157152
} else {
158153
w.Slog.With("source", w.Source).Info(message)

pkg/logs/logs_test.go

Lines changed: 11 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -375,52 +375,22 @@ func Test_replaceWithStaticTimestamps(t *testing.T) {
375375
}
376376

377377
func TestLogToSlogWriter(t *testing.T) {
378-
// This test makes sure that all the agent's Log.Fatalf calls are correctly
379-
// translated to slog.Error calls.
378+
// This test makes sure that all the agent's remaining Log calls are correctly
379+
// translated to slog.Error calls where appropriate.
380380
//
381381
// This list was generated using:
382-
// grep -r "Log\.Fatalf" ./cmd ./pkg
382+
// git grep -i "log\.\(print\|fatal\)" pkg/ cmd/ | fgrep -e error -e failed
383383
given := strings.TrimPrefix(`
384-
Failed to load config file for agent from
385-
Failed to read config file
386-
Failed to parse config file
387-
While evaluating configuration
388-
failed to run pprof profiler
389-
failed to run the health check server
390-
failed to start a controller-runtime component
391-
failed to wait for controller-runtime component to stop
392-
running data gatherer %s of type %s as Local, data-path override present
393-
failed to instantiate %q data gatherer
394-
failed to read local data file
395-
failed to unmarshal local data file
396-
failed to output to local file
397-
Exiting due to fatal error uploading
398-
halting datagathering in strict mode due to error
399-
Cannot marshal readings
400-
Failed to read config file
401-
Failed to parse config file
402-
Failed to validate data gatherers
384+
failed to complete initial sync of %q data gatherer %q: %v
385+
error messages will not show in the pod's events because the POD_NAME environment variable is empty
386+
retrying in %v after error: %s
387+
datagatherer informer for %q has failed and is backing off due to error: %s
403388
this is a happy log that should show as INFO`, "\n")
404389
expect := strings.TrimPrefix(`
405-
level=ERROR msg="Failed to load config file for agent from" source=agent
406-
level=ERROR msg="Failed to read config file" source=agent
407-
level=ERROR msg="Failed to parse config file" source=agent
408-
level=ERROR msg="While evaluating configuration" source=agent
409-
level=ERROR msg="failed to run pprof profiler" source=agent
410-
level=ERROR msg="failed to run the health check server" source=agent
411-
level=ERROR msg="failed to start a controller-runtime component" source=agent
412-
level=ERROR msg="failed to wait for controller-runtime component to stop" source=agent
413-
level=ERROR msg="running data gatherer %!s(MISSING) of type %!s(MISSING) as Local, data-path override present" source=agent
414-
level=ERROR msg="failed to instantiate %!q(MISSING) data gatherer" source=agent
415-
level=ERROR msg="failed to read local data file" source=agent
416-
level=ERROR msg="failed to unmarshal local data file" source=agent
417-
level=ERROR msg="failed to output to local file" source=agent
418-
level=ERROR msg="Exiting due to fatal error uploading" source=agent
419-
level=ERROR msg="halting datagathering in strict mode due to error" source=agent
420-
level=ERROR msg="Cannot marshal readings" source=agent
421-
level=ERROR msg="Failed to read config file" source=agent
422-
level=ERROR msg="Failed to parse config file" source=agent
423-
level=ERROR msg="Failed to validate data gatherers" source=agent
390+
level=ERROR msg="failed to complete initial sync of %!q(MISSING) data gatherer %!q(MISSING): %!v(MISSING)" source=agent
391+
level=ERROR msg="error messages will not show in the pod's events because the POD_NAME environment variable is empty" source=agent
392+
level=ERROR msg="retrying in %!v(MISSING) after error: %!s(MISSING)" source=agent
393+
level=ERROR msg="datagatherer informer for %!q(MISSING) has failed and is backing off due to error: %!s(MISSING)" source=agent
424394
level=INFO msg="this is a happy log that should show as INFO" source=agent
425395
`, "\n")
426396

0 commit comments

Comments
 (0)