@@ -8,6 +8,7 @@ package cli
88import (
99 "context"
1010 "fmt"
11+ "io"
1112 "os"
1213 "os/signal"
1314 "strings"
@@ -57,13 +58,19 @@ var (
5758 chaosCertsDir string
5859 chaosReplicationFactor int
5960 chaosStage string
61+ verbose bool
62+
63+ // chaosLogger is the logger used by failure-injection library.
64+ // It is initialized in the chaos command's PersistentPreRunE based on the verbose flag.
65+ chaosLogger * logger.Logger
6066)
6167
6268// GlobalChaosOpts captures global chaos flags
6369type GlobalChaosOpts struct {
6470 WaitBeforeCleanup time.Duration
6571 RunForever bool
6672 Stage FailureStage
73+ Verbose bool
6774}
6875
6976// buildChaosCmd creates the root chaos command
@@ -79,6 +86,10 @@ own lifecycle: Setup → Inject → Wait → Recover → Cleanup.
7986
8087Global flags control the duration and cleanup behavior of all chaos commands.
8188` ,
89+ PersistentPreRunE : func (cmd * cobra.Command , args []string ) error {
90+ // Initialize the chaos logger based on verbose flag
91+ return initChaosLogger ()
92+ },
8293 }
8394
8495 // Add global flags
@@ -103,9 +114,13 @@ Global flags control the duration and cleanup behavior of all chaos commands.
103114 - recover: runs only the recover phase (removes the failure)
104115 - cleanup: runs only the cleanup phase (removes failure dependencies)
105116Default: all` )
117+ chaosCmd .PersistentFlags ().BoolVar (& verbose ,
118+ "verbose" , false ,
119+ "if set, prints verbose logs from failure-injection library" )
106120
107121 // Add subcommands
108- chaosCmd .AddCommand (cr .buildChaosNetworkCmd ())
122+ chaosCmd .AddCommand (cr .buildChaosNetworkPartitionCmd ())
123+ chaosCmd .AddCommand (cr .buildChaosNetworkLatencyCmd ())
109124
110125 return chaosCmd
111126}
@@ -148,11 +163,28 @@ func getClusterOptions() []failures.ClusterOptionFunc {
148163 return opts
149164}
150165
166+ // initChaosLogger initializes the global chaos logger based on the verbose flag.
167+ // This should be called once before any chaos command executes.
168+ func initChaosLogger () error {
169+ cfg := logger.Config {
170+ Stdout : io .Discard ,
171+ Stderr : os .Stderr ,
172+ }
173+ if verbose {
174+ cfg .Stdout = os .Stdout
175+ }
176+
177+ l , err := cfg .NewLogger ("" )
178+ if err != nil {
179+ return errors .Wrap (err , "failed to create chaos logger" )
180+ }
181+
182+ chaosLogger = l
183+ return nil
184+ }
185+
151186// parseInt32SliceToNodes converts an int32 slice to install.Nodes
152187func parseInt32SliceToNodes (nodes []int32 ) install.Nodes {
153- if len (nodes ) == 0 {
154- return nil
155- }
156188 result := make (install.Nodes , len (nodes ))
157189 for i , n := range nodes {
158190 result [i ] = install .Node (n )
@@ -165,14 +197,18 @@ func parseInt32SliceToNodes(nodes []int32) install.Nodes {
165197// For individual stages, state validation is disabled (disableStateValidation=true) to allow
166198// running stages independently without enforcing the complete lifecycle order.
167199func createFailer (
168- clusterName string , failureName string , stage FailureStage , opts ... failures.ClusterOptionFunc ,
200+ clusterName string ,
201+ failureName string ,
202+ chaosOpts GlobalChaosOpts ,
203+ opts ... failures.ClusterOptionFunc ,
169204) (* failures.Failer , error ) {
170205 registry := failures .GetFailureRegistry ()
171- disableStateValidation := stage != StageAll
206+ disableStateValidation := chaosOpts .Stage != StageAll
207+
172208 return registry .GetFailer (
173209 clusterName ,
174210 failureName ,
175- config . Logger ,
211+ chaosLogger ,
176212 disableStateValidation ,
177213 opts ... ,
178214 )
@@ -182,148 +218,140 @@ func createFailer(
182218// If stage is StageAll, runs the complete lifecycle: Setup → Inject → Wait → Recover → Cleanup.
183219// Otherwise, runs only the specified individual stage.
184220func runFailureLifecycle (
185- ctx context.Context ,
186- l * logger.Logger ,
187- failer * failures.Failer ,
188- args failures.FailureArgs ,
189- opts GlobalChaosOpts ,
221+ ctx context.Context , failer * failures.Failer , args failures.FailureArgs , opts GlobalChaosOpts ,
190222) error {
191223 switch opts .Stage {
192224 case StageSetup :
193- return runSetupStage (ctx , l , failer , args )
225+ return runSetupStage (ctx , failer , args )
194226 case StageInject :
195- return runInjectStage (ctx , l , failer , args )
227+ return runInjectStage (ctx , failer , args )
196228 case StageRecover :
197- return runRecoverStage (ctx , l , failer , args )
229+ return runRecoverStage (ctx , failer , args )
198230 case StageCleanup :
199- return runCleanupStage (ctx , l , failer , args )
231+ return runCleanupStage (ctx , failer , args )
200232 case StageAll :
201- return runFullLifecycle (ctx , l , failer , args , opts )
233+ return runFullLifecycle (ctx , failer , args , opts )
202234 default :
203235 return errors .Newf ("unknown stage: %s" , opts .Stage )
204236 }
205237}
206238
207239// runSetupStage runs only the setup phase
208- func runSetupStage (
209- ctx context.Context , l * logger.Logger , failer * failures.Failer , args failures.FailureArgs ,
210- ) error {
211- l .Printf ("Running setup stage..." )
212- if err := failer .Setup (ctx , l , args ); err != nil {
240+ func runSetupStage (ctx context.Context , failer * failures.Failer , args failures.FailureArgs ) error {
241+ config .Logger .Printf ("Running setup stage..." )
242+ if err := failer .Setup (ctx , chaosLogger , args ); err != nil {
213243 return errors .Wrap (err , "failed to setup failure" )
214244 }
215245
216- l .Printf ("Setup stage completed successfully" )
246+ config . Logger .Printf ("Setup stage completed successfully" )
217247 return nil
218248}
219249
220250// runInjectStage runs only the inject phase
221- func runInjectStage (
222- ctx context.Context , l * logger.Logger , failer * failures.Failer , args failures.FailureArgs ,
223- ) error {
224- l .Printf ("Running inject stage..." )
225- if err := failer .Inject (ctx , l , args ); err != nil {
251+ func runInjectStage (ctx context.Context , failer * failures.Failer , args failures.FailureArgs ) error {
252+ config .Logger .Printf ("Running inject stage..." )
253+ if err := failer .Inject (ctx , chaosLogger , args ); err != nil {
226254 return errors .Wrap (err , "failed to inject failure" )
227255 }
228- l .Printf ("Inject stage completed successfully" )
256+ config .Logger .Printf ("waiting for failure to propagate" )
257+ if err := failer .WaitForFailureToPropagate (ctx , chaosLogger ); err != nil {
258+ return errors .Wrap (err , "failed to propagate failure" )
259+ }
260+ config .Logger .Printf ("Inject stage completed successfully" )
229261 return nil
230262}
231263
232264// runRecoverStage runs only the recover phase.
233265// When running recover individually with state validation disabled, we use SetInjectArgs
234266// to provide the necessary context for recovery without actually running the inject phase.
235267func runRecoverStage (
236- ctx context.Context , l * logger. Logger , failer * failures.Failer , args failures.FailureArgs ,
268+ ctx context.Context , failer * failures.Failer , args failures.FailureArgs ,
237269) error {
238- l .Printf ("Running recover stage..." )
270+ config . Logger .Printf ("Running recover stage..." )
239271 // Set the inject args directly so Recover() has the necessary context
240272 failer .SetInjectArgs (args )
241- if err := failer .Recover (ctx , l ); err != nil {
273+ if err := failer .Recover (ctx , chaosLogger ); err != nil {
242274 return errors .Wrap (err , "failed to recover from failure" )
243275 }
244276
245- if err := failer .WaitForFailureToRecover (ctx , l ); err != nil {
277+ if err := failer .WaitForFailureToRecover (ctx , chaosLogger ); err != nil {
246278 return errors .Wrap (err , "failed to wait for failure to recover" )
247279 }
248280
249- l .Printf ("Recover stage completed successfully" )
281+ config . Logger .Printf ("Recover stage completed successfully" )
250282 return nil
251283}
252284
253285// runCleanupStage runs only the cleanup phase.
254286// When running cleanup individually with state validation disabled, we use SetSetupArgs
255287// to provide the necessary context for cleanup without actually running the setup phase.
256288func runCleanupStage (
257- ctx context.Context , l * logger. Logger , failer * failures.Failer , args failures.FailureArgs ,
289+ ctx context.Context , failer * failures.Failer , args failures.FailureArgs ,
258290) error {
259- l .Printf ("Running cleanup stage..." )
291+ config . Logger .Printf ("Running cleanup stage..." )
260292
261293 // Set the setup args directly so Cleanup() has the necessary context
262294 failer .SetSetupArgs (args )
263- if err := failer .Cleanup (ctx , l ); err != nil {
295+ if err := failer .Cleanup (ctx , chaosLogger ); err != nil {
264296 return errors .Wrap (err , "failed to cleanup failure" )
265297 }
266- l .Printf ("Cleanup stage completed successfully" )
298+ config . Logger .Printf ("Cleanup stage completed successfully" )
267299 return nil
268300}
269301
270302// runFullLifecycle executes the complete failure lifecycle:
271303// Setup → Inject → Wait → Recover → Cleanup
272304func runFullLifecycle (
273- ctx context.Context ,
274- l * logger.Logger ,
275- failer * failures.Failer ,
276- args failures.FailureArgs ,
277- opts GlobalChaosOpts ,
305+ ctx context.Context , failer * failures.Failer , args failures.FailureArgs , opts GlobalChaosOpts ,
278306) error {
279307 // Ensure cleanup always runs, even if we panic or get interrupted
280308 cleanupDone := false
281309 defer func () {
282310 if ! cleanupDone {
283- l .Printf ("Running cleanup due to early exit..." )
284- if err := failer .Cleanup (ctx , l ); err != nil {
285- l .Errorf ("Cleanup failed: %v" , err )
311+ config . Logger .Printf ("Running cleanup due to early exit..." )
312+ if err := failer .Cleanup (ctx , chaosLogger ); err != nil {
313+ config . Logger .Errorf ("Cleanup failed: %v" , err )
286314 }
287315 }
288316 }()
289317
290318 // Setup phase
291- if err := runSetupStage (ctx , l , failer , args ); err != nil {
319+ if err := runSetupStage (ctx , failer , args ); err != nil {
292320 return err
293321 }
294322
295323 // Inject phase
296- if err := runInjectStage (ctx , l , failer , args ); err != nil {
324+ if err := runInjectStage (ctx , failer , args ); err != nil {
297325 return err
298326 }
299327
300328 // Wait phase
301329 if opts .RunForever {
302- l .Printf ("Failure injected. Waiting for interrupt (Ctrl+C)..." )
330+ config . Logger .Printf ("Failure injected. Waiting for interrupt (Ctrl+C)..." )
303331 <- waitForInterrupt ()
304- l .Printf ("Interrupt received. Beginning recovery..." )
332+ config . Logger .Printf ("Interrupt received. Beginning recovery..." )
305333 } else {
306- l .Printf ("Failure injected. Waiting %s before recovery..." , opts .WaitBeforeCleanup )
334+ config . Logger .Printf ("Failure injected. Waiting %s before recovery..." , opts .WaitBeforeCleanup )
307335 select {
308336 case <- time .After (opts .WaitBeforeCleanup ):
309- l .Printf ("Wait period complete. Beginning recovery..." )
337+ config . Logger .Printf ("Wait period complete. Beginning recovery..." )
310338 case <- waitForInterrupt ():
311- l .Printf ("Interrupt received. Beginning recovery..." )
339+ config . Logger .Printf ("Interrupt received. Beginning recovery..." )
312340 }
313341 }
314342
315343 // Recover phase
316- if err := runRecoverStage (ctx , l , failer , args ); err != nil {
344+ if err := runRecoverStage (ctx , failer , args ); err != nil {
317345 return err
318346 }
319347
320348 // Cleanup phase
321- if err := runCleanupStage (ctx , l , failer , args ); err != nil {
349+ if err := runCleanupStage (ctx , failer , args ); err != nil {
322350 return err
323351 }
324352
325353 cleanupDone = true
326- l .Printf ("Failure lifecycle completed successfully" )
354+ config . Logger .Printf ("Failure lifecycle completed successfully" )
327355 return nil
328356}
329357
0 commit comments