@@ -94,18 +94,28 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error {
9494 workflowCtx , cancelWorkflowCtx := context .WithCancelCause (workflowCtx )
9595 defer cancelWorkflowCtx (nil )
9696
97- // recoveryManager is declared here so the cancel listener can mark it as canceled.
98- // It will be initialized later after workflow state is set up.
99- var recoveryManager * pipeline.RecoveryManager
100-
101- // Add sigterm support for internal context.
102- // Required to be able to terminate the running workflow by external signals.
97+ // Handle SIGTERM (k8s, docker, system shutdown)
10398 workflowCtx = utils .WithContextSigtermCallback (workflowCtx , func () {
10499 logger .Error ().Msg ("received sigterm termination signal" )
105100 // WithContextSigtermCallback would cancel the context too, but we want our own custom error
106101 cancelWorkflowCtx (pipeline .ErrCancel )
107102 })
108103
104+ state := rpc.WorkflowState {
105+ Started : time .Now ().Unix (),
106+ }
107+ if err := r .client .Init (runnerCtx , workflow .ID , state ); err != nil {
108+ logger .Error ().Err (err ).Msg ("workflow initialization failed" )
109+ // TODO: decide whether to return here; the code this replaces called cancelWorkflowCtx(err) and returned err when Init failed.
110+ }
111+
112+ // Initialize recovery manager before launching goroutines that reference it
113+ recoveryManager := pipeline .NewRecoveryManager (r .client , workflow .ID , true )
114+ if err := recoveryManager .InitRecoveryState (runnerCtx , workflow .Config , int64 (timeout .Seconds ())); err != nil {
115+ logger .Warn ().Err (err ).Msg ("failed to initialize recovery state, continuing without recovery" )
116+ recoveryManager = pipeline .NewRecoveryManager (r .client , workflow .ID , false )
117+ }
118+
109119 // Listen for remote cancel events (UI / API).
110120 // When canceled, we MUST cancel the workflow context
111121 // so that workflow execution stops immediately.
@@ -118,9 +128,7 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error {
118128 } else {
119129 if canceled {
120130 logger .Debug ().Msg ("server side cancel signal received" )
121- if recoveryManager != nil {
122- recoveryManager .SetCanceled ()
123- }
131+ recoveryManager .SetCanceled ()
124132 cancelWorkflowCtx (pipeline .ErrCancel )
125133 }
126134 logger .Debug ().Msg ("cancel listener exited normally" )
@@ -144,25 +152,6 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error {
144152 }
145153 }()
146154
147- state := rpc.WorkflowState {
148- Started : time .Now ().Unix (),
149- }
150-
151- if err := r .client .Init (runnerCtx , workflow .ID , state ); err != nil {
152- logger .Error ().Err (err ).Msg ("signaling workflow initialization to server failed" )
153- // We have an error, maybe the server is currently unreachable or other server-side errors occurred.
154- // So let's clean up and end this not yet started workflow run.
155- cancelWorkflowCtx (err )
156- return err
157- }
158-
159- // Initialize recovery manager; if not enabled on server, it will be a no-op
160- recoveryManager = pipeline .NewRecoveryManager (r .client , workflow .ID , true )
161- if err := recoveryManager .InitRecoveryState (runnerCtx , workflow .Config , int64 (timeout .Seconds ())); err != nil {
162- logger .Warn ().Err (err ).Msg ("failed to initialize recovery state, continuing without recovery" )
163- recoveryManager = pipeline .NewRecoveryManager (nil , workflow .ID , false )
164- }
165-
166155 var uploads sync.WaitGroup
167156
168157 // Run pipeline
@@ -204,7 +193,7 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error {
204193
205194 // If workflow is recoverable (context canceled, recovery enabled, not user cancel),
206195 // skip marking as done. The workflow will be picked up by a new agent after restart.
207- if recoveryManager != nil && recoveryManager .IsRecoverable (runnerCtx ) {
196+ if recoveryManager .IsRecoverable (runnerCtx ) {
208197 logger .Info ().Msg ("workflow is recoverable, not marking as done" )
209198 return nil
210199 }
0 commit comments