@@ -21,6 +21,7 @@ import (
21
21
"errors"
22
22
"fmt"
23
23
"net/http"
24
+ "strconv"
24
25
"time"
25
26
26
27
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
@@ -179,11 +180,59 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
179
180
return ctrl.Result {}, nil
180
181
}
181
182
183
+ secret := new (corev1.Secret )
184
+ if err := r .Get (ctx , req .NamespacedName , secret ); err != nil {
185
+ if ! kerrors .IsNotFound (err ) {
186
+ log .Error (err , "Failed to fetch secret" )
187
+ return ctrl.Result {}, err
188
+ }
189
+
190
+ jitConfig , err := r .createRunnerJitConfig (ctx , ephemeralRunner , log )
191
+ switch {
192
+ case err == nil :
193
+ // create secret if not created
194
+ log .Info ("Creating new ephemeral runner secret for jitconfig." )
195
+ if err := r .createSecret (ctx , ephemeralRunner , jitConfig , log ); err != nil {
196
+ return ctrl.Result {}, fmt .Errorf ("failed to create secret: %w" , err )
197
+ }
198
+ log .Info ("Created new ephemeral runner secret for jitconfig." )
199
+
200
+ case errors .Is (err , retryableError ):
201
+ log .Info ("Encountered retryable error, requeueing" , "error" , err .Error ())
202
+ return ctrl.Result {Requeue : true }, nil
203
+ case errors .Is (err , fatalError ):
204
+ log .Info ("JIT config cannot be created for this ephemeral runner, issuing delete" , "error" , err .Error ())
205
+ if err := r .Delete (ctx , ephemeralRunner ); err != nil {
206
+ return ctrl.Result {}, fmt .Errorf ("failed to delete the ephemeral runner: %w" , err )
207
+ }
208
+ log .Info ("Request to delete ephemeral runner has been issued" )
209
+ return ctrl.Result {}, nil
210
+ default :
211
+ log .Error (err , "Failed to create ephemeral runners secret" , "error" , err .Error ())
212
+ return ctrl.Result {}, err
213
+ }
214
+ }
215
+
182
216
if ephemeralRunner .Status .RunnerId == 0 {
183
- log .Info ("Creating new ephemeral runner registration and updating status with runner config" )
184
- if r , err := r .updateStatusWithRunnerConfig (ctx , ephemeralRunner , log ); r != nil {
185
- return * r , err
217
+ log .Info ("Updating ephemeral runner status with runnerId and runnerName" )
218
+ runnerID , err := strconv .Atoi (string (secret .Data ["runnerId" ]))
219
+ if err != nil {
220
+ log .Error (err , "Runner config secret is corrupted: missing runnerId" )
221
+ log .Info ("Deleting corrupted runner config secret" )
222
+ if err := r .Delete (ctx , secret ); err != nil {
223
+ return ctrl.Result {}, fmt .Errorf ("failed to delete the corrupted runner config secret" )
224
+ }
225
+ log .Info ("Corrupted runner config secret has been deleted" )
226
+ return ctrl.Result {Requeue : true }, nil
227
+ }
228
+
229
+ if err := patchSubResource (ctx , r .Status (), ephemeralRunner , func (obj * v1alpha1.EphemeralRunner ) {
230
+ obj .Status .RunnerId = runnerID
231
+ obj .Status .RunnerName = string (secret .Data ["runnerName" ])
232
+ }); err != nil {
233
+ return ctrl.Result {}, fmt .Errorf ("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %w" , err )
186
234
}
235
+ log .Info ("Updated ephemeral runner status with runnerId and runnerName" )
187
236
}
188
237
189
238
if len (ephemeralRunner .Status .Failures ) > maxFailures {
@@ -213,27 +262,6 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
213
262
}, nil
214
263
}
215
264
216
- secret := new (corev1.Secret )
217
- if err := r .Get (ctx , req .NamespacedName , secret ); err != nil {
218
- if ! kerrors .IsNotFound (err ) {
219
- log .Error (err , "Failed to fetch secret" )
220
- return ctrl.Result {}, err
221
- }
222
- // create secret if not created
223
- log .Info ("Creating new ephemeral runner secret for jitconfig." )
224
- if r , err := r .createSecret (ctx , ephemeralRunner , log ); r != nil {
225
- return * r , err
226
- }
227
-
228
- // Retry to get the secret that was just created.
229
- // Otherwise, even though we want to continue to create the pod,
230
- // it fails due to the missing secret resulting in an invalid pod spec.
231
- if err := r .Get (ctx , req .NamespacedName , secret ); err != nil {
232
- log .Error (err , "Failed to fetch secret" )
233
- return ctrl.Result {}, err
234
- }
235
- }
236
-
237
265
pod := new (corev1.Pod )
238
266
if err := r .Get (ctx , req .NamespacedName , pod ); err != nil {
239
267
if ! kerrors .IsNotFound (err ) {
@@ -509,14 +537,12 @@ func (r *EphemeralRunnerReconciler) deletePodAsFailed(ctx context.Context, ephem
509
537
return nil
510
538
}
511
539
512
- // updateStatusWithRunnerConfig fetches runtime configuration needed by the runner
513
- // This method should always set .status.runnerId and .status.runnerJITConfig
514
- func (r * EphemeralRunnerReconciler ) updateStatusWithRunnerConfig (ctx context.Context , ephemeralRunner * v1alpha1.EphemeralRunner , log logr.Logger ) (* ctrl.Result , error ) {
540
+ func (r * EphemeralRunnerReconciler ) createRunnerJitConfig (ctx context.Context , ephemeralRunner * v1alpha1.EphemeralRunner , log logr.Logger ) (* actions.RunnerScaleSetJitRunnerConfig , error ) {
515
541
// Runner is not registered with the service. We need to register it first
516
542
log .Info ("Creating ephemeral runner JIT config" )
517
543
actionsClient , err := r .GetActionsService (ctx , ephemeralRunner )
518
544
if err != nil {
519
- return & ctrl. Result {} , fmt .Errorf ("failed to get actions client for generating JIT config: %w" , err )
545
+ return nil , fmt .Errorf ("failed to get actions client for generating JIT config: %w" , err )
520
546
}
521
547
522
548
jitSettings := & actions.RunnerScaleSetJitRunnerSetting {
@@ -531,74 +557,52 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
531
557
}
532
558
533
559
jitConfig , err := actionsClient .GenerateJitRunnerConfig (ctx , jitSettings , ephemeralRunner .Spec .RunnerScaleSetId )
534
- if err != nil {
535
- actionsError := & actions.ActionsError {}
536
- if ! errors .As (err , & actionsError ) {
537
- return & ctrl.Result {}, fmt .Errorf ("failed to generate JIT config with generic error: %w" , err )
538
- }
560
+ if err == nil { // if NO error
561
+ log .Info ("Created ephemeral runner JIT config" , "runnerId" , jitConfig .Runner .Id )
562
+ return jitConfig , nil
563
+ }
539
564
540
- if actionsError . StatusCode != http . StatusConflict ||
541
- ! actionsError . IsException ( "AgentExistsException" ) {
542
- return & ctrl. Result {} , fmt .Errorf ("failed to generate JIT config with Actions service error: %w" , err )
543
- }
565
+ actionsError := & actions. ActionsError {}
566
+ if ! errors . As ( err , & actionsError ) {
567
+ return nil , fmt .Errorf ("failed to generate JIT config with generic error: %w" , err )
568
+ }
544
569
545
- // If the runner with the name we want already exists it means:
546
- // - We might have a name collision.
547
- // - Our previous reconciliation loop failed to update the
548
- // status with the runnerId and runnerJITConfig after the `GenerateJitRunnerConfig`
549
- // created the runner registration on the service.
550
- // We will try to get the runner and see if it's belong to this AutoScalingRunnerSet,
551
- // if so, we can simply delete the runner registration and create a new one.
552
- log .Info ("Getting runner jit config failed with conflict error, trying to get the runner by name" , "runnerName" , ephemeralRunner .Name )
553
- existingRunner , err := actionsClient .GetRunnerByName (ctx , ephemeralRunner .Name )
554
- if err != nil {
555
- return & ctrl.Result {}, fmt .Errorf ("failed to get runner by name: %w" , err )
556
- }
570
+ if actionsError .StatusCode != http .StatusConflict ||
571
+ ! actionsError .IsException ("AgentExistsException" ) {
572
+ return nil , fmt .Errorf ("failed to generate JIT config with Actions service error: %w" , err )
573
+ }
557
574
558
- if existingRunner == nil {
559
- log .Info ("Runner with the same name does not exist, re-queuing the reconciliation" )
560
- return & ctrl.Result {Requeue : true }, nil
561
- }
575
+ // If the runner with the name we want already exists it means:
576
+ // - We might have a name collision.
577
+ // - Our previous reconciliation loop failed to update the
578
+ // status with the runnerId and runnerJITConfig after the `GenerateJitRunnerConfig`
579
+ // created the runner registration on the service.
580
+ // We will try to get the runner and see if it's belong to this AutoScalingRunnerSet,
581
+ // if so, we can simply delete the runner registration and create a new one.
582
+ log .Info ("Getting runner jit config failed with conflict error, trying to get the runner by name" , "runnerName" , ephemeralRunner .Name )
583
+ existingRunner , err := actionsClient .GetRunnerByName (ctx , ephemeralRunner .Name )
584
+ if err != nil {
585
+ return nil , fmt .Errorf ("failed to get runner by name: %w" , err )
586
+ }
562
587
563
- log .Info ("Found the runner with the same name" , "runnerId" , existingRunner .Id , "runnerScaleSetId" , existingRunner .RunnerScaleSetId )
564
- if existingRunner .RunnerScaleSetId == ephemeralRunner .Spec .RunnerScaleSetId {
565
- log .Info ("Removing the runner with the same name" )
566
- err := actionsClient .RemoveRunner (ctx , int64 (existingRunner .Id ))
567
- if err != nil {
568
- return & ctrl.Result {}, fmt .Errorf ("failed to remove runner from the service: %w" , err )
569
- }
588
+ if existingRunner == nil {
589
+ log .Info ("Runner with the same name does not exist anymore, re-queuing the reconciliation" )
590
+ return nil , fmt .Errorf ("%w: runner existed, retry configuration" , retryableError )
591
+ }
570
592
571
- log .Info ("Removed the runner with the same name, re-queuing the reconciliation" )
572
- return & ctrl.Result {Requeue : true }, nil
593
+ log .Info ("Found the runner with the same name" , "runnerId" , existingRunner .Id , "runnerScaleSetId" , existingRunner .RunnerScaleSetId )
594
+ if existingRunner .RunnerScaleSetId == ephemeralRunner .Spec .RunnerScaleSetId {
595
+ log .Info ("Removing the runner with the same name" )
596
+ err := actionsClient .RemoveRunner (ctx , int64 (existingRunner .Id ))
597
+ if err != nil {
598
+ return nil , fmt .Errorf ("failed to remove runner from the service: %w" , err )
573
599
}
574
600
575
- // TODO: Do we want to mark the ephemeral runner as failed, and let EphemeralRunnerSet to clean it up, so we can recover from this situation?
576
- // The situation is that the EphemeralRunner's name is already used by something else to register a runner, and we can't take the control back.
577
- return & ctrl.Result {}, fmt .Errorf ("runner with the same name but doesn't belong to this RunnerScaleSet: %w" , err )
601
+ log .Info ("Removed the runner with the same name, re-queuing the reconciliation" )
602
+ return nil , fmt .Errorf ("%w: runner existed belonging to the scale set, retry configuration" , retryableError )
578
603
}
579
- log .Info ("Created ephemeral runner JIT config" , "runnerId" , jitConfig .Runner .Id )
580
604
581
- log .Info ("Updating ephemeral runner status with runnerId and runnerJITConfig" )
582
- err = patchSubResource (ctx , r .Status (), ephemeralRunner , func (obj * v1alpha1.EphemeralRunner ) {
583
- obj .Status .RunnerId = jitConfig .Runner .Id
584
- obj .Status .RunnerName = jitConfig .Runner .Name
585
- obj .Status .RunnerJITConfig = jitConfig .EncodedJITConfig
586
- })
587
- if err != nil {
588
- return & ctrl.Result {}, fmt .Errorf ("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %w" , err )
589
- }
590
-
591
- // We want to continue without a requeue for faster pod creation.
592
- //
593
- // To do so, we update the status in-place, so that both continuing the loop and
594
- // and requeuing and skipping updateStatusWithRunnerConfig in the next loop, will
595
- // have the same effect.
596
- ephemeralRunner .Status .RunnerId = jitConfig .Runner .Id
597
- ephemeralRunner .Status .RunnerName = jitConfig .Runner .Name
598
- ephemeralRunner .Status .RunnerJITConfig = jitConfig .EncodedJITConfig
599
-
600
- log .Info ("Updated ephemeral runner status with runnerId and runnerJITConfig" )
601
- return nil , nil
605
+ return nil , fmt .Errorf ("%w: runner with the same name but doesn't belong to this RunnerScaleSet: %w" , fatalError , err )
602
606
}
603
607
604
608
func (r * EphemeralRunnerReconciler ) createPod (ctx context.Context , runner * v1alpha1.EphemeralRunner , secret * corev1.Secret , log logr.Logger ) (ctrl.Result , error ) {
@@ -674,21 +678,21 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
674
678
return ctrl.Result {}, nil
675
679
}
676
680
677
- func (r * EphemeralRunnerReconciler ) createSecret (ctx context.Context , runner * v1alpha1.EphemeralRunner , log logr.Logger ) ( * ctrl. Result , error ) {
681
+ func (r * EphemeralRunnerReconciler ) createSecret (ctx context.Context , runner * v1alpha1.EphemeralRunner , jitConfig * actions. RunnerScaleSetJitRunnerConfig , log logr.Logger ) error {
678
682
log .Info ("Creating new secret for ephemeral runner" )
679
- jitSecret := r .newEphemeralRunnerJitSecret (runner )
683
+ jitSecret := r .newEphemeralRunnerJitSecret (runner , jitConfig )
680
684
681
685
if err := ctrl .SetControllerReference (runner , jitSecret , r .Scheme ); err != nil {
682
- return & ctrl. Result {}, fmt .Errorf ("failed to set controller reference: %w" , err )
686
+ return fmt .Errorf ("failed to set controller reference: %w" , err )
683
687
}
684
688
685
689
log .Info ("Created new secret spec for ephemeral runner" )
686
690
if err := r .Create (ctx , jitSecret ); err != nil {
687
- return & ctrl. Result {}, fmt .Errorf ("failed to create jit secret: %w" , err )
691
+ return fmt .Errorf ("failed to create jit secret: %w" , err )
688
692
}
689
693
690
694
log .Info ("Created ephemeral runner secret" , "secretName" , jitSecret .Name )
691
- return nil , nil
695
+ return nil
692
696
}
693
697
694
698
// updateRunStatusFromPod is responsible for updating non-exiting statuses.
0 commit comments