Skip to content

Commit c275411

Browse files
authored
Remove JIT config from ephemeral runner status field (#4191)
1 parent 52d65c3 commit c275411

File tree

6 files changed

+114
-101
lines changed

6 files changed

+114
-101
lines changed

apis/actions.github.com/v1alpha1/ephemeralrunner_types.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,6 @@ type EphemeralRunnerStatus struct {
145145
RunnerId int `json:"runnerId,omitempty"`
146146
// +optional
147147
RunnerName string `json:"runnerName,omitempty"`
148-
// +optional
149-
RunnerJITConfig string `json:"runnerJITConfig,omitempty"`
150148

151149
// +optional
152150
Failures map[string]metav1.Time `json:"failures,omitempty"`

charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunners.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7874,8 +7874,6 @@ spec:
78747874
type: string
78757875
runnerId:
78767876
type: integer
7877-
runnerJITConfig:
7878-
type: string
78797877
runnerName:
78807878
type: string
78817879
workflowRunId:

config/crd/bases/actions.github.com_ephemeralrunners.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7874,8 +7874,6 @@ spec:
78747874
type: string
78757875
runnerId:
78767876
type: integer
7877-
runnerJITConfig:
7878-
type: string
78797877
runnerName:
78807878
type: string
78817879
workflowRunId:

controllers/actions.github.com/ephemeralrunner_controller.go

Lines changed: 96 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"errors"
2222
"fmt"
2323
"net/http"
24+
"strconv"
2425
"time"
2526

2627
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
@@ -179,11 +180,59 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
179180
return ctrl.Result{}, nil
180181
}
181182

183+
secret := new(corev1.Secret)
184+
if err := r.Get(ctx, req.NamespacedName, secret); err != nil {
185+
if !kerrors.IsNotFound(err) {
186+
log.Error(err, "Failed to fetch secret")
187+
return ctrl.Result{}, err
188+
}
189+
190+
jitConfig, err := r.createRunnerJitConfig(ctx, ephemeralRunner, log)
191+
switch {
192+
case err == nil:
193+
// create secret if not created
194+
log.Info("Creating new ephemeral runner secret for jitconfig.")
195+
if err := r.createSecret(ctx, ephemeralRunner, jitConfig, log); err != nil {
196+
return ctrl.Result{}, fmt.Errorf("failed to create secret: %w", err)
197+
}
198+
log.Info("Created new ephemeral runner secret for jitconfig.")
199+
200+
case errors.Is(err, retryableError):
201+
log.Info("Encountered retryable error, requeueing", "error", err.Error())
202+
return ctrl.Result{Requeue: true}, nil
203+
case errors.Is(err, fatalError):
204+
log.Info("JIT config cannot be created for this ephemeral runner, issuing delete", "error", err.Error())
205+
if err := r.Delete(ctx, ephemeralRunner); err != nil {
206+
return ctrl.Result{}, fmt.Errorf("failed to delete the ephemeral runner: %w", err)
207+
}
208+
log.Info("Request to delete ephemeral runner has been issued")
209+
return ctrl.Result{}, nil
210+
default:
211+
log.Error(err, "Failed to create ephemeral runners secret", "error", err.Error())
212+
return ctrl.Result{}, err
213+
}
214+
}
215+
182216
if ephemeralRunner.Status.RunnerId == 0 {
183-
log.Info("Creating new ephemeral runner registration and updating status with runner config")
184-
if r, err := r.updateStatusWithRunnerConfig(ctx, ephemeralRunner, log); r != nil {
185-
return *r, err
217+
log.Info("Updating ephemeral runner status with runnerId and runnerName")
218+
runnerID, err := strconv.Atoi(string(secret.Data["runnerId"]))
219+
if err != nil {
220+
log.Error(err, "Runner config secret is corrupted: missing runnerId")
221+
log.Info("Deleting corrupted runner config secret")
222+
if err := r.Delete(ctx, secret); err != nil {
223+
return ctrl.Result{}, fmt.Errorf("failed to delete the corrupted runner config secret")
224+
}
225+
log.Info("Corrupted runner config secret has been deleted")
226+
return ctrl.Result{Requeue: true}, nil
227+
}
228+
229+
if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
230+
obj.Status.RunnerId = runnerID
231+
obj.Status.RunnerName = string(secret.Data["runnerName"])
232+
}); err != nil {
233+
return ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %w", err)
186234
}
235+
log.Info("Updated ephemeral runner status with runnerId and runnerName")
187236
}
188237

189238
if len(ephemeralRunner.Status.Failures) > maxFailures {
@@ -213,27 +262,6 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
213262
}, nil
214263
}
215264

216-
secret := new(corev1.Secret)
217-
if err := r.Get(ctx, req.NamespacedName, secret); err != nil {
218-
if !kerrors.IsNotFound(err) {
219-
log.Error(err, "Failed to fetch secret")
220-
return ctrl.Result{}, err
221-
}
222-
// create secret if not created
223-
log.Info("Creating new ephemeral runner secret for jitconfig.")
224-
if r, err := r.createSecret(ctx, ephemeralRunner, log); r != nil {
225-
return *r, err
226-
}
227-
228-
// Retry to get the secret that was just created.
229-
// Otherwise, even though we want to continue to create the pod,
230-
// it fails due to the missing secret resulting in an invalid pod spec.
231-
if err := r.Get(ctx, req.NamespacedName, secret); err != nil {
232-
log.Error(err, "Failed to fetch secret")
233-
return ctrl.Result{}, err
234-
}
235-
}
236-
237265
pod := new(corev1.Pod)
238266
if err := r.Get(ctx, req.NamespacedName, pod); err != nil {
239267
if !kerrors.IsNotFound(err) {
@@ -509,14 +537,12 @@ func (r *EphemeralRunnerReconciler) deletePodAsFailed(ctx context.Context, ephem
509537
return nil
510538
}
511539

512-
// updateStatusWithRunnerConfig fetches runtime configuration needed by the runner
513-
// This method should always set .status.runnerId and .status.runnerJITConfig
514-
func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
540+
func (r *EphemeralRunnerReconciler) createRunnerJitConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (*actions.RunnerScaleSetJitRunnerConfig, error) {
515541
// Runner is not registered with the service. We need to register it first
516542
log.Info("Creating ephemeral runner JIT config")
517543
actionsClient, err := r.GetActionsService(ctx, ephemeralRunner)
518544
if err != nil {
519-
return &ctrl.Result{}, fmt.Errorf("failed to get actions client for generating JIT config: %w", err)
545+
return nil, fmt.Errorf("failed to get actions client for generating JIT config: %w", err)
520546
}
521547

522548
jitSettings := &actions.RunnerScaleSetJitRunnerSetting{
@@ -531,74 +557,52 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
531557
}
532558

533559
jitConfig, err := actionsClient.GenerateJitRunnerConfig(ctx, jitSettings, ephemeralRunner.Spec.RunnerScaleSetId)
534-
if err != nil {
535-
actionsError := &actions.ActionsError{}
536-
if !errors.As(err, &actionsError) {
537-
return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with generic error: %w", err)
538-
}
560+
if err == nil { // if NO error
561+
log.Info("Created ephemeral runner JIT config", "runnerId", jitConfig.Runner.Id)
562+
return jitConfig, nil
563+
}
539564

540-
if actionsError.StatusCode != http.StatusConflict ||
541-
!actionsError.IsException("AgentExistsException") {
542-
return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with Actions service error: %w", err)
543-
}
565+
actionsError := &actions.ActionsError{}
566+
if !errors.As(err, &actionsError) {
567+
return nil, fmt.Errorf("failed to generate JIT config with generic error: %w", err)
568+
}
544569

545-
// If the runner with the name we want already exists it means:
546-
// - We might have a name collision.
547-
// - Our previous reconciliation loop failed to update the
548-
// status with the runnerId and runnerJITConfig after the `GenerateJitRunnerConfig`
549-
// created the runner registration on the service.
550-
// We will try to get the runner and see if it's belong to this AutoScalingRunnerSet,
551-
// if so, we can simply delete the runner registration and create a new one.
552-
log.Info("Getting runner jit config failed with conflict error, trying to get the runner by name", "runnerName", ephemeralRunner.Name)
553-
existingRunner, err := actionsClient.GetRunnerByName(ctx, ephemeralRunner.Name)
554-
if err != nil {
555-
return &ctrl.Result{}, fmt.Errorf("failed to get runner by name: %w", err)
556-
}
570+
if actionsError.StatusCode != http.StatusConflict ||
571+
!actionsError.IsException("AgentExistsException") {
572+
return nil, fmt.Errorf("failed to generate JIT config with Actions service error: %w", err)
573+
}
557574

558-
if existingRunner == nil {
559-
log.Info("Runner with the same name does not exist, re-queuing the reconciliation")
560-
return &ctrl.Result{Requeue: true}, nil
561-
}
575+
// If the runner with the name we want already exists it means:
576+
// - We might have a name collision.
577+
// - Our previous reconciliation loop failed to update the
578+
// status with the runnerId and runnerJITConfig after the `GenerateJitRunnerConfig`
579+
// created the runner registration on the service.
580+
// We will try to get the runner and see if it's belong to this AutoScalingRunnerSet,
581+
// if so, we can simply delete the runner registration and create a new one.
582+
log.Info("Getting runner jit config failed with conflict error, trying to get the runner by name", "runnerName", ephemeralRunner.Name)
583+
existingRunner, err := actionsClient.GetRunnerByName(ctx, ephemeralRunner.Name)
584+
if err != nil {
585+
return nil, fmt.Errorf("failed to get runner by name: %w", err)
586+
}
562587

563-
log.Info("Found the runner with the same name", "runnerId", existingRunner.Id, "runnerScaleSetId", existingRunner.RunnerScaleSetId)
564-
if existingRunner.RunnerScaleSetId == ephemeralRunner.Spec.RunnerScaleSetId {
565-
log.Info("Removing the runner with the same name")
566-
err := actionsClient.RemoveRunner(ctx, int64(existingRunner.Id))
567-
if err != nil {
568-
return &ctrl.Result{}, fmt.Errorf("failed to remove runner from the service: %w", err)
569-
}
588+
if existingRunner == nil {
589+
log.Info("Runner with the same name does not exist anymore, re-queuing the reconciliation")
590+
return nil, fmt.Errorf("%w: runner existed, retry configuration", retryableError)
591+
}
570592

571-
log.Info("Removed the runner with the same name, re-queuing the reconciliation")
572-
return &ctrl.Result{Requeue: true}, nil
593+
log.Info("Found the runner with the same name", "runnerId", existingRunner.Id, "runnerScaleSetId", existingRunner.RunnerScaleSetId)
594+
if existingRunner.RunnerScaleSetId == ephemeralRunner.Spec.RunnerScaleSetId {
595+
log.Info("Removing the runner with the same name")
596+
err := actionsClient.RemoveRunner(ctx, int64(existingRunner.Id))
597+
if err != nil {
598+
return nil, fmt.Errorf("failed to remove runner from the service: %w", err)
573599
}
574600

575-
// TODO: Do we want to mark the ephemeral runner as failed, and let EphemeralRunnerSet to clean it up, so we can recover from this situation?
576-
// The situation is that the EphemeralRunner's name is already used by something else to register a runner, and we can't take the control back.
577-
return &ctrl.Result{}, fmt.Errorf("runner with the same name but doesn't belong to this RunnerScaleSet: %w", err)
601+
log.Info("Removed the runner with the same name, re-queuing the reconciliation")
602+
return nil, fmt.Errorf("%w: runner existed belonging to the scale set, retry configuration", retryableError)
578603
}
579-
log.Info("Created ephemeral runner JIT config", "runnerId", jitConfig.Runner.Id)
580604

581-
log.Info("Updating ephemeral runner status with runnerId and runnerJITConfig")
582-
err = patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
583-
obj.Status.RunnerId = jitConfig.Runner.Id
584-
obj.Status.RunnerName = jitConfig.Runner.Name
585-
obj.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
586-
})
587-
if err != nil {
588-
return &ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %w", err)
589-
}
590-
591-
// We want to continue without a requeue for faster pod creation.
592-
//
593-
// To do so, we update the status in-place, so that both continuing the loop and
594-
// and requeuing and skipping updateStatusWithRunnerConfig in the next loop, will
595-
// have the same effect.
596-
ephemeralRunner.Status.RunnerId = jitConfig.Runner.Id
597-
ephemeralRunner.Status.RunnerName = jitConfig.Runner.Name
598-
ephemeralRunner.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
599-
600-
log.Info("Updated ephemeral runner status with runnerId and runnerJITConfig")
601-
return nil, nil
605+
return nil, fmt.Errorf("%w: runner with the same name but doesn't belong to this RunnerScaleSet: %w", fatalError, err)
602606
}
603607

604608
func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, log logr.Logger) (ctrl.Result, error) {
@@ -674,21 +678,21 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
674678
return ctrl.Result{}, nil
675679
}
676680

677-
func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
681+
func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, jitConfig *actions.RunnerScaleSetJitRunnerConfig, log logr.Logger) error {
678682
log.Info("Creating new secret for ephemeral runner")
679-
jitSecret := r.newEphemeralRunnerJitSecret(runner)
683+
jitSecret := r.newEphemeralRunnerJitSecret(runner, jitConfig)
680684

681685
if err := ctrl.SetControllerReference(runner, jitSecret, r.Scheme); err != nil {
682-
return &ctrl.Result{}, fmt.Errorf("failed to set controller reference: %w", err)
686+
return fmt.Errorf("failed to set controller reference: %w", err)
683687
}
684688

685689
log.Info("Created new secret spec for ephemeral runner")
686690
if err := r.Create(ctx, jitSecret); err != nil {
687-
return &ctrl.Result{}, fmt.Errorf("failed to create jit secret: %w", err)
691+
return fmt.Errorf("failed to create jit secret: %w", err)
688692
}
689693

690694
log.Info("Created ephemeral runner secret", "secretName", jitSecret.Name)
691-
return nil, nil
695+
return nil
692696
}
693697

694698
// updateRunStatusFromPod is responsible for updating non-exiting statuses.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package actionsgithubcom
2+
3+
type controllerError string
4+
5+
func (e controllerError) Error() string {
6+
return string(e)
7+
}
8+
9+
const (
10+
retryableError = controllerError("retryable error")
11+
fatalError = controllerError("fatal error")
12+
)

controllers/actions.github.com/resourcebuilder.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ func (b *ResourceBuilder) newEphemeralRunnerPod(ctx context.Context, runner *v1a
618618
FilterLabels(labels, LabelKeyRunnerTemplateHash),
619619
annotations,
620620
runner.Spec,
621-
runner.Status.RunnerJITConfig,
621+
secret.Data,
622622
)
623623

624624
objectMeta := metav1.ObjectMeta{
@@ -671,14 +671,17 @@ func (b *ResourceBuilder) newEphemeralRunnerPod(ctx context.Context, runner *v1a
671671
return &newPod
672672
}
673673

674-
func (b *ResourceBuilder) newEphemeralRunnerJitSecret(ephemeralRunner *v1alpha1.EphemeralRunner) *corev1.Secret {
674+
func (b *ResourceBuilder) newEphemeralRunnerJitSecret(ephemeralRunner *v1alpha1.EphemeralRunner, jitConfig *actions.RunnerScaleSetJitRunnerConfig) *corev1.Secret {
675675
return &corev1.Secret{
676676
ObjectMeta: metav1.ObjectMeta{
677677
Name: ephemeralRunner.Name,
678678
Namespace: ephemeralRunner.Namespace,
679679
},
680680
Data: map[string][]byte{
681-
jitTokenKey: []byte(ephemeralRunner.Status.RunnerJITConfig),
681+
jitTokenKey: []byte(jitConfig.EncodedJITConfig),
682+
"runnerName": []byte(jitConfig.Runner.Name),
683+
"runnerId": []byte(strconv.Itoa(jitConfig.Runner.Id)),
684+
"scaleSetId": []byte(strconv.Itoa(jitConfig.Runner.RunnerScaleSetId)),
682685
},
683686
}
684687
}

0 commit comments

Comments
 (0)