diff --git a/agent/rpc/client_grpc.go b/agent/rpc/client_grpc.go index e42c23476d1..e3ff32bdca1 100644 --- a/agent/rpc/client_grpc.go +++ b/agent/rpc/client_grpc.go @@ -28,6 +28,7 @@ import ( grpcproto "google.golang.org/protobuf/proto" backend "go.woodpecker-ci.org/woodpecker/v3/pipeline/backend/types" + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" "go.woodpecker-ci.org/woodpecker/v3/rpc" "go.woodpecker-ci.org/woodpecker/v3/rpc/proto" ) @@ -482,7 +483,7 @@ func (c *client) sendLogs(ctx context.Context, entries []*proto.LogEntry) error return nil } -func (c *client) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (int64, error) { +func (c *client) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (rpc.AgentConfig, error) { req := new(proto.RegisterAgentRequest) req.Info = &proto.AgentInfo{ Platform: info.Platform, @@ -493,7 +494,14 @@ func (c *client) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (int64, } res, err := c.client.RegisterAgent(ctx, req) - return res.GetAgentId(), err + if err != nil { + return rpc.AgentConfig{}, err + } + protoConfig := res.GetConfig() + return rpc.AgentConfig{ + AgentID: protoConfig.GetAgentId(), + RecoveryEnabled: protoConfig.GetRecoveryEnabled(), + }, nil } func (c *client) UnregisterAgent(ctx context.Context) error { @@ -542,3 +550,98 @@ func (c *client) ReportHealth(ctx context.Context) (err error) { } } } + +// InitWorkflowRecovery initializes recovery state for all steps in a workflow and returns current states. 
+func (c *client) InitWorkflowRecovery(ctx context.Context, workflowID string, stepUUIDs []string, timeoutSeconds int64) (map[string]*types.RecoveryState, error) {
+	backOff := c.newBackOff()
+	request := &proto.InitWorkflowRecoveryRequest{
+		WorkflowId:     workflowID,
+		StepUuids:      stepUUIDs,
+		TimeoutSeconds: timeoutSeconds,
+	}
+
+	// Keep calling the server until it answers, a fatal gRPC code is seen,
+	// or ctx is done.
+	for {
+		response, err := c.client.InitWorkflowRecovery(ctx, request)
+		if err == nil {
+			// Success: translate the proto states into a map keyed by step UUID.
+			protoStates := response.GetStates()
+			recovered := make(map[string]*types.RecoveryState, len(protoStates))
+			for _, protoState := range protoStates {
+				recovered[protoState.GetStepUuid()] = &types.RecoveryState{
+					Status:   types.RecoveryStatus(protoState.GetStatus()),
+					ExitCode: int(protoState.GetExitCode()),
+				}
+			}
+			return recovered, nil
+		}
+
+		code := status.Code(err)
+		log.Error().Err(err).Msgf("grpc error: InitWorkflowRecovery(): code: %v", code)
+
+		switch code {
+		case codes.Aborted, codes.DataLoss, codes.DeadlineExceeded, codes.Internal, codes.Unavailable:
+			// non-fatal errors: wait for the next backoff interval and retry
+		case codes.Canceled:
+			if ctx.Err() == nil {
+				return nil, err
+			}
+			// ctx itself was canceled, so the cancellation is not reported as an error
+			return nil, nil
+		default:
+			return nil, err
+		}
+
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-time.After(backOff.NextBackOff()):
+		}
+	}
+}
+
+// UpdateStepRecoveryState updates the recovery state for a specific step.
+func (c *client) UpdateStepRecoveryState(ctx context.Context, workflowID, stepUUID string, recoveryStatus types.RecoveryStatus, exitCode int) (err error) { + retry := c.newBackOff() + req := &proto.UpdateStepRecoveryStateRequest{ + WorkflowId: workflowID, + StepUuid: stepUUID, + Status: proto.RecoveryStatus(recoveryStatus), + ExitCode: int32(exitCode), + } + + for { + _, err = c.client.UpdateStepRecoveryState(ctx, req) + if err == nil { + return nil + } + log.Error().Err(err).Msgf("grpc error: UpdateStepRecoveryState(): code: %v", status.Code(err)) + + switch status.Code(err) { + case codes.Canceled: + if ctx.Err() != nil { + return nil + } + return err + case + codes.Aborted, + codes.DataLoss, + codes.DeadlineExceeded, + codes.Internal, + codes.Unavailable: + // non-fatal errors + default: + return err + } + + select { + case <-time.After(retry.NextBackOff()): + case <-ctx.Done(): + return ctx.Err() + } + } +} diff --git a/agent/runner.go b/agent/runner.go index 900221588cf..a3166add000 100644 --- a/agent/runner.go +++ b/agent/runner.go @@ -34,20 +34,22 @@ import ( ) type Runner struct { - client rpc.Peer - filter rpc.Filter - hostname string - counter *State - backend *backend.Backend + client rpc.Peer + filter rpc.Filter + hostname string + counter *State + backend *backend.Backend + recoveryEnabled bool } -func NewRunner(workEngine rpc.Peer, f rpc.Filter, h string, state *State, backend *backend.Backend) Runner { +func NewRunner(workEngine rpc.Peer, f rpc.Filter, h string, state *State, backend *backend.Backend, recoveryEnabled bool) Runner { return Runner{ - client: workEngine, - filter: f, - hostname: h, - counter: state, - backend: backend, + client: workEngine, + filter: f, + hostname: h, + counter: state, + backend: backend, + recoveryEnabled: recoveryEnabled, } } @@ -95,14 +97,28 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error { workflowCtx, cancelWorkflowCtx := context.WithCancelCause(workflowCtx) defer cancelWorkflowCtx(nil) - // 
Add sigterm support for internal context. - // Required to be able to terminate the running workflow by external signals. + // Handle SIGTERM (k8s, docker, system shutdown) workflowCtx = utils.WithContextSigtermCallback(workflowCtx, func() { logger.Error().Msg("received sigterm termination signal") // WithContextSigtermCallback would cancel the context too, but we want our own custom error cancelWorkflowCtx(pipeline_errors.ErrCancel) }) + state := rpc.WorkflowState{ + Started: time.Now().Unix(), + } + if err := r.client.Init(runnerCtx, workflow.ID, state); err != nil { + logger.Error().Err(err).Msg("workflow initialization failed") + return err + } + + // Initialize recovery manager before launching goroutines that reference it + recoveryManager := pipeline_runtime.NewRecoveryManager(r.client, workflow.ID, r.recoveryEnabled) + if err := recoveryManager.InitRecoveryState(runnerCtx, workflow.Config, int64(timeout.Seconds())); err != nil { + logger.Warn().Err(err).Msg("failed to initialize recovery state, continuing without recovery") + recoveryManager = pipeline_runtime.NewRecoveryManager(r.client, workflow.ID, false) + } + // Listen for remote cancel events (UI / API). // When canceled, we MUST cancel the workflow context // so that workflow execution stop immediately. 
@@ -114,10 +130,10 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error { cancelWorkflowCtx(err) } else { if canceled { - logger.Debug().Err(err).Msg("server side cancel signal received") + logger.Debug().Msg("server side cancel signal received") + recoveryManager.SetCanceled() cancelWorkflowCtx(pipeline_errors.ErrCancel) } - // Wait returned without error, meaning the workflow finished normally logger.Debug().Msg("cancel listener exited normally") } }() @@ -139,18 +155,6 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error { } }() - state := rpc.WorkflowState{ - Started: time.Now().Unix(), - } - - if err := r.client.Init(runnerCtx, workflow.ID, state); err != nil { - logger.Error().Err(err).Msg("signaling workflow initialization to server failed") - // We have an error, maybe the server is currently unreachable or other server-side errors occurred. - // So let's clean up and end this not yet started workflow run. - cancelWorkflowCtx(err) - return err - } - var uploads sync.WaitGroup // Run pipeline @@ -161,6 +165,7 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error { pipeline_runtime.WithLogger(r.createLogger(logger, &uploads, workflow)), pipeline_runtime.WithTracer(r.createTracer(ctxMeta, &uploads, logger, workflow)), pipeline_runtime.WithBackend(*r.backend), + pipeline_runtime.WithRecoveryManager(recoveryManager), pipeline_runtime.WithDescription(map[string]string{ "workflow_id": workflow.ID, "repo": repoName, @@ -189,6 +194,13 @@ func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error { uploads.Wait() logger.Debug().Msg("logs and traces uploaded") + // If workflow is recoverable (context canceled, recovery enabled, not user cancel), + // skip marking as done. The workflow will be picked up by a new agent after restart. 
+ if recoveryManager.IsRecoverable(runnerCtx) { + logger.Info().Msg("workflow is recoverable, not marking as done") + return nil + } + // Update workflow state doneCtx := runnerCtx if doneCtx.Err() != nil { diff --git a/cmd/agent/core/agent.go b/cmd/agent/core/agent.go index bf9adec7635..e44826af3b9 100644 --- a/cmd/agent/core/agent.go +++ b/cmd/agent/core/agent.go @@ -211,7 +211,7 @@ func run(ctx context.Context, c *cli.Command, backends []types.Backend) error { log.Debug().Msgf("custom labels detected: %#v", customLabels) } - agentConfig.AgentID, err = client.RegisterAgent(grpcCtx, rpc.AgentInfo{ //nolint:contextcheck + registeredAgent, err := client.RegisterAgent(grpcCtx, rpc.AgentInfo{ //nolint:contextcheck Version: version.String(), Backend: backendEngine.Name(), Platform: engInfo.Platform, @@ -221,6 +221,7 @@ func run(ctx context.Context, c *cli.Command, backends []types.Backend) error { if err != nil { return err } + agentConfig.AgentID = registeredAgent.AgentID serviceWaitingGroup.Go(func() error { // we close grpc client context once unregister was handled @@ -288,7 +289,7 @@ func run(ctx context.Context, c *cli.Command, backends []types.Backend) error { // https://go.dev/blog/go1.22 fixed scope for goroutines in loops for i := range maxWorkflows { serviceWaitingGroup.Go(func() error { - runner := agent.NewRunner(client, filter, hostname, counter, &backendEngine) + runner := agent.NewRunner(client, filter, hostname, counter, &backendEngine, registeredAgent.RecoveryEnabled) log.Debug().Msgf("created new runner %d", i) for { diff --git a/cmd/server/flags.go b/cmd/server/flags.go index 779d91400cd..a09ffa4a10d 100644 --- a/cmd/server/flags.go +++ b/cmd/server/flags.go @@ -593,6 +593,15 @@ var flags = append([]cli.Flag{ Name: "encryption-disable-flag", Usage: "Flag to decrypt all encrypted data and disable encryption on server", }, + // + // recovery options + // + &cli.BoolFlag{ + Sources: cli.EnvVars("WOODPECKER_RECOVERY_ENABLED"), + Name: 
"recovery-enabled", + Usage: "Enable pipeline recovery state tracking, allowing agents to resume workflows after restart", + Value: false, + }, }, logger.GlobalLoggerFlags...) // If woodpecker is running inside a container the default value for diff --git a/cmd/server/grpc_server.go b/cmd/server/grpc_server.go index 216dde8de9b..9c38837e513 100644 --- a/cmd/server/grpc_server.go +++ b/cmd/server/grpc_server.go @@ -53,6 +53,7 @@ func runGrpcServer(ctx context.Context, c *cli.Command, _store store.Store) erro server.Config.Services.Logs, server.Config.Services.Pubsub, _store, + c.Bool("recovery-enabled"), ) proto.RegisterWoodpeckerServer(grpcServer, woodpeckerServer) diff --git a/cmd/server/server.go b/cmd/server/server.go index c6e274454e3..45942f16e65 100644 --- a/cmd/server/server.go +++ b/cmd/server/server.go @@ -134,6 +134,28 @@ func run(ctx context.Context, c *cli.Command) error { return nil }) + // Start recovery state cleanup task + if c.Bool("recovery-enabled") { + serviceWaitingGroup.Go(func() error { + log.Info().Msg("starting recovery state cleanup service ...") + ticker := time.NewTicker(time.Minute * 5) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + log.Info().Msg("recovery state cleanup service stopped") + return nil + case <-ticker.C: + if err := _store.RecoveryStateCleanExpired(); err != nil { + log.Error().Err(err).Msg("failed to clean expired recovery states") + } else { + log.Trace().Msg("cleaned expired recovery states") + } + } + } + }) + } + // start the grpc server serviceWaitingGroup.Go(func() error { log.Info().Msg("starting grpc server ...") diff --git a/cmd/server/setup.go b/cmd/server/setup.go index 5728a8b153c..edf1b93df66 100644 --- a/cmd/server/setup.go +++ b/cmd/server/setup.go @@ -208,7 +208,6 @@ func setupEvilGlobals(ctx context.Context, c *cli.Command, s store.Store) (err e server.Config.Pipeline.DefaultCancelPreviousPipelineEvents = events server.Config.Pipeline.DefaultTimeout = c.Int64("default-pipeline-timeout") 
server.Config.Pipeline.MaxTimeout = c.Int64("max-pipeline-timeout") - _labels := c.StringSlice("default-workflow-labels") labels := make(map[string]string, len(_labels)) for _, v := range _labels { diff --git a/docs/docs/30-administration/10-configuration/10-server.md b/docs/docs/30-administration/10-configuration/10-server.md index 8994af44f65..ea78fa3608a 100644 --- a/docs/docs/30-administration/10-configuration/10-server.md +++ b/docs/docs/30-administration/10-configuration/10-server.md @@ -1089,6 +1089,15 @@ Fully qualified public forge URL, used if forge url is not a public URL. Format: --- +### RECOVERY_ENABLED + +- Name: `WOODPECKER_RECOVERY_ENABLED` +- Default: `false` + +Enables pipeline recovery state tracking. When enabled, agents can resume workflows after a restart (e.g. during rolling deployments or agent crashes). Steps that already completed successfully are skipped, and running steps are reconnected. Currently, Docker and Kubernetes backends support recovery. + +--- + ### GITHUB\_\* See [GitHub configuration](./12-forges/20-github.md#configuration) diff --git a/pipeline/backend/docker/docker.go b/pipeline/backend/docker/docker.go index 057284c2649..382cd9f551d 100644 --- a/pipeline/backend/docker/docker.go +++ b/pipeline/backend/docker/docker.go @@ -157,9 +157,12 @@ func (e *docker) SetupWorkflow(ctx context.Context, conf *backend.Config, taskUU Name: conf.Volume, Driver: volumeDriver, }) - if err != nil { + if err != nil && !errdefs.IsAlreadyExists(err) && !errdefs.IsConflict(err) { return err } + if err != nil { + log.Trace().Str("taskUUID", taskUUID).Msg("volume already exists, reusing") + } networkDriver := networkDriverBridge if e.info.OSType == "windows" { @@ -169,7 +172,13 @@ func (e *docker) SetupWorkflow(ctx context.Context, conf *backend.Config, taskUU Driver: networkDriver, EnableIPv6: &e.config.enableIPv6, }) - return err + if err != nil && !errdefs.IsAlreadyExists(err) && !errdefs.IsConflict(err) { + return err + } + if err != nil { + 
log.Trace().Str("taskUUID", taskUUID).Msg("network already exists, reusing") + } + return nil } func (e *docker) StartStep(ctx context.Context, step *backend.Step, taskUUID string) error { @@ -230,7 +239,12 @@ func (e *docker) StartStep(ctx context.Context, step *backend.Step, taskUUID str _, err = e.client.ContainerCreate(ctx, config, hostConfig, nil, nil, containerName) } if err != nil { - return err + // Container already exists (recovery scenario), continue without error + if errdefs.IsAlreadyExists(err) || errdefs.IsConflict(err) { + log.Trace().Str("container", containerName).Msg("container already exists, reusing") + } else { + return err + } } if len(step.NetworkMode) == 0 { @@ -238,7 +252,8 @@ func (e *docker) StartStep(ctx context.Context, step *backend.Step, taskUUID str err = e.client.NetworkConnect(ctx, net.Name, containerName, &network.EndpointSettings{ Aliases: net.Aliases, }) - if err != nil { + // Ignore error if container is already connected to network (recovery scenario) + if err != nil && !errdefs.IsAlreadyExists(err) && !errdefs.IsConflict(err) { return err } } @@ -246,13 +261,19 @@ func (e *docker) StartStep(ctx context.Context, step *backend.Step, taskUUID str // join the container to an existing network if e.config.network != "" { err = e.client.NetworkConnect(ctx, e.config.network, containerName, &network.EndpointSettings{}) - if err != nil { + // Ignore error if container is already connected to network (recovery scenario) + if err != nil && !errdefs.IsAlreadyExists(err) && !errdefs.IsConflict(err) { return err } } } - return e.client.ContainerStart(ctx, containerName, container.StartOptions{}) + err = e.client.ContainerStart(ctx, containerName, container.StartOptions{}) + // Ignore error if container is already running (recovery scenario) + if err != nil && !isErrContainerAlreadyStarted(err) { + return err + } + return nil } func (e *docker) WaitStep(ctx context.Context, step *backend.Step, taskUUID string) (*backend.State, error) { @@ 
-358,6 +379,18 @@ func (e *docker) DestroyWorkflow(ctx context.Context, conf *backend.Config, task return nil } +// Reconnect attempts to reconnect to a running container. +// This is used for recovery after agent restart. +func (e *docker) Reconnect(ctx context.Context, step *backend.Step, taskUUID string) error { + containerName := toContainerName(step) + _, err := e.client.ContainerInspect(ctx, containerName) + if err != nil { + return err + } + log.Debug().Str("taskUUID", taskUUID).Str("container", containerName).Msg("reconnected to existing container") + return nil +} + var removeOpts = container.RemoveOptions{ RemoveVolumes: true, RemoveLinks: false, @@ -377,6 +410,12 @@ func isErrContainerNotFoundOrNotRunning(err error) bool { (strings.Contains(err.Error(), "removal of container") && strings.Contains(err.Error(), "is already in progress"))) } +func isErrContainerAlreadyStarted(err error) bool { + // Error response from daemon: Container ... is already started + // Error response from podman daemon: container ... 
is already running + return err != nil && (strings.Contains(err.Error(), "is already started") || strings.Contains(err.Error(), "is already running")) +} + // normalizeArchType converts the arch type reported by docker info into // the runtime.GOARCH format // TODO: find out if we we need to convert other arch types too diff --git a/pipeline/backend/dummy/dummy.go b/pipeline/backend/dummy/dummy.go index 69eb8d5aa59..e5f6e54f66a 100644 --- a/pipeline/backend/dummy/dummy.go +++ b/pipeline/backend/dummy/dummy.go @@ -216,6 +216,10 @@ func (e *dummy) DestroyStep(_ context.Context, step *backend.Step, taskUUID stri return nil } +func (e *dummy) Reconnect(_ context.Context, _ *backend.Step, _ string) error { + return backend.ErrWorkflowRecoveryNotSupported +} + func (e *dummy) DestroyWorkflow(_ context.Context, _ *backend.Config, taskUUID string) error { log.Trace().Str("taskUUID", taskUUID).Msgf("delete workflow environment") diff --git a/pipeline/backend/kubernetes/kubernetes.go b/pipeline/backend/kubernetes/kubernetes.go index 49448cede9c..badb6219006 100644 --- a/pipeline/backend/kubernetes/kubernetes.go +++ b/pipeline/backend/kubernetes/kubernetes.go @@ -363,29 +363,49 @@ func (e *kube) TailStep(ctx context.Context, step *types.Step, taskUUID string) log.Trace().Str("taskUUID", taskUUID).Msgf("tail logs of pod: %s", podName) - up := make(chan bool) - - podUpdated := func(_, newPod any) { - pod, ok := newPod.(*v1.Pod) - if !ok { - log.Error().Msgf("could not parse pod: %v", newPod) - return - } + up := make(chan bool, 1) // buffered to avoid blocking in handlers + checkPodReady := func(pod *v1.Pod) { if pod.Name == podName { if isImagePullBackOffState(pod) || isInvalidImageName(pod) { - up <- true + select { + case up <- true: + default: + } + return } switch pod.Status.Phase { case v1.PodRunning, v1.PodSucceeded, v1.PodFailed: - up <- true + select { + case up <- true: + default: + } } } } + podAdded := func(obj any) { + pod, ok := obj.(*v1.Pod) + if !ok { + 
log.Error().Msgf("could not parse pod: %v", obj) + return + } + checkPodReady(pod) + } + + podUpdated := func(_, newPod any) { + pod, ok := newPod.(*v1.Pod) + if !ok { + log.Error().Msgf("could not parse pod: %v", newPod) + return + } + checkPodReady(pod) + } + si := informers.NewSharedInformerFactoryWithOptions(e.client, defaultResyncDuration, informers.WithNamespace(e.config.GetNamespace(step.OrgID))) if _, err := si.Core().V1().Pods().Informer().AddEventHandler( cache.ResourceEventHandlerFuncs{ + AddFunc: podAdded, UpdateFunc: podUpdated, }, ); err != nil { @@ -489,3 +509,20 @@ func (e *kube) DestroyWorkflow(ctx context.Context, conf *types.Config, taskUUID return nil } + +// Reconnect attempts to reconnect to a running pod. +func (e *kube) Reconnect(ctx context.Context, step *types.Step, taskUUID string) error { + name, err := podName(step) + if err != nil { + return fmt.Errorf("pod name error: %w", err) + } + + namespace := e.config.GetNamespace(step.OrgID) + _, err = e.client.CoreV1().Pods(namespace).Get(ctx, name, meta_v1.GetOptions{}) + if err != nil { + return fmt.Errorf("pod %s not found: %w", name, err) + } + + log.Debug().Str("taskUUID", taskUUID).Str("pod", name).Msg("reconnected to existing pod") + return nil +} diff --git a/pipeline/backend/kubernetes/pod.go b/pipeline/backend/kubernetes/pod.go index 04b3373afa4..15985791c41 100644 --- a/pipeline/backend/kubernetes/pod.go +++ b/pipeline/backend/kubernetes/pod.go @@ -647,8 +647,18 @@ func startPod(ctx context.Context, engine *kube, step *types.Step, options Backe return nil, err } + namespace := engineConfig.GetNamespace(step.OrgID) log.Trace().Msgf("creating pod: %s", pod.Name) - return engine.client.CoreV1().Pods(engineConfig.GetNamespace(step.OrgID)).Create(ctx, pod, meta_v1.CreateOptions{}) + createdPod, err := engine.client.CoreV1().Pods(namespace).Create(ctx, pod, meta_v1.CreateOptions{}) + if err != nil { + if errors.IsAlreadyExists(err) { + // Pod already exists (recovery scenario), continue 
without error + log.Trace().Msgf("pod already exists, reusing: %s", pod.Name) + return pod, nil + } + return nil, err + } + return createdPod, nil } func stopPod(ctx context.Context, engine *kube, step *types.Step, deleteOpts meta_v1.DeleteOptions) error { diff --git a/pipeline/backend/kubernetes/secrets.go b/pipeline/backend/kubernetes/secrets.go index d27ec5f46a0..7383412da36 100644 --- a/pipeline/backend/kubernetes/secrets.go +++ b/pipeline/backend/kubernetes/secrets.go @@ -290,6 +290,11 @@ func startRegistrySecret(ctx context.Context, engine *kube, step *types.Step) er log.Trace().Msgf("creating secret: %s", secret.Name) _, err = engine.client.CoreV1().Secrets(engine.config.GetNamespace(step.OrgID)).Create(ctx, secret, meta_v1.CreateOptions{}) if err != nil { + if errors.IsAlreadyExists(err) { + // Secret already exists (recovery scenario), ignore + log.Trace().Msgf("secret already exists, reusing: %s", secret.Name) + return nil + } return err } return nil @@ -321,6 +326,11 @@ func startStepSecret(ctx context.Context, e *kube, step *types.Step) error { log.Trace().Msgf("creating secret: %s", secret.Name) _, err = e.client.CoreV1().Secrets(e.config.GetNamespace(step.OrgID)).Create(ctx, secret, meta_v1.CreateOptions{}) if err != nil { + if errors.IsAlreadyExists(err) { + // Secret already exists (recovery scenario), ignore + log.Trace().Msgf("secret already exists, reusing: %s", secret.Name) + return nil + } return err } return nil diff --git a/pipeline/backend/kubernetes/service.go b/pipeline/backend/kubernetes/service.go index b86fbaa0d80..068e2151fe6 100644 --- a/pipeline/backend/kubernetes/service.go +++ b/pipeline/backend/kubernetes/service.go @@ -74,7 +74,16 @@ func startHeadlessService(ctx context.Context, engine *kube, namespace, taskUUID } log.Trace().Str("name", svc.Name).Interface("selector", svc.Spec.Selector).Msg("creating headless service") - return engine.client.CoreV1().Services(namespace).Create(ctx, svc, meta_v1.CreateOptions{}) + createdSvc, 
err := engine.client.CoreV1().Services(namespace).Create(ctx, svc, meta_v1.CreateOptions{}) + if err != nil { + if errors.IsAlreadyExists(err) { + // Service already exists (recovery scenario), continue without error + log.Trace().Str("name", svc.Name).Msg("headless service already exists, reusing") + return svc, nil + } + return nil, err + } + return createdSvc, nil } func stopHeadlessService(ctx context.Context, engine *kube, namespace, taskUUID string) error { diff --git a/pipeline/backend/kubernetes/volume.go b/pipeline/backend/kubernetes/volume.go index ad2bd2ff0de..5d640e2aea4 100644 --- a/pipeline/backend/kubernetes/volume.go +++ b/pipeline/backend/kubernetes/volume.go @@ -83,7 +83,16 @@ func startVolume(ctx context.Context, engine *kube, name, namespace string) (*v1 } log.Trace().Msgf("creating volume: %s", pvc.Name) - return engine.client.CoreV1().PersistentVolumeClaims(namespace).Create(ctx, pvc, meta_v1.CreateOptions{}) + createdPVC, err := engine.client.CoreV1().PersistentVolumeClaims(namespace).Create(ctx, pvc, meta_v1.CreateOptions{}) + if err != nil { + if errors.IsAlreadyExists(err) { + // Volume already exists (recovery scenario), continue without error + log.Trace().Msgf("volume already exists, reusing: %s", pvc.Name) + return pvc, nil + } + return nil, err + } + return createdPVC, nil } func stopVolume(ctx context.Context, engine *kube, name, namespace string, deleteOpts meta_v1.DeleteOptions) error { diff --git a/pipeline/backend/local/local.go b/pipeline/backend/local/local.go index f6c13a2ef11..8f1de3af900 100644 --- a/pipeline/backend/local/local.go +++ b/pipeline/backend/local/local.go @@ -256,6 +256,10 @@ func (e *local) DestroyStep(_ context.Context, step *types.Step, taskUUID string return nil } +func (e *local) Reconnect(_ context.Context, _ *types.Step, _ string) error { + return types.ErrWorkflowRecoveryNotSupported +} + func (e *local) DestroyWorkflow(_ context.Context, _ *types.Config, taskUUID string) error { 
log.Trace().Str("taskUUID", taskUUID).Msg("delete workflow environment") diff --git a/pipeline/backend/types/backend.go b/pipeline/backend/types/backend.go index 0877c422a12..62f6b88dab6 100644 --- a/pipeline/backend/types/backend.go +++ b/pipeline/backend/types/backend.go @@ -161,6 +161,12 @@ type Backend interface { // This function may be called concurrently for different workflows // and must be thread-safe. DestroyWorkflow(ctx context.Context, conf *Config, taskUUID string) error + + // Reconnect attempts to reconnect to a running step after agent restart. + // Returns nil if reconnection is possible, error otherwise. + // After successful reconnect, TailStep and WaitStep can be used normally. + // Backends that do not support reconnection should return ErrWorkflowRecoveryNotSupported. + Reconnect(ctx context.Context, step *Step, taskUUID string) error } // BackendInfo represents the reported information of a loaded backend. diff --git a/pipeline/backend/types/errors.go b/pipeline/backend/types/errors.go index c472496f7e4..ab2671dd52f 100644 --- a/pipeline/backend/types/errors.go +++ b/pipeline/backend/types/errors.go @@ -16,4 +16,7 @@ package types import "errors" -var ErrNoCliContextFound = errors.New("no CliContext in context found") +var ( + ErrNoCliContextFound = errors.New("no CliContext in context found") + ErrWorkflowRecoveryNotSupported = errors.New("workflow recovery not supported") +) diff --git a/pipeline/runtime/executor.go b/pipeline/runtime/executor.go index edf1b0d195c..ad70a35e746 100644 --- a/pipeline/runtime/executor.go +++ b/pipeline/runtime/executor.go @@ -46,6 +46,12 @@ func (r *Runtime) Run(runnerCtx context.Context) error { } defer func() { + // Skip destroying workflow if recovery is enabled and context was canceled but NOT by user. 
+ if r.recoveryManager.IsRecoverable(runnerCtx) { + logger.Info().Msg("skipping workflow destruction, preserving for recovery") + return + } + ctx := runnerCtx //nolint:contextcheck if ctx.Err() != nil { ctx = GetShutdownCtx() @@ -68,7 +74,6 @@ func (r *Runtime) Run(runnerCtx context.Context) error { ExitCode: 1, } - // Trace the error if we have a tracer if r.tracer != nil { if err := r.tracer.Trace(state); err != nil { logger.Error().Err(err).Msg("failed to trace step error") @@ -155,6 +160,49 @@ func (r *Runtime) execAll(runnerCtx context.Context, steps []*backend.Step) <-ch return nil } + // Check recovery state if recovery is enabled + if r.recoveryManager.Enabled() { + shouldSkip, recoveryState := r.recoveryManager.ShouldSkipStep(step) + if shouldSkip { + logger.Info(). + Str("step", step.Name). + Int("status", int(recoveryState.Status)). + Int("exit_code", recoveryState.ExitCode). + Msg("skipping step due to recovery state") + + // Trace the recovered state + processState := &backend.State{ + Exited: true, + ExitCode: recoveryState.ExitCode, + } + if traceErr := r.traceStep(processState, nil, step); traceErr != nil { + return traceErr + } + + // Return error if step was failed + if recoveryState.ExitCode != 0 { + return &pipeline_errors.ExitError{ + UUID: step.UUID, + Code: recoveryState.ExitCode, + } + } + return nil + } else if r.recoveryManager.ShouldReconnect(recoveryState) { + // Attempt to reconnect to a running step + reconnectErr := r.engine.Reconnect(r.ctx, step, r.taskUUID) //nolint:contextcheck + if reconnectErr == nil { + logger.Info().Str("step", step.Name).Msg("reconnecting to existing step") + return r.execReconnected(step) + } + logger.Debug().Err(reconnectErr).Str("step", step.Name).Msg("cannot reconnect, re-executing step") + } + + // Mark step as running in recovery state + if err := r.recoveryManager.MarkStepRunning(r.ctx, step); err != nil { //nolint:contextcheck + logger.Warn().Err(err).Str("step", step.Name).Msg("failed to mark step as 
running") + } + } + // Trace started. err := r.traceStep(nil, nil, step) if err != nil { @@ -182,8 +230,33 @@ func (r *Runtime) execAll(runnerCtx context.Context, steps []*backend.Step) <-ch err = pipeline_errors.ErrCancel } - // Return the error after tracing it. - err = r.traceStep(processState, err, step) + // Check if workflow is recoverable + recoverable := r.recoveryManager.IsRecoverable(r.ctx) //nolint:contextcheck + + // Update recovery state based on step result + if r.recoveryManager.Enabled() { + switch { + case recoverable: + logger.Debug().Str("step", step.Name).Msg("workflow is recoverable, not updating step state") + case processState != nil && processState.ExitCode == 0 && err == nil: + if markErr := r.recoveryManager.MarkStepSuccess(r.ctx, step); markErr != nil { //nolint:contextcheck + logger.Warn().Err(markErr).Str("step", step.Name).Msg("failed to mark step as success") + } + default: + exitCode := 1 + if processState != nil { + exitCode = processState.ExitCode + } + if markErr := r.recoveryManager.MarkStepFailed(r.ctx, step, exitCode); markErr != nil { //nolint:contextcheck + logger.Warn().Err(markErr).Str("step", step.Name).Msg("failed to mark step as failed") + } + } + } + + // Skip tracing if workflow is recoverable + if !recoverable { + err = r.traceStep(processState, err, step) + } if err != nil && step.Failure == metadata.FailureIgnore { return nil } @@ -216,6 +289,75 @@ func (r *Runtime) execAll(runnerCtx context.Context, steps []*backend.Step) <-ch return done } +// execReconnected handles a reconnected step (waiting for completion without re-executing). 
+func (r *Runtime) execReconnected(step *backend.Step) error {
+	logger := r.MakeLogger()
+
+	// Stream the step's logs in the background when a log handler is configured.
+	var tailers sync.WaitGroup
+	if r.logger != nil {
+		stream, tailErr := r.engine.TailStep(r.ctx, step, r.taskUUID)
+		if tailErr == nil {
+			tailers.Add(1)
+			go func() {
+				defer tailers.Done()
+				if logErr := r.logger(step, stream); logErr != nil {
+					logger.Error().Err(logErr).Msg("process logging failed")
+				}
+				_ = stream.Close()
+			}()
+		} else {
+			logger.Warn().Err(tailErr).Str("step", step.Name).Msg("failed to retrieve logs for reconnected step, continuing without logs")
+		}
+	}
+
+	// Detached steps are not waited for.
+	if step.Detached {
+		return nil
+	}
+
+	tailers.Wait()
+	finalState, waitErr := r.engine.WaitStep(r.ctx, step, r.taskUUID)
+	if errors.Is(waitErr, context.Canceled) {
+		return pipeline_errors.ErrCancel
+	}
+	if waitErr != nil {
+		return waitErr
+	}
+
+	// Persist the outcome; a failure to persist is only logged.
+	if finalState.ExitCode != 0 {
+		if markErr := r.recoveryManager.MarkStepFailed(r.ctx, step, finalState.ExitCode); markErr != nil {
+			logger.Warn().Err(markErr).Str("step", step.Name).Msg("failed to mark step as failed")
+		}
+	} else if markErr := r.recoveryManager.MarkStepSuccess(r.ctx, step); markErr != nil {
+		logger.Warn().Err(markErr).Str("step", step.Name).Msg("failed to mark step as success")
+	}
+
+	// Trace the result
+	if traceErr := r.traceStep(finalState, nil, step); traceErr != nil {
+		return traceErr
+	}
+
+	return exitError(step, finalState)
+}
+
+// exitError maps a finished step's wait state to an error: an OomError when
+// the step was OOM-killed, an ExitError when the exit code is non-zero, and
+// nil otherwise. The OOM check takes precedence over the exit code.
+func exitError(step *backend.Step, waitState *backend.State) error {
+	switch {
+	case waitState.OOMKilled:
+		return &pipeline_errors.OomError{
+			UUID: step.UUID,
+			Code: waitState.ExitCode,
+		}
+	case waitState.ExitCode != 0:
+		return &pipeline_errors.ExitError{
+			UUID: step.UUID,
+			Code: waitState.ExitCode,
+		}
+	default:
+		return nil
+	}
+}
+
 // Executes the step and returns the state and error.
func (r *Runtime) exec(runnerCtx context.Context, step *backend.Step, setupWg *sync.WaitGroup) (*backend.State, error) { defer func() { @@ -281,17 +423,5 @@ func (r *Runtime) exec(runnerCtx context.Context, step *backend.Step, setupWg *s waitState.Error = pipeline_errors.ErrCancel } - if waitState.OOMKilled { - return waitState, &pipeline_errors.OomError{ - UUID: step.UUID, - Code: waitState.ExitCode, - } - } else if waitState.ExitCode != 0 { - return waitState, &pipeline_errors.ExitError{ - UUID: step.UUID, - Code: waitState.ExitCode, - } - } - - return waitState, nil + return waitState, exitError(step, waitState) } diff --git a/pipeline/runtime/option.go b/pipeline/runtime/option.go index f87d4a108a9..19bb6ec4059 100644 --- a/pipeline/runtime/option.go +++ b/pipeline/runtime/option.go @@ -64,3 +64,10 @@ func WithTaskUUID(uuid string) Option { r.taskUUID = uuid } } + +// WithRecoveryManager returns an option configured with a recovery manager. +func WithRecoveryManager(rm *RecoveryManager) Option { + return func(r *Runtime) { + r.recoveryManager = rm + } +} diff --git a/pipeline/runtime/recovery.go b/pipeline/runtime/recovery.go new file mode 100644 index 00000000000..4dd0633d9f6 --- /dev/null +++ b/pipeline/runtime/recovery.go @@ -0,0 +1,164 @@ +// Copyright 2026 Woodpecker Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package runtime + +import ( + "context" + "sync/atomic" + + backend "go.woodpecker-ci.org/woodpecker/v3/pipeline/backend/types" + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" +) + +// RecoveryClient defines the interface for recovery state communication. +type RecoveryClient interface { + InitWorkflowRecovery(ctx context.Context, workflowID string, stepUUIDs []string, timeoutSeconds int64) (map[string]*types.RecoveryState, error) + UpdateStepRecoveryState(ctx context.Context, workflowID, stepUUID string, status types.RecoveryStatus, exitCode int) error +} + +// RecoveryManager manages the recovery state for pipeline steps. +type RecoveryManager struct { + client RecoveryClient + workflowID string + enabled bool + stateCache map[string]*types.RecoveryState // step UUID -> state (loaded once) + canceled atomic.Bool // set when workflow is canceled by user/API +} + +// NewRecoveryManager creates a new RecoveryManager. +func NewRecoveryManager(client RecoveryClient, workflowID string, enabled bool) *RecoveryManager { + return &RecoveryManager{ + client: client, + workflowID: workflowID, + enabled: enabled, + } +} + +// InitRecoveryState initializes recovery state for all steps in the config. +// On first run, creates recovery states for all steps. +// On agent restart, loads existing states into cache. +func (m *RecoveryManager) InitRecoveryState(ctx context.Context, config *backend.Config, timeoutSeconds int64) error { + if !m.enabled { + return nil + } + + // Create recovery states (idempotent) and load current states in a single RPC + stepUUIDs := collectStepUUIDs(config) + states, err := m.client.InitWorkflowRecovery(ctx, m.workflowID, stepUUIDs, timeoutSeconds) + if err != nil { + return err + } + m.stateCache = states + return nil +} + +// GetStepState retrieves the recovery state for a step from cache. 
+func (m *RecoveryManager) GetStepState(step *backend.Step) *types.RecoveryState { + if !m.enabled || m.stateCache == nil { + return &types.RecoveryState{Status: types.RecoveryStatusPending} + } + + if state, ok := m.stateCache[step.UUID]; ok { + return state + } + return &types.RecoveryState{Status: types.RecoveryStatusPending} +} + +// MarkStepRunning marks a step as running. +func (m *RecoveryManager) MarkStepRunning(ctx context.Context, step *backend.Step) error { + if !m.enabled { + return nil + } + + return m.client.UpdateStepRecoveryState(ctx, m.workflowID, step.UUID, types.RecoveryStatusRunning, 0) +} + +// MarkStepSuccess marks a step as successfully completed. +func (m *RecoveryManager) MarkStepSuccess(ctx context.Context, step *backend.Step) error { + if !m.enabled { + return nil + } + + return m.client.UpdateStepRecoveryState(ctx, m.workflowID, step.UUID, types.RecoveryStatusSuccess, 0) +} + +// MarkStepFailed marks a step as failed. +func (m *RecoveryManager) MarkStepFailed(ctx context.Context, step *backend.Step, exitCode int) error { + if !m.enabled { + return nil + } + + return m.client.UpdateStepRecoveryState(ctx, m.workflowID, step.UUID, types.RecoveryStatusFailed, exitCode) +} + +// IsRecoverable returns true if the workflow can be recovered by another agent +// (context canceled with recovery enabled, but not canceled by user/API). +func (m *RecoveryManager) IsRecoverable(ctx context.Context) bool { + return ctx.Err() != nil && m.enabled && !m.canceled.Load() +} + +// ShouldSkipStep determines if a step should be skipped based on its recovery state. +// Returns true if the step was already completed (success, failed, or skipped). 
+func (m *RecoveryManager) ShouldSkipStep(step *backend.Step) (bool, *types.RecoveryState) { + if !m.enabled { + return false, nil + } + + state := m.GetStepState(step) + + switch state.Status { + case types.RecoveryStatusSuccess, types.RecoveryStatusFailed, types.RecoveryStatusSkipped: + return true, state + default: + return false, state + } +} + +// ShouldReconnect determines if we should attempt to reconnect to a running step. +// This is only applicable for backends that support reconnection (Docker, Kubernetes). +func (m *RecoveryManager) ShouldReconnect(state *types.RecoveryState) bool { + if state == nil { + return false + } + return state.Status == types.RecoveryStatusRunning +} + +// Enabled returns whether recovery is enabled. +func (m *RecoveryManager) Enabled() bool { + return m.enabled +} + +// SetCanceled marks the workflow as canceled by user/API. +func (m *RecoveryManager) SetCanceled() { + m.canceled.Store(true) +} + +// WasCanceled returns whether the workflow was canceled by user/API. +func (m *RecoveryManager) WasCanceled() bool { + return m.canceled.Load() +} + +// collectStepUUIDs extracts all step UUIDs from the config. +func collectStepUUIDs(config *backend.Config) []string { + var uuids []string + for _, stage := range config.Stages { + for _, step := range stage.Steps { + if step.UUID != "" { + uuids = append(uuids, step.UUID) + } + } + } + return uuids +} diff --git a/pipeline/runtime/recovery_test.go b/pipeline/runtime/recovery_test.go new file mode 100644 index 00000000000..0d28667c36c --- /dev/null +++ b/pipeline/runtime/recovery_test.go @@ -0,0 +1,291 @@ +// Copyright 2026 Woodpecker Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runtime + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + backend "go.woodpecker-ci.org/woodpecker/v3/pipeline/backend/types" + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" +) + +type mockRecoveryClient struct { + initResult map[string]*types.RecoveryState + initErr error + updateErr error + + // Track calls for assertions + initCalled bool + initWorkflowID string + initStepUUIDs []string + initTimeout int64 + updateCalls []updateCall +} + +type updateCall struct { + workflowID string + stepUUID string + status types.RecoveryStatus + exitCode int +} + +func (m *mockRecoveryClient) InitWorkflowRecovery(_ context.Context, workflowID string, stepUUIDs []string, timeout int64) (map[string]*types.RecoveryState, error) { + m.initCalled = true + m.initWorkflowID = workflowID + m.initStepUUIDs = stepUUIDs + m.initTimeout = timeout + return m.initResult, m.initErr +} + +func (m *mockRecoveryClient) UpdateStepRecoveryState(_ context.Context, workflowID, stepUUID string, status types.RecoveryStatus, exitCode int) error { + m.updateCalls = append(m.updateCalls, updateCall{workflowID, stepUUID, status, exitCode}) + return m.updateErr +} + +func TestInitRecoveryState(t *testing.T) { + t.Run("disabled manager returns nil without calling client", func(t *testing.T) { + client := &mockRecoveryClient{} + mgr := NewRecoveryManager(client, "wf-1", false) + + err := mgr.InitRecoveryState(t.Context(), &backend.Config{}, 300) + require.NoError(t, err) + assert.False(t, 
client.initCalled) + }) + + t.Run("enabled manager collects step UUIDs and populates cache", func(t *testing.T) { + initResult := map[string]*types.RecoveryState{ + "uuid-1": {Status: types.RecoveryStatusPending}, + "uuid-2": {Status: types.RecoveryStatusRunning}, + "uuid-3": {Status: types.RecoveryStatusSuccess, ExitCode: 0}, + } + client := &mockRecoveryClient{initResult: initResult} + mgr := NewRecoveryManager(client, "wf-1", true) + + config := &backend.Config{ + Stages: []*backend.Stage{ + {Steps: []*backend.Step{{UUID: "uuid-1"}, {UUID: "uuid-2"}}}, + {Steps: []*backend.Step{{UUID: "uuid-3"}}}, + }, + } + + err := mgr.InitRecoveryState(t.Context(), config, 300) + require.NoError(t, err) + assert.True(t, client.initCalled) + + // Verify params forwarded to client + assert.Equal(t, "wf-1", client.initWorkflowID) + assert.Equal(t, []string{"uuid-1", "uuid-2", "uuid-3"}, client.initStepUUIDs) + assert.Equal(t, int64(300), client.initTimeout) + + // Verify cache is populated + step1 := &backend.Step{UUID: "uuid-1"} + state := mgr.GetStepState(step1) + assert.Equal(t, types.RecoveryStatusPending, state.Status) + + step3 := &backend.Step{UUID: "uuid-3"} + state = mgr.GetStepState(step3) + assert.Equal(t, types.RecoveryStatusSuccess, state.Status) + }) + + t.Run("empty UUIDs are filtered from collectStepUUIDs", func(t *testing.T) { + client := &mockRecoveryClient{initResult: map[string]*types.RecoveryState{ + "uuid-1": {Status: types.RecoveryStatusPending}, + }} + mgr := NewRecoveryManager(client, "wf-1", true) + + config := &backend.Config{ + Stages: []*backend.Stage{ + {Steps: []*backend.Step{{UUID: "uuid-1"}, {UUID: ""}}}, + }, + } + + err := mgr.InitRecoveryState(t.Context(), config, 300) + require.NoError(t, err) + assert.Equal(t, []string{"uuid-1"}, client.initStepUUIDs) + }) + + t.Run("client error propagates", func(t *testing.T) { + client := &mockRecoveryClient{initErr: errors.New("rpc failed")} + mgr := NewRecoveryManager(client, "wf-1", true) + + config := 
&backend.Config{ + Stages: []*backend.Stage{ + {Steps: []*backend.Step{{UUID: "uuid-1"}}}, + }, + } + + err := mgr.InitRecoveryState(t.Context(), config, 300) + require.EqualError(t, err, "rpc failed") + }) +} + +func TestShouldSkipStep(t *testing.T) { + tests := []struct { + name string + status types.RecoveryStatus + wantSkip bool + }{ + {"Pending", types.RecoveryStatusPending, false}, + {"Running", types.RecoveryStatusRunning, false}, + {"Success", types.RecoveryStatusSuccess, true}, + {"Failed", types.RecoveryStatusFailed, true}, + {"Skipped", types.RecoveryStatusSkipped, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := &mockRecoveryClient{ + initResult: map[string]*types.RecoveryState{ + "step-1": {Status: tt.status}, + }, + } + mgr := NewRecoveryManager(client, "wf-1", true) + + config := &backend.Config{ + Stages: []*backend.Stage{ + {Steps: []*backend.Step{{UUID: "step-1"}}}, + }, + } + err := mgr.InitRecoveryState(t.Context(), config, 300) + require.NoError(t, err) + + skip, state := mgr.ShouldSkipStep(&backend.Step{UUID: "step-1"}) + assert.Equal(t, tt.wantSkip, skip) + assert.Equal(t, tt.status, state.Status) + }) + } + + t.Run("disabled manager returns false nil", func(t *testing.T) { + mgr := NewRecoveryManager(nil, "wf-1", false) + skip, state := mgr.ShouldSkipStep(&backend.Step{UUID: "step-1"}) + assert.False(t, skip) + assert.Nil(t, state) + }) +} + +func TestShouldReconnect(t *testing.T) { + mgr := NewRecoveryManager(nil, "wf-1", true) + + assert.False(t, mgr.ShouldReconnect(nil)) + assert.True(t, mgr.ShouldReconnect(&types.RecoveryState{Status: types.RecoveryStatusRunning})) + assert.False(t, mgr.ShouldReconnect(&types.RecoveryState{Status: types.RecoveryStatusPending})) + assert.False(t, mgr.ShouldReconnect(&types.RecoveryState{Status: types.RecoveryStatusSuccess})) + assert.False(t, mgr.ShouldReconnect(&types.RecoveryState{Status: types.RecoveryStatusFailed})) + assert.False(t, 
mgr.ShouldReconnect(&types.RecoveryState{Status: types.RecoveryStatusSkipped})) +} + +func TestIsRecoverable(t *testing.T) { + t.Run("active context returns false", func(t *testing.T) { + mgr := NewRecoveryManager(nil, "wf-1", true) + assert.False(t, mgr.IsRecoverable(t.Context())) + }) + + t.Run("canceled context with recovery enabled returns true", func(t *testing.T) { + mgr := NewRecoveryManager(nil, "wf-1", true) + ctx, cancel := context.WithCancelCause(t.Context()) + cancel(nil) + assert.True(t, mgr.IsRecoverable(ctx)) + }) + + t.Run("canceled context with recovery disabled returns false", func(t *testing.T) { + mgr := NewRecoveryManager(nil, "wf-1", false) + ctx, cancel := context.WithCancelCause(t.Context()) + cancel(nil) + assert.False(t, mgr.IsRecoverable(ctx)) + }) + + t.Run("canceled context with user cancel returns false", func(t *testing.T) { + mgr := NewRecoveryManager(nil, "wf-1", true) + mgr.SetCanceled() + ctx, cancel := context.WithCancelCause(t.Context()) + cancel(nil) + assert.False(t, mgr.IsRecoverable(ctx)) + }) +} + +func TestMarkStepMethods(t *testing.T) { + t.Run("MarkStepRunning calls client with correct args", func(t *testing.T) { + client := &mockRecoveryClient{} + mgr := NewRecoveryManager(client, "wf-1", true) + step := &backend.Step{UUID: "step-1"} + + err := mgr.MarkStepRunning(t.Context(), step) + require.NoError(t, err) + require.Len(t, client.updateCalls, 1) + assert.Equal(t, "wf-1", client.updateCalls[0].workflowID) + assert.Equal(t, "step-1", client.updateCalls[0].stepUUID) + assert.Equal(t, types.RecoveryStatusRunning, client.updateCalls[0].status) + assert.Equal(t, 0, client.updateCalls[0].exitCode) + }) + + t.Run("MarkStepSuccess calls client with correct args", func(t *testing.T) { + client := &mockRecoveryClient{} + mgr := NewRecoveryManager(client, "wf-1", true) + step := &backend.Step{UUID: "step-2"} + + err := mgr.MarkStepSuccess(t.Context(), step) + require.NoError(t, err) + require.Len(t, client.updateCalls, 1) + 
assert.Equal(t, types.RecoveryStatusSuccess, client.updateCalls[0].status) + assert.Equal(t, 0, client.updateCalls[0].exitCode) + }) + + t.Run("MarkStepFailed calls client with correct args", func(t *testing.T) { + client := &mockRecoveryClient{} + mgr := NewRecoveryManager(client, "wf-1", true) + step := &backend.Step{UUID: "step-3"} + + err := mgr.MarkStepFailed(t.Context(), step, 137) + require.NoError(t, err) + require.Len(t, client.updateCalls, 1) + assert.Equal(t, types.RecoveryStatusFailed, client.updateCalls[0].status) + assert.Equal(t, 137, client.updateCalls[0].exitCode) + }) + + t.Run("disabled manager returns nil without calling client", func(t *testing.T) { + client := &mockRecoveryClient{} + mgr := NewRecoveryManager(client, "wf-1", false) + step := &backend.Step{UUID: "step-1"} + + require.NoError(t, mgr.MarkStepRunning(t.Context(), step)) + require.NoError(t, mgr.MarkStepSuccess(t.Context(), step)) + require.NoError(t, mgr.MarkStepFailed(t.Context(), step, 1)) + assert.Empty(t, client.updateCalls) + }) +} + +func TestGetStepStateCacheMiss(t *testing.T) { + client := &mockRecoveryClient{initResult: map[string]*types.RecoveryState{ + "uuid-1": {Status: types.RecoveryStatusSuccess}, + }} + mgr := NewRecoveryManager(client, "wf-1", true) + + config := &backend.Config{ + Stages: []*backend.Stage{ + {Steps: []*backend.Step{{UUID: "uuid-1"}}}, + }, + } + err := mgr.InitRecoveryState(t.Context(), config, 300) + require.NoError(t, err) + + // Unknown UUID returns default Pending state + state := mgr.GetStepState(&backend.Step{UUID: "unknown-uuid"}) + assert.Equal(t, types.RecoveryStatusPending, state.Status) +} diff --git a/pipeline/runtime/runtime.go b/pipeline/runtime/runtime.go index 5fb197b4ab5..de250f4e220 100644 --- a/pipeline/runtime/runtime.go +++ b/pipeline/runtime/runtime.go @@ -39,8 +39,9 @@ type Runtime struct { // Cleanup operations should use the runnerCtx passed to Run() ctx context.Context - tracer tracing.Tracer - logger logging.Logger + 
tracer tracing.Tracer + logger logging.Logger + recoveryManager *RecoveryManager taskUUID string @@ -55,6 +56,7 @@ func New(spec *backend.Config, opts ...Option) *Runtime { r.spec = spec r.ctx = context.Background() r.taskUUID = ulid.Make().String() + r.recoveryManager = NewRecoveryManager(nil, "", false) for _, opts := range opts { opts(r) } diff --git a/pipeline/types/recovery.go b/pipeline/types/recovery.go new file mode 100644 index 00000000000..34436298672 --- /dev/null +++ b/pipeline/types/recovery.go @@ -0,0 +1,32 @@ +// Copyright 2026 Woodpecker Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +// RecoveryStatus represents the recovery state of a step. +type RecoveryStatus int + +// RecoveryState represents the recovery state for a step. 
+type RecoveryState struct { + Status RecoveryStatus `json:"status"` + ExitCode int `json:"exit_code"` +} + +const ( + RecoveryStatusPending RecoveryStatus = iota + RecoveryStatusRunning + RecoveryStatusSuccess + RecoveryStatusFailed + RecoveryStatusSkipped +) diff --git a/rpc/mocks/mock_Peer.go b/rpc/mocks/mock_Peer.go index c66458a8cf3..129dce1012a 100644 --- a/rpc/mocks/mock_Peer.go +++ b/rpc/mocks/mock_Peer.go @@ -8,6 +8,7 @@ import ( "context" mock "github.com/stretchr/testify/mock" + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" "go.woodpecker-ci.org/woodpecker/v3/rpc" ) @@ -261,6 +262,86 @@ func (_c *MockPeer_Init_Call) RunAndReturn(run func(c context.Context, workflowI return _c } +// InitWorkflowRecovery provides a mock function for the type MockPeer +func (_mock *MockPeer) InitWorkflowRecovery(ctx context.Context, workflowID string, stepUUIDs []string, timeoutSeconds int64) (map[string]*types.RecoveryState, error) { + ret := _mock.Called(ctx, workflowID, stepUUIDs, timeoutSeconds) + + if len(ret) == 0 { + panic("no return value specified for InitWorkflowRecovery") + } + + var r0 map[string]*types.RecoveryState + var r1 error + if returnFunc, ok := ret.Get(0).(func(context.Context, string, []string, int64) (map[string]*types.RecoveryState, error)); ok { + return returnFunc(ctx, workflowID, stepUUIDs, timeoutSeconds) + } + if returnFunc, ok := ret.Get(0).(func(context.Context, string, []string, int64) map[string]*types.RecoveryState); ok { + r0 = returnFunc(ctx, workflowID, stepUUIDs, timeoutSeconds) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(map[string]*types.RecoveryState) + } + } + if returnFunc, ok := ret.Get(1).(func(context.Context, string, []string, int64) error); ok { + r1 = returnFunc(ctx, workflowID, stepUUIDs, timeoutSeconds) + } else { + r1 = ret.Error(1) + } + return r0, r1 +} + +// MockPeer_InitWorkflowRecovery_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'InitWorkflowRecovery' 
+type MockPeer_InitWorkflowRecovery_Call struct { + *mock.Call +} + +// InitWorkflowRecovery is a helper method to define mock.On call +// - ctx context.Context +// - workflowID string +// - stepUUIDs []string +// - timeoutSeconds int64 +func (_e *MockPeer_Expecter) InitWorkflowRecovery(ctx interface{}, workflowID interface{}, stepUUIDs interface{}, timeoutSeconds interface{}) *MockPeer_InitWorkflowRecovery_Call { + return &MockPeer_InitWorkflowRecovery_Call{Call: _e.mock.On("InitWorkflowRecovery", ctx, workflowID, stepUUIDs, timeoutSeconds)} +} + +func (_c *MockPeer_InitWorkflowRecovery_Call) Run(run func(ctx context.Context, workflowID string, stepUUIDs []string, timeoutSeconds int64)) *MockPeer_InitWorkflowRecovery_Call { + _c.Call.Run(func(args mock.Arguments) { + var arg0 context.Context + if args[0] != nil { + arg0 = args[0].(context.Context) + } + var arg1 string + if args[1] != nil { + arg1 = args[1].(string) + } + var arg2 []string + if args[2] != nil { + arg2 = args[2].([]string) + } + var arg3 int64 + if args[3] != nil { + arg3 = args[3].(int64) + } + run( + arg0, + arg1, + arg2, + arg3, + ) + }) + return _c +} + +func (_c *MockPeer_InitWorkflowRecovery_Call) Return(stringToRecoveryState map[string]*types.RecoveryState, err error) *MockPeer_InitWorkflowRecovery_Call { + _c.Call.Return(stringToRecoveryState, err) + return _c +} + +func (_c *MockPeer_InitWorkflowRecovery_Call) RunAndReturn(run func(ctx context.Context, workflowID string, stepUUIDs []string, timeoutSeconds int64) (map[string]*types.RecoveryState, error)) *MockPeer_InitWorkflowRecovery_Call { + _c.Call.Return(run) + return _c +} + // Next provides a mock function for the type MockPeer func (_mock *MockPeer) Next(c context.Context, f rpc.Filter) (*rpc.Workflow, error) { ret := _mock.Called(c, f) @@ -330,22 +411,22 @@ func (_c *MockPeer_Next_Call) RunAndReturn(run func(c context.Context, f rpc.Fil } // RegisterAgent provides a mock function for the type MockPeer -func (_mock *MockPeer) 
RegisterAgent(ctx context.Context, info rpc.AgentInfo) (int64, error) { +func (_mock *MockPeer) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (rpc.AgentConfig, error) { ret := _mock.Called(ctx, info) if len(ret) == 0 { panic("no return value specified for RegisterAgent") } - var r0 int64 + var r0 rpc.AgentConfig var r1 error - if returnFunc, ok := ret.Get(0).(func(context.Context, rpc.AgentInfo) (int64, error)); ok { + if returnFunc, ok := ret.Get(0).(func(context.Context, rpc.AgentInfo) (rpc.AgentConfig, error)); ok { return returnFunc(ctx, info) } - if returnFunc, ok := ret.Get(0).(func(context.Context, rpc.AgentInfo) int64); ok { + if returnFunc, ok := ret.Get(0).(func(context.Context, rpc.AgentInfo) rpc.AgentConfig); ok { r0 = returnFunc(ctx, info) } else { - r0 = ret.Get(0).(int64) + r0 = ret.Get(0).(rpc.AgentConfig) } if returnFunc, ok := ret.Get(1).(func(context.Context, rpc.AgentInfo) error); ok { r1 = returnFunc(ctx, info) @@ -385,12 +466,12 @@ func (_c *MockPeer_RegisterAgent_Call) Run(run func(ctx context.Context, info rp return _c } -func (_c *MockPeer_RegisterAgent_Call) Return(n int64, err error) *MockPeer_RegisterAgent_Call { - _c.Call.Return(n, err) +func (_c *MockPeer_RegisterAgent_Call) Return(agentConfig rpc.AgentConfig, err error) *MockPeer_RegisterAgent_Call { + _c.Call.Return(agentConfig, err) return _c } -func (_c *MockPeer_RegisterAgent_Call) RunAndReturn(run func(ctx context.Context, info rpc.AgentInfo) (int64, error)) *MockPeer_RegisterAgent_Call { +func (_c *MockPeer_RegisterAgent_Call) RunAndReturn(run func(ctx context.Context, info rpc.AgentInfo) (rpc.AgentConfig, error)) *MockPeer_RegisterAgent_Call { _c.Call.Return(run) return _c } @@ -560,6 +641,81 @@ func (_c *MockPeer_Update_Call) RunAndReturn(run func(c context.Context, workflo return _c } +// UpdateStepRecoveryState provides a mock function for the type MockPeer +func (_mock *MockPeer) UpdateStepRecoveryState(ctx context.Context, workflowID string, stepUUID string, 
status types.RecoveryStatus, exitCode int) error { + ret := _mock.Called(ctx, workflowID, stepUUID, status, exitCode) + + if len(ret) == 0 { + panic("no return value specified for UpdateStepRecoveryState") + } + + var r0 error + if returnFunc, ok := ret.Get(0).(func(context.Context, string, string, types.RecoveryStatus, int) error); ok { + r0 = returnFunc(ctx, workflowID, stepUUID, status, exitCode) + } else { + r0 = ret.Error(0) + } + return r0 +} + +// MockPeer_UpdateStepRecoveryState_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateStepRecoveryState' +type MockPeer_UpdateStepRecoveryState_Call struct { + *mock.Call +} + +// UpdateStepRecoveryState is a helper method to define mock.On call +// - ctx context.Context +// - workflowID string +// - stepUUID string +// - status types.RecoveryStatus +// - exitCode int +func (_e *MockPeer_Expecter) UpdateStepRecoveryState(ctx interface{}, workflowID interface{}, stepUUID interface{}, status interface{}, exitCode interface{}) *MockPeer_UpdateStepRecoveryState_Call { + return &MockPeer_UpdateStepRecoveryState_Call{Call: _e.mock.On("UpdateStepRecoveryState", ctx, workflowID, stepUUID, status, exitCode)} +} + +func (_c *MockPeer_UpdateStepRecoveryState_Call) Run(run func(ctx context.Context, workflowID string, stepUUID string, status types.RecoveryStatus, exitCode int)) *MockPeer_UpdateStepRecoveryState_Call { + _c.Call.Run(func(args mock.Arguments) { + var arg0 context.Context + if args[0] != nil { + arg0 = args[0].(context.Context) + } + var arg1 string + if args[1] != nil { + arg1 = args[1].(string) + } + var arg2 string + if args[2] != nil { + arg2 = args[2].(string) + } + var arg3 types.RecoveryStatus + if args[3] != nil { + arg3 = args[3].(types.RecoveryStatus) + } + var arg4 int + if args[4] != nil { + arg4 = args[4].(int) + } + run( + arg0, + arg1, + arg2, + arg3, + arg4, + ) + }) + return _c +} + +func (_c *MockPeer_UpdateStepRecoveryState_Call) Return(err error) 
*MockPeer_UpdateStepRecoveryState_Call { + _c.Call.Return(err) + return _c +} + +func (_c *MockPeer_UpdateStepRecoveryState_Call) RunAndReturn(run func(ctx context.Context, workflowID string, stepUUID string, status types.RecoveryStatus, exitCode int) error) *MockPeer_UpdateStepRecoveryState_Call { + _c.Call.Return(run) + return _c +} + // Version provides a mock function for the type MockPeer func (_mock *MockPeer) Version(c context.Context) (*rpc.Version, error) { ret := _mock.Called(c) diff --git a/rpc/peer.go b/rpc/peer.go index e1568f692f5..10446aede68 100644 --- a/rpc/peer.go +++ b/rpc/peer.go @@ -15,7 +15,11 @@ package rpc -import "context" +import ( + "context" + + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" +) // Peer defines the bidirectional communication interface between Woodpecker agents and servers. // @@ -258,7 +262,7 @@ type Peer interface { // Returns: // - agentID: Unique identifier for this agent (use in subsequent calls) // - error: If registration fails - RegisterAgent(ctx context.Context, info AgentInfo) (int64, error) + RegisterAgent(ctx context.Context, info AgentInfo) (AgentConfig, error) // UnregisterAgent removes this agent from the server's registry. // @@ -302,4 +306,14 @@ type Peer interface { // - nil on success // - error if communication fails ReportHealth(c context.Context) error + + // InitWorkflowRecovery initializes recovery state for all steps in a workflow + // and returns current states. This creates server-side state tracking for each + // step, enabling recovery after agent restart by knowing which steps completed, + // failed, or were running. + InitWorkflowRecovery(ctx context.Context, workflowID string, stepUUIDs []string, timeoutSeconds int64) (map[string]*types.RecoveryState, error) + + // UpdateStepRecoveryState updates the recovery state for a specific step. + // Called as steps transition through running, success, failed states. 
+ UpdateStepRecoveryState(ctx context.Context, workflowID, stepUUID string, status types.RecoveryStatus, exitCode int) error } diff --git a/rpc/proto/version.go b/rpc/proto/version.go index ee63ab7cd21..a0ececc67d7 100644 --- a/rpc/proto/version.go +++ b/rpc/proto/version.go @@ -16,4 +16,4 @@ package proto // Version is the version of the woodpecker.proto file, // IMPORTANT: increased by 1 each time it get changed. -const Version int32 = 15 +const Version int32 = 16 diff --git a/rpc/proto/woodpecker.pb.go b/rpc/proto/woodpecker.pb.go index f5126303d94..e9b1948244a 100644 --- a/rpc/proto/woodpecker.pb.go +++ b/rpc/proto/woodpecker.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.36.11 -// protoc v6.33.1 +// protoc v6.33.4 // source: woodpecker.proto package proto @@ -36,6 +36,61 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) +type RecoveryStatus int32 + +const ( + RecoveryStatus_RECOVERY_PENDING RecoveryStatus = 0 + RecoveryStatus_RECOVERY_RUNNING RecoveryStatus = 1 + RecoveryStatus_RECOVERY_SUCCESS RecoveryStatus = 2 + RecoveryStatus_RECOVERY_FAILED RecoveryStatus = 3 + RecoveryStatus_RECOVERY_SKIPPED RecoveryStatus = 4 +) + +// Enum value maps for RecoveryStatus. 
+var ( + RecoveryStatus_name = map[int32]string{ + 0: "RECOVERY_PENDING", + 1: "RECOVERY_RUNNING", + 2: "RECOVERY_SUCCESS", + 3: "RECOVERY_FAILED", + 4: "RECOVERY_SKIPPED", + } + RecoveryStatus_value = map[string]int32{ + "RECOVERY_PENDING": 0, + "RECOVERY_RUNNING": 1, + "RECOVERY_SUCCESS": 2, + "RECOVERY_FAILED": 3, + "RECOVERY_SKIPPED": 4, + } +) + +func (x RecoveryStatus) Enum() *RecoveryStatus { + p := new(RecoveryStatus) + *p = x + return p +} + +func (x RecoveryStatus) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (RecoveryStatus) Descriptor() protoreflect.EnumDescriptor { + return file_woodpecker_proto_enumTypes[0].Descriptor() +} + +func (RecoveryStatus) Type() protoreflect.EnumType { + return &file_woodpecker_proto_enumTypes[0] +} + +func (x RecoveryStatus) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use RecoveryStatus.Descriptor instead. +func (RecoveryStatus) EnumDescriptor() ([]byte, []int) { + return file_woodpecker_proto_rawDescGZIP(), []int{0} +} + type StepState struct { state protoimpl.MessageState `protogen:"open.v1"` StepUuid string `protobuf:"bytes,1,opt,name=step_uuid,json=stepUuid,proto3" json:"step_uuid,omitempty"` @@ -376,6 +431,58 @@ func (x *Workflow) GetPayload() []byte { return nil } +type AgentConfig struct { + state protoimpl.MessageState `protogen:"open.v1"` + AgentId int64 `protobuf:"varint,1,opt,name=agent_id,json=agentId,proto3" json:"agent_id,omitempty"` + RecoveryEnabled bool `protobuf:"varint,2,opt,name=recovery_enabled,json=recoveryEnabled,proto3" json:"recovery_enabled,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AgentConfig) Reset() { + *x = AgentConfig{} + mi := &file_woodpecker_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AgentConfig) String() string { + return protoimpl.X.MessageStringOf(x) +} 
+ +func (*AgentConfig) ProtoMessage() {} + +func (x *AgentConfig) ProtoReflect() protoreflect.Message { + mi := &file_woodpecker_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AgentConfig.ProtoReflect.Descriptor instead. +func (*AgentConfig) Descriptor() ([]byte, []int) { + return file_woodpecker_proto_rawDescGZIP(), []int{5} +} + +func (x *AgentConfig) GetAgentId() int64 { + if x != nil { + return x.AgentId + } + return 0 +} + +func (x *AgentConfig) GetRecoveryEnabled() bool { + if x != nil { + return x.RecoveryEnabled + } + return false +} + type NextRequest struct { state protoimpl.MessageState `protogen:"open.v1"` Filter *Filter `protobuf:"bytes,1,opt,name=filter,proto3" json:"filter,omitempty"` @@ -385,7 +492,7 @@ type NextRequest struct { func (x *NextRequest) Reset() { *x = NextRequest{} - mi := &file_woodpecker_proto_msgTypes[5] + mi := &file_woodpecker_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -397,7 +504,7 @@ func (x *NextRequest) String() string { func (*NextRequest) ProtoMessage() {} func (x *NextRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[5] + mi := &file_woodpecker_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -410,7 +517,7 @@ func (x *NextRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use NextRequest.ProtoReflect.Descriptor instead. 
func (*NextRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{5} + return file_woodpecker_proto_rawDescGZIP(), []int{6} } func (x *NextRequest) GetFilter() *Filter { @@ -430,7 +537,7 @@ type InitRequest struct { func (x *InitRequest) Reset() { *x = InitRequest{} - mi := &file_woodpecker_proto_msgTypes[6] + mi := &file_woodpecker_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -442,7 +549,7 @@ func (x *InitRequest) String() string { func (*InitRequest) ProtoMessage() {} func (x *InitRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[6] + mi := &file_woodpecker_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -455,7 +562,7 @@ func (x *InitRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use InitRequest.ProtoReflect.Descriptor instead. func (*InitRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{6} + return file_woodpecker_proto_rawDescGZIP(), []int{7} } func (x *InitRequest) GetId() string { @@ -481,7 +588,7 @@ type WaitRequest struct { func (x *WaitRequest) Reset() { *x = WaitRequest{} - mi := &file_woodpecker_proto_msgTypes[7] + mi := &file_woodpecker_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -493,7 +600,7 @@ func (x *WaitRequest) String() string { func (*WaitRequest) ProtoMessage() {} func (x *WaitRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[7] + mi := &file_woodpecker_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -506,7 +613,7 @@ func (x *WaitRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use WaitRequest.ProtoReflect.Descriptor instead. 
func (*WaitRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{7} + return file_woodpecker_proto_rawDescGZIP(), []int{8} } func (x *WaitRequest) GetId() string { @@ -526,7 +633,7 @@ type DoneRequest struct { func (x *DoneRequest) Reset() { *x = DoneRequest{} - mi := &file_woodpecker_proto_msgTypes[8] + mi := &file_woodpecker_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -538,7 +645,7 @@ func (x *DoneRequest) String() string { func (*DoneRequest) ProtoMessage() {} func (x *DoneRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[8] + mi := &file_woodpecker_proto_msgTypes[9] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -551,7 +658,7 @@ func (x *DoneRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use DoneRequest.ProtoReflect.Descriptor instead. func (*DoneRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{8} + return file_woodpecker_proto_rawDescGZIP(), []int{9} } func (x *DoneRequest) GetId() string { @@ -577,7 +684,7 @@ type ExtendRequest struct { func (x *ExtendRequest) Reset() { *x = ExtendRequest{} - mi := &file_woodpecker_proto_msgTypes[9] + mi := &file_woodpecker_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -589,7 +696,7 @@ func (x *ExtendRequest) String() string { func (*ExtendRequest) ProtoMessage() {} func (x *ExtendRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[9] + mi := &file_woodpecker_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -602,7 +709,7 @@ func (x *ExtendRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ExtendRequest.ProtoReflect.Descriptor instead. 
func (*ExtendRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{9} + return file_woodpecker_proto_rawDescGZIP(), []int{10} } func (x *ExtendRequest) GetId() string { @@ -622,7 +729,7 @@ type UpdateRequest struct { func (x *UpdateRequest) Reset() { *x = UpdateRequest{} - mi := &file_woodpecker_proto_msgTypes[10] + mi := &file_woodpecker_proto_msgTypes[11] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -634,7 +741,7 @@ func (x *UpdateRequest) String() string { func (*UpdateRequest) ProtoMessage() {} func (x *UpdateRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[10] + mi := &file_woodpecker_proto_msgTypes[11] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -647,7 +754,7 @@ func (x *UpdateRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use UpdateRequest.ProtoReflect.Descriptor instead. func (*UpdateRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{10} + return file_woodpecker_proto_rawDescGZIP(), []int{11} } func (x *UpdateRequest) GetId() string { @@ -673,7 +780,7 @@ type LogRequest struct { func (x *LogRequest) Reset() { *x = LogRequest{} - mi := &file_woodpecker_proto_msgTypes[11] + mi := &file_woodpecker_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -685,7 +792,7 @@ func (x *LogRequest) String() string { func (*LogRequest) ProtoMessage() {} func (x *LogRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[11] + mi := &file_woodpecker_proto_msgTypes[12] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -698,7 +805,7 @@ func (x *LogRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use LogRequest.ProtoReflect.Descriptor instead. 
func (*LogRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{11} + return file_woodpecker_proto_rawDescGZIP(), []int{12} } func (x *LogRequest) GetLogEntries() []*LogEntry { @@ -716,7 +823,7 @@ type Empty struct { func (x *Empty) Reset() { *x = Empty{} - mi := &file_woodpecker_proto_msgTypes[12] + mi := &file_woodpecker_proto_msgTypes[13] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -728,7 +835,7 @@ func (x *Empty) String() string { func (*Empty) ProtoMessage() {} func (x *Empty) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[12] + mi := &file_woodpecker_proto_msgTypes[13] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -741,7 +848,7 @@ func (x *Empty) ProtoReflect() protoreflect.Message { // Deprecated: Use Empty.ProtoReflect.Descriptor instead. func (*Empty) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{12} + return file_woodpecker_proto_rawDescGZIP(), []int{13} } type ReportHealthRequest struct { @@ -753,7 +860,7 @@ type ReportHealthRequest struct { func (x *ReportHealthRequest) Reset() { *x = ReportHealthRequest{} - mi := &file_woodpecker_proto_msgTypes[13] + mi := &file_woodpecker_proto_msgTypes[14] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -765,7 +872,7 @@ func (x *ReportHealthRequest) String() string { func (*ReportHealthRequest) ProtoMessage() {} func (x *ReportHealthRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[13] + mi := &file_woodpecker_proto_msgTypes[14] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -778,7 +885,7 @@ func (x *ReportHealthRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ReportHealthRequest.ProtoReflect.Descriptor instead. 
func (*ReportHealthRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{13} + return file_woodpecker_proto_rawDescGZIP(), []int{14} } func (x *ReportHealthRequest) GetStatus() string { @@ -801,7 +908,7 @@ type AgentInfo struct { func (x *AgentInfo) Reset() { *x = AgentInfo{} - mi := &file_woodpecker_proto_msgTypes[14] + mi := &file_woodpecker_proto_msgTypes[15] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -813,7 +920,7 @@ func (x *AgentInfo) String() string { func (*AgentInfo) ProtoMessage() {} func (x *AgentInfo) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[14] + mi := &file_woodpecker_proto_msgTypes[15] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -826,7 +933,7 @@ func (x *AgentInfo) ProtoReflect() protoreflect.Message { // Deprecated: Use AgentInfo.ProtoReflect.Descriptor instead. func (*AgentInfo) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{14} + return file_woodpecker_proto_rawDescGZIP(), []int{15} } func (x *AgentInfo) GetPlatform() string { @@ -873,7 +980,7 @@ type RegisterAgentRequest struct { func (x *RegisterAgentRequest) Reset() { *x = RegisterAgentRequest{} - mi := &file_woodpecker_proto_msgTypes[15] + mi := &file_woodpecker_proto_msgTypes[16] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -885,7 +992,7 @@ func (x *RegisterAgentRequest) String() string { func (*RegisterAgentRequest) ProtoMessage() {} func (x *RegisterAgentRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[15] + mi := &file_woodpecker_proto_msgTypes[16] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -898,7 +1005,7 @@ func (x *RegisterAgentRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use RegisterAgentRequest.ProtoReflect.Descriptor 
instead. func (*RegisterAgentRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{15} + return file_woodpecker_proto_rawDescGZIP(), []int{16} } func (x *RegisterAgentRequest) GetInfo() *AgentInfo { @@ -918,7 +1025,7 @@ type VersionResponse struct { func (x *VersionResponse) Reset() { *x = VersionResponse{} - mi := &file_woodpecker_proto_msgTypes[16] + mi := &file_woodpecker_proto_msgTypes[17] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -930,7 +1037,7 @@ func (x *VersionResponse) String() string { func (*VersionResponse) ProtoMessage() {} func (x *VersionResponse) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[16] + mi := &file_woodpecker_proto_msgTypes[17] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -943,7 +1050,7 @@ func (x *VersionResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use VersionResponse.ProtoReflect.Descriptor instead. 
func (*VersionResponse) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{16} + return file_woodpecker_proto_rawDescGZIP(), []int{17} } func (x *VersionResponse) GetGrpcVersion() int32 { @@ -969,7 +1076,7 @@ type NextResponse struct { func (x *NextResponse) Reset() { *x = NextResponse{} - mi := &file_woodpecker_proto_msgTypes[17] + mi := &file_woodpecker_proto_msgTypes[18] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -981,7 +1088,7 @@ func (x *NextResponse) String() string { func (*NextResponse) ProtoMessage() {} func (x *NextResponse) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[17] + mi := &file_woodpecker_proto_msgTypes[18] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -994,7 +1101,7 @@ func (x *NextResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use NextResponse.ProtoReflect.Descriptor instead. func (*NextResponse) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{17} + return file_woodpecker_proto_rawDescGZIP(), []int{18} } func (x *NextResponse) GetWorkflow() *Workflow { @@ -1006,14 +1113,14 @@ func (x *NextResponse) GetWorkflow() *Workflow { type RegisterAgentResponse struct { state protoimpl.MessageState `protogen:"open.v1"` - AgentId int64 `protobuf:"varint,1,opt,name=agent_id,json=agentId,proto3" json:"agent_id,omitempty"` + Config *AgentConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *RegisterAgentResponse) Reset() { *x = RegisterAgentResponse{} - mi := &file_woodpecker_proto_msgTypes[18] + mi := &file_woodpecker_proto_msgTypes[19] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1025,7 +1132,7 @@ func (x *RegisterAgentResponse) String() string { func (*RegisterAgentResponse) ProtoMessage() {} func (x 
*RegisterAgentResponse) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[18] + mi := &file_woodpecker_proto_msgTypes[19] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1038,14 +1145,14 @@ func (x *RegisterAgentResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use RegisterAgentResponse.ProtoReflect.Descriptor instead. func (*RegisterAgentResponse) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{18} + return file_woodpecker_proto_rawDescGZIP(), []int{19} } -func (x *RegisterAgentResponse) GetAgentId() int64 { +func (x *RegisterAgentResponse) GetConfig() *AgentConfig { if x != nil { - return x.AgentId + return x.Config } - return 0 + return nil } type WaitResponse struct { @@ -1057,7 +1164,7 @@ type WaitResponse struct { func (x *WaitResponse) Reset() { *x = WaitResponse{} - mi := &file_woodpecker_proto_msgTypes[19] + mi := &file_woodpecker_proto_msgTypes[20] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1069,7 +1176,7 @@ func (x *WaitResponse) String() string { func (*WaitResponse) ProtoMessage() {} func (x *WaitResponse) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[19] + mi := &file_woodpecker_proto_msgTypes[20] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1082,7 +1189,7 @@ func (x *WaitResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use WaitResponse.ProtoReflect.Descriptor instead. 
func (*WaitResponse) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{19} + return file_woodpecker_proto_rawDescGZIP(), []int{20} } func (x *WaitResponse) GetCanceled() bool { @@ -1102,7 +1209,7 @@ type AuthRequest struct { func (x *AuthRequest) Reset() { *x = AuthRequest{} - mi := &file_woodpecker_proto_msgTypes[20] + mi := &file_woodpecker_proto_msgTypes[21] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1114,7 +1221,7 @@ func (x *AuthRequest) String() string { func (*AuthRequest) ProtoMessage() {} func (x *AuthRequest) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[20] + mi := &file_woodpecker_proto_msgTypes[21] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1127,7 +1234,7 @@ func (x *AuthRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use AuthRequest.ProtoReflect.Descriptor instead. func (*AuthRequest) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{20} + return file_woodpecker_proto_rawDescGZIP(), []int{21} } func (x *AuthRequest) GetAgentToken() string { @@ -1155,7 +1262,7 @@ type AuthResponse struct { func (x *AuthResponse) Reset() { *x = AuthResponse{} - mi := &file_woodpecker_proto_msgTypes[21] + mi := &file_woodpecker_proto_msgTypes[22] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1167,7 +1274,7 @@ func (x *AuthResponse) String() string { func (*AuthResponse) ProtoMessage() {} func (x *AuthResponse) ProtoReflect() protoreflect.Message { - mi := &file_woodpecker_proto_msgTypes[21] + mi := &file_woodpecker_proto_msgTypes[22] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1180,7 +1287,7 @@ func (x *AuthResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use AuthResponse.ProtoReflect.Descriptor instead. 
func (*AuthResponse) Descriptor() ([]byte, []int) { - return file_woodpecker_proto_rawDescGZIP(), []int{21} + return file_woodpecker_proto_rawDescGZIP(), []int{22} } func (x *AuthResponse) GetStatus() string { @@ -1204,6 +1311,238 @@ func (x *AuthResponse) GetAccessToken() string { return "" } +type InitWorkflowRecoveryRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + WorkflowId string `protobuf:"bytes,1,opt,name=workflow_id,json=workflowId,proto3" json:"workflow_id,omitempty"` + StepUuids []string `protobuf:"bytes,2,rep,name=step_uuids,json=stepUuids,proto3" json:"step_uuids,omitempty"` + TimeoutSeconds int64 `protobuf:"varint,3,opt,name=timeout_seconds,json=timeoutSeconds,proto3" json:"timeout_seconds,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *InitWorkflowRecoveryRequest) Reset() { + *x = InitWorkflowRecoveryRequest{} + mi := &file_woodpecker_proto_msgTypes[23] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *InitWorkflowRecoveryRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*InitWorkflowRecoveryRequest) ProtoMessage() {} + +func (x *InitWorkflowRecoveryRequest) ProtoReflect() protoreflect.Message { + mi := &file_woodpecker_proto_msgTypes[23] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use InitWorkflowRecoveryRequest.ProtoReflect.Descriptor instead. 
+func (*InitWorkflowRecoveryRequest) Descriptor() ([]byte, []int) { + return file_woodpecker_proto_rawDescGZIP(), []int{23} +} + +func (x *InitWorkflowRecoveryRequest) GetWorkflowId() string { + if x != nil { + return x.WorkflowId + } + return "" +} + +func (x *InitWorkflowRecoveryRequest) GetStepUuids() []string { + if x != nil { + return x.StepUuids + } + return nil +} + +func (x *InitWorkflowRecoveryRequest) GetTimeoutSeconds() int64 { + if x != nil { + return x.TimeoutSeconds + } + return 0 +} + +type StepRecoveryState struct { + state protoimpl.MessageState `protogen:"open.v1"` + StepUuid string `protobuf:"bytes,1,opt,name=step_uuid,json=stepUuid,proto3" json:"step_uuid,omitempty"` + Status RecoveryStatus `protobuf:"varint,2,opt,name=status,proto3,enum=proto.RecoveryStatus" json:"status,omitempty"` + ExitCode int32 `protobuf:"varint,3,opt,name=exit_code,json=exitCode,proto3" json:"exit_code,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StepRecoveryState) Reset() { + *x = StepRecoveryState{} + mi := &file_woodpecker_proto_msgTypes[24] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StepRecoveryState) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StepRecoveryState) ProtoMessage() {} + +func (x *StepRecoveryState) ProtoReflect() protoreflect.Message { + mi := &file_woodpecker_proto_msgTypes[24] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StepRecoveryState.ProtoReflect.Descriptor instead. 
+func (*StepRecoveryState) Descriptor() ([]byte, []int) { + return file_woodpecker_proto_rawDescGZIP(), []int{24} +} + +func (x *StepRecoveryState) GetStepUuid() string { + if x != nil { + return x.StepUuid + } + return "" +} + +func (x *StepRecoveryState) GetStatus() RecoveryStatus { + if x != nil { + return x.Status + } + return RecoveryStatus_RECOVERY_PENDING +} + +func (x *StepRecoveryState) GetExitCode() int32 { + if x != nil { + return x.ExitCode + } + return 0 +} + +type InitWorkflowRecoveryResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + States []*StepRecoveryState `protobuf:"bytes,1,rep,name=states,proto3" json:"states,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *InitWorkflowRecoveryResponse) Reset() { + *x = InitWorkflowRecoveryResponse{} + mi := &file_woodpecker_proto_msgTypes[25] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *InitWorkflowRecoveryResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*InitWorkflowRecoveryResponse) ProtoMessage() {} + +func (x *InitWorkflowRecoveryResponse) ProtoReflect() protoreflect.Message { + mi := &file_woodpecker_proto_msgTypes[25] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use InitWorkflowRecoveryResponse.ProtoReflect.Descriptor instead. 
+func (*InitWorkflowRecoveryResponse) Descriptor() ([]byte, []int) { + return file_woodpecker_proto_rawDescGZIP(), []int{25} +} + +func (x *InitWorkflowRecoveryResponse) GetStates() []*StepRecoveryState { + if x != nil { + return x.States + } + return nil +} + +type UpdateStepRecoveryStateRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + WorkflowId string `protobuf:"bytes,1,opt,name=workflow_id,json=workflowId,proto3" json:"workflow_id,omitempty"` + StepUuid string `protobuf:"bytes,2,opt,name=step_uuid,json=stepUuid,proto3" json:"step_uuid,omitempty"` + Status RecoveryStatus `protobuf:"varint,3,opt,name=status,proto3,enum=proto.RecoveryStatus" json:"status,omitempty"` + ExitCode int32 `protobuf:"varint,4,opt,name=exit_code,json=exitCode,proto3" json:"exit_code,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UpdateStepRecoveryStateRequest) Reset() { + *x = UpdateStepRecoveryStateRequest{} + mi := &file_woodpecker_proto_msgTypes[26] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UpdateStepRecoveryStateRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UpdateStepRecoveryStateRequest) ProtoMessage() {} + +func (x *UpdateStepRecoveryStateRequest) ProtoReflect() protoreflect.Message { + mi := &file_woodpecker_proto_msgTypes[26] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UpdateStepRecoveryStateRequest.ProtoReflect.Descriptor instead. 
+func (*UpdateStepRecoveryStateRequest) Descriptor() ([]byte, []int) { + return file_woodpecker_proto_rawDescGZIP(), []int{26} +} + +func (x *UpdateStepRecoveryStateRequest) GetWorkflowId() string { + if x != nil { + return x.WorkflowId + } + return "" +} + +func (x *UpdateStepRecoveryStateRequest) GetStepUuid() string { + if x != nil { + return x.StepUuid + } + return "" +} + +func (x *UpdateStepRecoveryStateRequest) GetStatus() RecoveryStatus { + if x != nil { + return x.Status + } + return RecoveryStatus_RECOVERY_PENDING +} + +func (x *UpdateStepRecoveryStateRequest) GetExitCode() int32 { + if x != nil { + return x.ExitCode + } + return 0 +} + var File_woodpecker_proto protoreflect.FileDescriptor const file_woodpecker_proto_rawDesc = "" + @@ -1236,7 +1575,10 @@ const file_woodpecker_proto_rawDesc = "" + "\bWorkflow\x12\x0e\n" + "\x02id\x18\x01 \x01(\tR\x02id\x12\x18\n" + "\atimeout\x18\x02 \x01(\x03R\atimeout\x12\x18\n" + - "\apayload\x18\x03 \x01(\fR\apayload\"4\n" + + "\apayload\x18\x03 \x01(\fR\apayload\"S\n" + + "\vAgentConfig\x12\x19\n" + + "\bagent_id\x18\x01 \x01(\x03R\aagentId\x12)\n" + + "\x10recovery_enabled\x18\x02 \x01(\bR\x0frecoveryEnabled\"4\n" + "\vNextRequest\x12%\n" + "\x06filter\x18\x01 \x01(\v2\r.proto.FilterR\x06filter\"I\n" + "\vInitRequest\x12\x0e\n" + @@ -1275,9 +1617,9 @@ const file_woodpecker_proto_rawDesc = "" + "\fgrpc_version\x18\x01 \x01(\x05R\vgrpcVersion\x12%\n" + "\x0eserver_version\x18\x02 \x01(\tR\rserverVersion\";\n" + "\fNextResponse\x12+\n" + - "\bworkflow\x18\x01 \x01(\v2\x0f.proto.WorkflowR\bworkflow\"2\n" + - "\x15RegisterAgentResponse\x12\x19\n" + - "\bagent_id\x18\x01 \x01(\x03R\aagentId\"*\n" + + "\bworkflow\x18\x01 \x01(\v2\x0f.proto.WorkflowR\bworkflow\"C\n" + + "\x15RegisterAgentResponse\x12*\n" + + "\x06config\x18\x01 \x01(\v2\x12.proto.AgentConfigR\x06config\"*\n" + "\fWaitResponse\x12\x1a\n" + "\bcanceled\x18\x01 \x01(\bR\bcanceled\"I\n" + "\vAuthRequest\x12\x1f\n" + @@ -1287,7 +1629,31 @@ const 
file_woodpecker_proto_rawDesc = "" + "\fAuthResponse\x12\x16\n" + "\x06status\x18\x01 \x01(\tR\x06status\x12\x19\n" + "\bagent_id\x18\x02 \x01(\x03R\aagentId\x12!\n" + - "\faccess_token\x18\x03 \x01(\tR\vaccessToken2\xc2\x04\n" + + "\faccess_token\x18\x03 \x01(\tR\vaccessToken\"\x86\x01\n" + + "\x1bInitWorkflowRecoveryRequest\x12\x1f\n" + + "\vworkflow_id\x18\x01 \x01(\tR\n" + + "workflowId\x12\x1d\n" + + "\n" + + "step_uuids\x18\x02 \x03(\tR\tstepUuids\x12'\n" + + "\x0ftimeout_seconds\x18\x03 \x01(\x03R\x0etimeoutSeconds\"|\n" + + "\x11StepRecoveryState\x12\x1b\n" + + "\tstep_uuid\x18\x01 \x01(\tR\bstepUuid\x12-\n" + + "\x06status\x18\x02 \x01(\x0e2\x15.proto.RecoveryStatusR\x06status\x12\x1b\n" + + "\texit_code\x18\x03 \x01(\x05R\bexitCode\"P\n" + + "\x1cInitWorkflowRecoveryResponse\x120\n" + + "\x06states\x18\x01 \x03(\v2\x18.proto.StepRecoveryStateR\x06states\"\xaa\x01\n" + + "\x1eUpdateStepRecoveryStateRequest\x12\x1f\n" + + "\vworkflow_id\x18\x01 \x01(\tR\n" + + "workflowId\x12\x1b\n" + + "\tstep_uuid\x18\x02 \x01(\tR\bstepUuid\x12-\n" + + "\x06status\x18\x03 \x01(\x0e2\x15.proto.RecoveryStatusR\x06status\x12\x1b\n" + + "\texit_code\x18\x04 \x01(\x05R\bexitCode*}\n" + + "\x0eRecoveryStatus\x12\x14\n" + + "\x10RECOVERY_PENDING\x10\x00\x12\x14\n" + + "\x10RECOVERY_RUNNING\x10\x01\x12\x14\n" + + "\x10RECOVERY_SUCCESS\x10\x02\x12\x13\n" + + "\x0fRECOVERY_FAILED\x10\x03\x12\x14\n" + + "\x10RECOVERY_SKIPPED\x10\x042\xf7\x05\n" + "\n" + "Woodpecker\x121\n" + "\aVersion\x12\f.proto.Empty\x1a\x16.proto.VersionResponse\"\x00\x121\n" + @@ -1300,7 +1666,9 @@ const file_woodpecker_proto_rawDesc = "" + "\x03Log\x12\x11.proto.LogRequest\x1a\f.proto.Empty\"\x00\x12L\n" + "\rRegisterAgent\x12\x1b.proto.RegisterAgentRequest\x1a\x1c.proto.RegisterAgentResponse\"\x00\x12/\n" + "\x0fUnregisterAgent\x12\f.proto.Empty\x1a\f.proto.Empty\"\x00\x12:\n" + - "\fReportHealth\x12\x1a.proto.ReportHealthRequest\x1a\f.proto.Empty\"\x002C\n" + + 
"\fReportHealth\x12\x1a.proto.ReportHealthRequest\x1a\f.proto.Empty\"\x00\x12a\n" + + "\x14InitWorkflowRecovery\x12\".proto.InitWorkflowRecoveryRequest\x1a#.proto.InitWorkflowRecoveryResponse\"\x00\x12P\n" + + "\x17UpdateStepRecoveryState\x12%.proto.UpdateStepRecoveryStateRequest\x1a\f.proto.Empty\"\x002C\n" + "\x0eWoodpeckerAuth\x121\n" + "\x04Auth\x12\x12.proto.AuthRequest\x1a\x13.proto.AuthResponse\"\x00B.Z,go.woodpecker-ci.org/woodpecker/v3/rpc/protob\x06proto3" @@ -1316,72 +1684,87 @@ func file_woodpecker_proto_rawDescGZIP() []byte { return file_woodpecker_proto_rawDescData } -var file_woodpecker_proto_msgTypes = make([]protoimpl.MessageInfo, 24) +var file_woodpecker_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_woodpecker_proto_msgTypes = make([]protoimpl.MessageInfo, 29) var file_woodpecker_proto_goTypes = []any{ - (*StepState)(nil), // 0: proto.StepState - (*WorkflowState)(nil), // 1: proto.WorkflowState - (*LogEntry)(nil), // 2: proto.LogEntry - (*Filter)(nil), // 3: proto.Filter - (*Workflow)(nil), // 4: proto.Workflow - (*NextRequest)(nil), // 5: proto.NextRequest - (*InitRequest)(nil), // 6: proto.InitRequest - (*WaitRequest)(nil), // 7: proto.WaitRequest - (*DoneRequest)(nil), // 8: proto.DoneRequest - (*ExtendRequest)(nil), // 9: proto.ExtendRequest - (*UpdateRequest)(nil), // 10: proto.UpdateRequest - (*LogRequest)(nil), // 11: proto.LogRequest - (*Empty)(nil), // 12: proto.Empty - (*ReportHealthRequest)(nil), // 13: proto.ReportHealthRequest - (*AgentInfo)(nil), // 14: proto.AgentInfo - (*RegisterAgentRequest)(nil), // 15: proto.RegisterAgentRequest - (*VersionResponse)(nil), // 16: proto.VersionResponse - (*NextResponse)(nil), // 17: proto.NextResponse - (*RegisterAgentResponse)(nil), // 18: proto.RegisterAgentResponse - (*WaitResponse)(nil), // 19: proto.WaitResponse - (*AuthRequest)(nil), // 20: proto.AuthRequest - (*AuthResponse)(nil), // 21: proto.AuthResponse - nil, // 22: proto.Filter.LabelsEntry - nil, // 23: 
proto.AgentInfo.CustomLabelsEntry + (RecoveryStatus)(0), // 0: proto.RecoveryStatus + (*StepState)(nil), // 1: proto.StepState + (*WorkflowState)(nil), // 2: proto.WorkflowState + (*LogEntry)(nil), // 3: proto.LogEntry + (*Filter)(nil), // 4: proto.Filter + (*Workflow)(nil), // 5: proto.Workflow + (*AgentConfig)(nil), // 6: proto.AgentConfig + (*NextRequest)(nil), // 7: proto.NextRequest + (*InitRequest)(nil), // 8: proto.InitRequest + (*WaitRequest)(nil), // 9: proto.WaitRequest + (*DoneRequest)(nil), // 10: proto.DoneRequest + (*ExtendRequest)(nil), // 11: proto.ExtendRequest + (*UpdateRequest)(nil), // 12: proto.UpdateRequest + (*LogRequest)(nil), // 13: proto.LogRequest + (*Empty)(nil), // 14: proto.Empty + (*ReportHealthRequest)(nil), // 15: proto.ReportHealthRequest + (*AgentInfo)(nil), // 16: proto.AgentInfo + (*RegisterAgentRequest)(nil), // 17: proto.RegisterAgentRequest + (*VersionResponse)(nil), // 18: proto.VersionResponse + (*NextResponse)(nil), // 19: proto.NextResponse + (*RegisterAgentResponse)(nil), // 20: proto.RegisterAgentResponse + (*WaitResponse)(nil), // 21: proto.WaitResponse + (*AuthRequest)(nil), // 22: proto.AuthRequest + (*AuthResponse)(nil), // 23: proto.AuthResponse + (*InitWorkflowRecoveryRequest)(nil), // 24: proto.InitWorkflowRecoveryRequest + (*StepRecoveryState)(nil), // 25: proto.StepRecoveryState + (*InitWorkflowRecoveryResponse)(nil), // 26: proto.InitWorkflowRecoveryResponse + (*UpdateStepRecoveryStateRequest)(nil), // 27: proto.UpdateStepRecoveryStateRequest + nil, // 28: proto.Filter.LabelsEntry + nil, // 29: proto.AgentInfo.CustomLabelsEntry } var file_woodpecker_proto_depIdxs = []int32{ - 22, // 0: proto.Filter.labels:type_name -> proto.Filter.LabelsEntry - 3, // 1: proto.NextRequest.filter:type_name -> proto.Filter - 1, // 2: proto.InitRequest.state:type_name -> proto.WorkflowState - 1, // 3: proto.DoneRequest.state:type_name -> proto.WorkflowState - 0, // 4: proto.UpdateRequest.state:type_name -> proto.StepState - 2, // 
5: proto.LogRequest.logEntries:type_name -> proto.LogEntry - 23, // 6: proto.AgentInfo.customLabels:type_name -> proto.AgentInfo.CustomLabelsEntry - 14, // 7: proto.RegisterAgentRequest.info:type_name -> proto.AgentInfo - 4, // 8: proto.NextResponse.workflow:type_name -> proto.Workflow - 12, // 9: proto.Woodpecker.Version:input_type -> proto.Empty - 5, // 10: proto.Woodpecker.Next:input_type -> proto.NextRequest - 6, // 11: proto.Woodpecker.Init:input_type -> proto.InitRequest - 7, // 12: proto.Woodpecker.Wait:input_type -> proto.WaitRequest - 8, // 13: proto.Woodpecker.Done:input_type -> proto.DoneRequest - 9, // 14: proto.Woodpecker.Extend:input_type -> proto.ExtendRequest - 10, // 15: proto.Woodpecker.Update:input_type -> proto.UpdateRequest - 11, // 16: proto.Woodpecker.Log:input_type -> proto.LogRequest - 15, // 17: proto.Woodpecker.RegisterAgent:input_type -> proto.RegisterAgentRequest - 12, // 18: proto.Woodpecker.UnregisterAgent:input_type -> proto.Empty - 13, // 19: proto.Woodpecker.ReportHealth:input_type -> proto.ReportHealthRequest - 20, // 20: proto.WoodpeckerAuth.Auth:input_type -> proto.AuthRequest - 16, // 21: proto.Woodpecker.Version:output_type -> proto.VersionResponse - 17, // 22: proto.Woodpecker.Next:output_type -> proto.NextResponse - 12, // 23: proto.Woodpecker.Init:output_type -> proto.Empty - 19, // 24: proto.Woodpecker.Wait:output_type -> proto.WaitResponse - 12, // 25: proto.Woodpecker.Done:output_type -> proto.Empty - 12, // 26: proto.Woodpecker.Extend:output_type -> proto.Empty - 12, // 27: proto.Woodpecker.Update:output_type -> proto.Empty - 12, // 28: proto.Woodpecker.Log:output_type -> proto.Empty - 18, // 29: proto.Woodpecker.RegisterAgent:output_type -> proto.RegisterAgentResponse - 12, // 30: proto.Woodpecker.UnregisterAgent:output_type -> proto.Empty - 12, // 31: proto.Woodpecker.ReportHealth:output_type -> proto.Empty - 21, // 32: proto.WoodpeckerAuth.Auth:output_type -> proto.AuthResponse - 21, // [21:33] is the sub-list for 
method output_type - 9, // [9:21] is the sub-list for method input_type - 9, // [9:9] is the sub-list for extension type_name - 9, // [9:9] is the sub-list for extension extendee - 0, // [0:9] is the sub-list for field type_name + 28, // 0: proto.Filter.labels:type_name -> proto.Filter.LabelsEntry + 4, // 1: proto.NextRequest.filter:type_name -> proto.Filter + 2, // 2: proto.InitRequest.state:type_name -> proto.WorkflowState + 2, // 3: proto.DoneRequest.state:type_name -> proto.WorkflowState + 1, // 4: proto.UpdateRequest.state:type_name -> proto.StepState + 3, // 5: proto.LogRequest.logEntries:type_name -> proto.LogEntry + 29, // 6: proto.AgentInfo.customLabels:type_name -> proto.AgentInfo.CustomLabelsEntry + 16, // 7: proto.RegisterAgentRequest.info:type_name -> proto.AgentInfo + 5, // 8: proto.NextResponse.workflow:type_name -> proto.Workflow + 6, // 9: proto.RegisterAgentResponse.config:type_name -> proto.AgentConfig + 0, // 10: proto.StepRecoveryState.status:type_name -> proto.RecoveryStatus + 25, // 11: proto.InitWorkflowRecoveryResponse.states:type_name -> proto.StepRecoveryState + 0, // 12: proto.UpdateStepRecoveryStateRequest.status:type_name -> proto.RecoveryStatus + 14, // 13: proto.Woodpecker.Version:input_type -> proto.Empty + 7, // 14: proto.Woodpecker.Next:input_type -> proto.NextRequest + 8, // 15: proto.Woodpecker.Init:input_type -> proto.InitRequest + 9, // 16: proto.Woodpecker.Wait:input_type -> proto.WaitRequest + 10, // 17: proto.Woodpecker.Done:input_type -> proto.DoneRequest + 11, // 18: proto.Woodpecker.Extend:input_type -> proto.ExtendRequest + 12, // 19: proto.Woodpecker.Update:input_type -> proto.UpdateRequest + 13, // 20: proto.Woodpecker.Log:input_type -> proto.LogRequest + 17, // 21: proto.Woodpecker.RegisterAgent:input_type -> proto.RegisterAgentRequest + 14, // 22: proto.Woodpecker.UnregisterAgent:input_type -> proto.Empty + 15, // 23: proto.Woodpecker.ReportHealth:input_type -> proto.ReportHealthRequest + 24, // 24: 
proto.Woodpecker.InitWorkflowRecovery:input_type -> proto.InitWorkflowRecoveryRequest + 27, // 25: proto.Woodpecker.UpdateStepRecoveryState:input_type -> proto.UpdateStepRecoveryStateRequest + 22, // 26: proto.WoodpeckerAuth.Auth:input_type -> proto.AuthRequest + 18, // 27: proto.Woodpecker.Version:output_type -> proto.VersionResponse + 19, // 28: proto.Woodpecker.Next:output_type -> proto.NextResponse + 14, // 29: proto.Woodpecker.Init:output_type -> proto.Empty + 21, // 30: proto.Woodpecker.Wait:output_type -> proto.WaitResponse + 14, // 31: proto.Woodpecker.Done:output_type -> proto.Empty + 14, // 32: proto.Woodpecker.Extend:output_type -> proto.Empty + 14, // 33: proto.Woodpecker.Update:output_type -> proto.Empty + 14, // 34: proto.Woodpecker.Log:output_type -> proto.Empty + 20, // 35: proto.Woodpecker.RegisterAgent:output_type -> proto.RegisterAgentResponse + 14, // 36: proto.Woodpecker.UnregisterAgent:output_type -> proto.Empty + 14, // 37: proto.Woodpecker.ReportHealth:output_type -> proto.Empty + 26, // 38: proto.Woodpecker.InitWorkflowRecovery:output_type -> proto.InitWorkflowRecoveryResponse + 14, // 39: proto.Woodpecker.UpdateStepRecoveryState:output_type -> proto.Empty + 23, // 40: proto.WoodpeckerAuth.Auth:output_type -> proto.AuthResponse + 27, // [27:41] is the sub-list for method output_type + 13, // [13:27] is the sub-list for method input_type + 13, // [13:13] is the sub-list for extension type_name + 13, // [13:13] is the sub-list for extension extendee + 0, // [0:13] is the sub-list for field type_name } func init() { file_woodpecker_proto_init() } @@ -1394,13 +1777,14 @@ func file_woodpecker_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_woodpecker_proto_rawDesc), len(file_woodpecker_proto_rawDesc)), - NumEnums: 0, - NumMessages: 24, + NumEnums: 1, + NumMessages: 29, NumExtensions: 0, NumServices: 2, }, GoTypes: file_woodpecker_proto_goTypes, 
DependencyIndexes: file_woodpecker_proto_depIdxs, + EnumInfos: file_woodpecker_proto_enumTypes, MessageInfos: file_woodpecker_proto_msgTypes, }.Build() File_woodpecker_proto = out.File diff --git a/rpc/proto/woodpecker.proto b/rpc/proto/woodpecker.proto index b348c095767..cc4c5c90673 100644 --- a/rpc/proto/woodpecker.proto +++ b/rpc/proto/woodpecker.proto @@ -35,6 +35,10 @@ service Woodpecker { rpc RegisterAgent (RegisterAgentRequest) returns (RegisterAgentResponse) {} rpc UnregisterAgent (Empty) returns (Empty) {} rpc ReportHealth (ReportHealthRequest) returns (Empty) {} + + // Recovery methods for agent workflow recovery + rpc InitWorkflowRecovery (InitWorkflowRecoveryRequest) returns (InitWorkflowRecoveryResponse) {} + rpc UpdateStepRecoveryState (UpdateStepRecoveryStateRequest) returns (Empty) {} } // @@ -76,6 +80,11 @@ message Workflow { bytes payload = 3; } +message AgentConfig { + int64 agent_id = 1; + bool recovery_enabled = 2; +} + // // Request types // @@ -144,7 +153,7 @@ message NextResponse { } message RegisterAgentResponse { - int64 agent_id = 1; + AgentConfig config = 1; } message WaitResponse { @@ -167,3 +176,38 @@ message AuthResponse { int64 agent_id = 2; string access_token = 3; } + +// +// Recovery types +// + +enum RecoveryStatus { + RECOVERY_PENDING = 0; + RECOVERY_RUNNING = 1; + RECOVERY_SUCCESS = 2; + RECOVERY_FAILED = 3; + RECOVERY_SKIPPED = 4; +} + +message InitWorkflowRecoveryRequest { + string workflow_id = 1; + repeated string step_uuids = 2; + int64 timeout_seconds = 3; +} + +message StepRecoveryState { + string step_uuid = 1; + RecoveryStatus status = 2; + int32 exit_code = 3; +} + +message InitWorkflowRecoveryResponse { + repeated StepRecoveryState states = 1; +} + +message UpdateStepRecoveryStateRequest { + string workflow_id = 1; + string step_uuid = 2; + RecoveryStatus status = 3; + int32 exit_code = 4; +} diff --git a/rpc/proto/woodpecker_grpc.pb.go b/rpc/proto/woodpecker_grpc.pb.go index 88ca6e7a965..14aae653451 100644 --- 
a/rpc/proto/woodpecker_grpc.pb.go +++ b/rpc/proto/woodpecker_grpc.pb.go @@ -15,8 +15,8 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.6.0 -// - protoc v6.33.1 +// - protoc-gen-go-grpc v1.6.1 +// - protoc v6.33.4 // source: woodpecker.proto package proto @@ -34,17 +34,19 @@ import ( const _ = grpc.SupportPackageIsVersion9 const ( - Woodpecker_Version_FullMethodName = "/proto.Woodpecker/Version" - Woodpecker_Next_FullMethodName = "/proto.Woodpecker/Next" - Woodpecker_Init_FullMethodName = "/proto.Woodpecker/Init" - Woodpecker_Wait_FullMethodName = "/proto.Woodpecker/Wait" - Woodpecker_Done_FullMethodName = "/proto.Woodpecker/Done" - Woodpecker_Extend_FullMethodName = "/proto.Woodpecker/Extend" - Woodpecker_Update_FullMethodName = "/proto.Woodpecker/Update" - Woodpecker_Log_FullMethodName = "/proto.Woodpecker/Log" - Woodpecker_RegisterAgent_FullMethodName = "/proto.Woodpecker/RegisterAgent" - Woodpecker_UnregisterAgent_FullMethodName = "/proto.Woodpecker/UnregisterAgent" - Woodpecker_ReportHealth_FullMethodName = "/proto.Woodpecker/ReportHealth" + Woodpecker_Version_FullMethodName = "/proto.Woodpecker/Version" + Woodpecker_Next_FullMethodName = "/proto.Woodpecker/Next" + Woodpecker_Init_FullMethodName = "/proto.Woodpecker/Init" + Woodpecker_Wait_FullMethodName = "/proto.Woodpecker/Wait" + Woodpecker_Done_FullMethodName = "/proto.Woodpecker/Done" + Woodpecker_Extend_FullMethodName = "/proto.Woodpecker/Extend" + Woodpecker_Update_FullMethodName = "/proto.Woodpecker/Update" + Woodpecker_Log_FullMethodName = "/proto.Woodpecker/Log" + Woodpecker_RegisterAgent_FullMethodName = "/proto.Woodpecker/RegisterAgent" + Woodpecker_UnregisterAgent_FullMethodName = "/proto.Woodpecker/UnregisterAgent" + Woodpecker_ReportHealth_FullMethodName = "/proto.Woodpecker/ReportHealth" + Woodpecker_InitWorkflowRecovery_FullMethodName = "/proto.Woodpecker/InitWorkflowRecovery" + Woodpecker_UpdateStepRecoveryState_FullMethodName = 
"/proto.Woodpecker/UpdateStepRecoveryState" ) // WoodpeckerClient is the client API for Woodpecker service. @@ -64,6 +66,9 @@ type WoodpeckerClient interface { RegisterAgent(ctx context.Context, in *RegisterAgentRequest, opts ...grpc.CallOption) (*RegisterAgentResponse, error) UnregisterAgent(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*Empty, error) ReportHealth(ctx context.Context, in *ReportHealthRequest, opts ...grpc.CallOption) (*Empty, error) + // Recovery methods for agent workflow recovery + InitWorkflowRecovery(ctx context.Context, in *InitWorkflowRecoveryRequest, opts ...grpc.CallOption) (*InitWorkflowRecoveryResponse, error) + UpdateStepRecoveryState(ctx context.Context, in *UpdateStepRecoveryStateRequest, opts ...grpc.CallOption) (*Empty, error) } type woodpeckerClient struct { @@ -184,6 +189,26 @@ func (c *woodpeckerClient) ReportHealth(ctx context.Context, in *ReportHealthReq return out, nil } +func (c *woodpeckerClient) InitWorkflowRecovery(ctx context.Context, in *InitWorkflowRecoveryRequest, opts ...grpc.CallOption) (*InitWorkflowRecoveryResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(InitWorkflowRecoveryResponse) + err := c.cc.Invoke(ctx, Woodpecker_InitWorkflowRecovery_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *woodpeckerClient) UpdateStepRecoveryState(ctx context.Context, in *UpdateStepRecoveryStateRequest, opts ...grpc.CallOption) (*Empty, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(Empty) + err := c.cc.Invoke(ctx, Woodpecker_UpdateStepRecoveryState_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + // WoodpeckerServer is the server API for Woodpecker service. // All implementations must embed UnimplementedWoodpeckerServer // for forward compatibility. 
@@ -201,6 +226,9 @@ type WoodpeckerServer interface { RegisterAgent(context.Context, *RegisterAgentRequest) (*RegisterAgentResponse, error) UnregisterAgent(context.Context, *Empty) (*Empty, error) ReportHealth(context.Context, *ReportHealthRequest) (*Empty, error) + // Recovery methods for agent workflow recovery + InitWorkflowRecovery(context.Context, *InitWorkflowRecoveryRequest) (*InitWorkflowRecoveryResponse, error) + UpdateStepRecoveryState(context.Context, *UpdateStepRecoveryStateRequest) (*Empty, error) mustEmbedUnimplementedWoodpeckerServer() } @@ -244,6 +272,12 @@ func (UnimplementedWoodpeckerServer) UnregisterAgent(context.Context, *Empty) (* func (UnimplementedWoodpeckerServer) ReportHealth(context.Context, *ReportHealthRequest) (*Empty, error) { return nil, status.Error(codes.Unimplemented, "method ReportHealth not implemented") } +func (UnimplementedWoodpeckerServer) InitWorkflowRecovery(context.Context, *InitWorkflowRecoveryRequest) (*InitWorkflowRecoveryResponse, error) { + return nil, status.Error(codes.Unimplemented, "method InitWorkflowRecovery not implemented") +} +func (UnimplementedWoodpeckerServer) UpdateStepRecoveryState(context.Context, *UpdateStepRecoveryStateRequest) (*Empty, error) { + return nil, status.Error(codes.Unimplemented, "method UpdateStepRecoveryState not implemented") +} func (UnimplementedWoodpeckerServer) mustEmbedUnimplementedWoodpeckerServer() {} func (UnimplementedWoodpeckerServer) testEmbeddedByValue() {} @@ -463,6 +497,42 @@ func _Woodpecker_ReportHealth_Handler(srv interface{}, ctx context.Context, dec return interceptor(ctx, in, info, handler) } +func _Woodpecker_InitWorkflowRecovery_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(InitWorkflowRecoveryRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(WoodpeckerServer).InitWorkflowRecovery(ctx, in) + } + info := 
&grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Woodpecker_InitWorkflowRecovery_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(WoodpeckerServer).InitWorkflowRecovery(ctx, req.(*InitWorkflowRecoveryRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Woodpecker_UpdateStepRecoveryState_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(UpdateStepRecoveryStateRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(WoodpeckerServer).UpdateStepRecoveryState(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Woodpecker_UpdateStepRecoveryState_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(WoodpeckerServer).UpdateStepRecoveryState(ctx, req.(*UpdateStepRecoveryStateRequest)) + } + return interceptor(ctx, in, info, handler) +} + // Woodpecker_ServiceDesc is the grpc.ServiceDesc for Woodpecker service. 
// It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -514,6 +584,14 @@ var Woodpecker_ServiceDesc = grpc.ServiceDesc{ MethodName: "ReportHealth", Handler: _Woodpecker_ReportHealth_Handler, }, + { + MethodName: "InitWorkflowRecovery", + Handler: _Woodpecker_InitWorkflowRecovery_Handler, + }, + { + MethodName: "UpdateStepRecoveryState", + Handler: _Woodpecker_UpdateStepRecoveryState_Handler, + }, }, Streams: []grpc.StreamDesc{}, Metadata: "woodpecker.proto", diff --git a/rpc/types.go b/rpc/types.go index 18dc6ca2338..6a73ae9320f 100644 --- a/rpc/types.go +++ b/rpc/types.go @@ -63,4 +63,9 @@ type ( Capacity int `json:"capacity"` CustomLabels map[string]string `json:"custom_labels"` } + + AgentConfig struct { + AgentID int64 `json:"agent_id"` + RecoveryEnabled bool `json:"recovery_enabled"` + } ) diff --git a/server/model/step_recovery.go b/server/model/step_recovery.go new file mode 100644 index 00000000000..4e3ef6e36c9 --- /dev/null +++ b/server/model/step_recovery.go @@ -0,0 +1,36 @@ +// Copyright 2026 Woodpecker Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package model + +// StepRecoveryState represents the recovery state for a workflow step. +// This is used to track step progress and enable agent restart recovery. 
+type StepRecoveryState struct { + ID int64 `xorm:"pk autoincr 'id'"` + WorkflowID string `xorm:"VARCHAR(250) UNIQUE(s) INDEX 'workflow_id'"` // Task ID from queue + StepUUID string `xorm:"VARCHAR(250) UNIQUE(s) 'step_uuid'"` // Step UUID within workflow + Status int `xorm:"'status'"` // Recovery status (see rpc.RecoveryStatus) + ExitCode int `xorm:"'exit_code'"` + StartedAt int64 `xorm:"'started_at'"` + FinishedAt int64 `xorm:"'finished_at'"` + AgentID int64 `xorm:"'agent_id'"` + CreatedAt int64 `xorm:"created 'created_at'"` + UpdatedAt int64 `xorm:"updated 'updated_at'"` + ExpiresAt int64 `xorm:"INDEX 'expires_at'"` // For cleanup of old states +} + +// TableName returns the database table name. +func (StepRecoveryState) TableName() string { + return "step_recovery_states" +} diff --git a/server/rpc/rpc.go b/server/rpc/rpc.go index 6fa97735d73..404eb8242eb 100644 --- a/server/rpc/rpc.go +++ b/server/rpc/rpc.go @@ -29,6 +29,7 @@ import ( "github.com/rs/zerolog/log" grpcMetadata "google.golang.org/grpc/metadata" + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" "go.woodpecker-ci.org/woodpecker/v3/rpc" "go.woodpecker-ci.org/woodpecker/v3/server" "go.woodpecker-ci.org/woodpecker/v3/server/forge" @@ -43,13 +44,17 @@ import ( // updateAgentLastWorkDelay the delay before the LastWork info should be updated. const updateAgentLastWorkDelay = time.Minute +// ErrRecoveryDisabled is returned when recovery is not enabled on the server. +var ErrRecoveryDisabled = errors.New("pipeline recovery is not enabled on this server") + type RPC struct { - queue queue.Queue - pubsub *pubsub.Publisher - logger logging.Log - store store.Store - pipelineTime *prometheus.GaugeVec - pipelineCount *prometheus.CounterVec + queue queue.Queue + pubsub *pubsub.Publisher + logger logging.Log + store store.Store + pipelineTime *prometheus.GaugeVec + pipelineCount *prometheus.CounterVec + recoveryEnabled bool } // Next blocks until it provides the next workflow to execute. 
@@ -459,10 +464,10 @@ func (s *RPC) Log(c context.Context, stepUUID string, rpcLogEntries []*rpc.LogEn return nil } -func (s *RPC) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (int64, error) { +func (s *RPC) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (rpc.AgentConfig, error) { agent, err := s.getAgentFromContext(ctx) if err != nil { - return -1, err + return rpc.AgentConfig{}, err } if agent.Name == "" { @@ -479,10 +484,13 @@ func (s *RPC) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (int64, err err = s.store.AgentUpdate(agent) if err != nil { - return -1, err + return rpc.AgentConfig{}, err } - return agent.ID, nil + return rpc.AgentConfig{ + AgentID: agent.ID, + RecoveryEnabled: s.recoveryEnabled, + }, nil } // UnregisterAgent removes the agent from the database. @@ -652,3 +660,58 @@ func (s *RPC) updateAgentLastWork(agent *model.Agent) error { return nil } + +// InitWorkflowRecovery initializes recovery state for all steps in a workflow +// and returns the current states. +func (s *RPC) InitWorkflowRecovery(ctx context.Context, workflowID string, stepUUIDs []string, timeoutSeconds int64) (map[string]*types.RecoveryState, error) { + if !s.recoveryEnabled { + return nil, ErrRecoveryDisabled + } + + agent, err := s.getAgentFromContext(ctx) + if err != nil { + return nil, err + } + + expiresAt := time.Now().Add(time.Duration(timeoutSeconds) * time.Second).Unix() + if err := s.store.RecoveryStateCreate(workflowID, stepUUIDs, agent.ID, expiresAt); err != nil { + return nil, err + } + + states, err := s.store.RecoveryStateGetAll(workflowID) + if err != nil { + return nil, err + } + + result := make(map[string]*types.RecoveryState, len(states)) + for _, state := range states { + result[state.StepUUID] = &types.RecoveryState{ + Status: types.RecoveryStatus(state.Status), + ExitCode: state.ExitCode, + } + } + return result, nil +} + +// UpdateStepRecoveryState updates the recovery state for a specific step. 
+func (s *RPC) UpdateStepRecoveryState(ctx context.Context, workflowID, stepUUID string, status types.RecoveryStatus, exitCode int) error { + if !s.recoveryEnabled { + return ErrRecoveryDisabled + } + + state := &model.StepRecoveryState{ + WorkflowID: workflowID, + StepUUID: stepUUID, + Status: int(status), + ExitCode: exitCode, + } + + switch status { + case types.RecoveryStatusRunning: + state.StartedAt = time.Now().Unix() + case types.RecoveryStatusSuccess, types.RecoveryStatusFailed: + state.FinishedAt = time.Now().Unix() + } + + return s.store.RecoveryStateUpdate(state) +} diff --git a/server/rpc/rpc_recovery_test.go b/server/rpc/rpc_recovery_test.go new file mode 100644 index 00000000000..b89d60fb65d --- /dev/null +++ b/server/rpc/rpc_recovery_test.go @@ -0,0 +1,163 @@ +// Copyright 2026 Woodpecker Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package grpc + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/metadata" + + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" + "go.woodpecker-ci.org/woodpecker/v3/server/model" + store_mocks "go.woodpecker-ci.org/woodpecker/v3/server/store/mocks" +) + +func agentCtx(t *testing.T, agentID string) context.Context { + return metadata.NewIncomingContext( + t.Context(), + metadata.Pairs("agent_id", agentID), + ) +} + +func TestInitWorkflowRecovery(t *testing.T) { + t.Run("recovery disabled returns error", func(t *testing.T) { + rpcServer := &RPC{recoveryEnabled: false} + + _, err := rpcServer.InitWorkflowRecovery(t.Context(), "wf-1", []string{"s1"}, 300) + require.ErrorIs(t, err, ErrRecoveryDisabled) + }) + + t.Run("happy path returns correct state map", func(t *testing.T) { + storeMock := store_mocks.NewMockStore(t) + agent := &model.Agent{ID: 42} + storeMock.On("AgentFind", int64(42)).Return(agent, nil) + storeMock.On("RecoveryStateCreate", "wf-1", []string{"s1", "s2"}, int64(42), mock.AnythingOfType("int64")).Return(nil) + storeMock.On("RecoveryStateGetAll", "wf-1").Return([]*model.StepRecoveryState{ + {WorkflowID: "wf-1", StepUUID: "s1", Status: 0, ExitCode: 0}, + {WorkflowID: "wf-1", StepUUID: "s2", Status: 2, ExitCode: 0}, + }, nil) + + rpcServer := &RPC{store: storeMock, recoveryEnabled: true} + ctx := agentCtx(t, "42") + + result, err := rpcServer.InitWorkflowRecovery(ctx, "wf-1", []string{"s1", "s2"}, 300) + require.NoError(t, err) + require.Len(t, result, 2) + + assert.Equal(t, types.RecoveryStatusPending, result["s1"].Status) + assert.Equal(t, types.RecoveryStatusSuccess, result["s2"].Status) + }) + + t.Run("store error on create propagates", func(t *testing.T) { + storeMock := store_mocks.NewMockStore(t) + agent := &model.Agent{ID: 1} + storeMock.On("AgentFind", int64(1)).Return(agent, nil) + 
storeMock.On("RecoveryStateCreate", "wf-1", []string{"s1"}, int64(1), mock.AnythingOfType("int64")).Return(assert.AnError) + + rpcServer := &RPC{store: storeMock, recoveryEnabled: true} + ctx := agentCtx(t, "1") + + _, err := rpcServer.InitWorkflowRecovery(ctx, "wf-1", []string{"s1"}, 300) + require.ErrorIs(t, err, assert.AnError) + }) + + t.Run("store error on GetAll propagates", func(t *testing.T) { + storeMock := store_mocks.NewMockStore(t) + agent := &model.Agent{ID: 1} + storeMock.On("AgentFind", int64(1)).Return(agent, nil) + storeMock.On("RecoveryStateCreate", "wf-1", []string{"s1"}, int64(1), mock.AnythingOfType("int64")).Return(nil) + storeMock.On("RecoveryStateGetAll", "wf-1").Return(nil, assert.AnError) + + rpcServer := &RPC{store: storeMock, recoveryEnabled: true} + ctx := agentCtx(t, "1") + + _, err := rpcServer.InitWorkflowRecovery(ctx, "wf-1", []string{"s1"}, 300) + require.ErrorIs(t, err, assert.AnError) + }) +} + +func TestUpdateStepRecoveryState(t *testing.T) { + t.Run("recovery disabled returns error", func(t *testing.T) { + rpcServer := &RPC{recoveryEnabled: false} + + err := rpcServer.UpdateStepRecoveryState(t.Context(), "wf-1", "s1", types.RecoveryStatusRunning, 0) + require.ErrorIs(t, err, ErrRecoveryDisabled) + }) + + t.Run("status Pending sets no timestamps", func(t *testing.T) { + storeMock := store_mocks.NewMockStore(t) + storeMock.On("RecoveryStateUpdate", mock.MatchedBy(func(s *model.StepRecoveryState) bool { + return s.WorkflowID == "wf-1" && + s.StepUUID == "s1" && + s.Status == int(types.RecoveryStatusPending) && + s.StartedAt == 0 && + s.FinishedAt == 0 + })).Return(nil) + + rpcServer := &RPC{store: storeMock, recoveryEnabled: true} + + err := rpcServer.UpdateStepRecoveryState(t.Context(), "wf-1", "s1", types.RecoveryStatusPending, 0) + require.NoError(t, err) + }) + + t.Run("status Running sets StartedAt", func(t *testing.T) { + storeMock := store_mocks.NewMockStore(t) + storeMock.On("RecoveryStateUpdate", mock.MatchedBy(func(s 
*model.StepRecoveryState) bool { + return s.WorkflowID == "wf-1" && + s.StepUUID == "s1" && + s.Status == int(types.RecoveryStatusRunning) && + s.StartedAt > 0 && + s.FinishedAt == 0 + })).Return(nil) + + rpcServer := &RPC{store: storeMock, recoveryEnabled: true} + + err := rpcServer.UpdateStepRecoveryState(t.Context(), "wf-1", "s1", types.RecoveryStatusRunning, 0) + require.NoError(t, err) + }) + + t.Run("status Success sets FinishedAt", func(t *testing.T) { + storeMock := store_mocks.NewMockStore(t) + storeMock.On("RecoveryStateUpdate", mock.MatchedBy(func(s *model.StepRecoveryState) bool { + return s.Status == int(types.RecoveryStatusSuccess) && + s.FinishedAt > 0 && + s.StartedAt == 0 + })).Return(nil) + + rpcServer := &RPC{store: storeMock, recoveryEnabled: true} + + err := rpcServer.UpdateStepRecoveryState(t.Context(), "wf-1", "s1", types.RecoveryStatusSuccess, 0) + require.NoError(t, err) + }) + + t.Run("status Failed sets FinishedAt and ExitCode", func(t *testing.T) { + storeMock := store_mocks.NewMockStore(t) + storeMock.On("RecoveryStateUpdate", mock.MatchedBy(func(s *model.StepRecoveryState) bool { + return s.Status == int(types.RecoveryStatusFailed) && + s.ExitCode == 137 && + s.FinishedAt > 0 && + s.StartedAt == 0 + })).Return(nil) + + rpcServer := &RPC{store: storeMock, recoveryEnabled: true} + + err := rpcServer.UpdateStepRecoveryState(t.Context(), "wf-1", "s1", types.RecoveryStatusFailed, 137) + require.NoError(t, err) + }) +} diff --git a/server/rpc/rpc_test.go b/server/rpc/rpc_test.go index 04a6a6f6043..48f3bf9be79 100644 --- a/server/rpc/rpc_test.go +++ b/server/rpc/rpc_test.go @@ -57,7 +57,7 @@ func TestRegisterAgent(t *testing.T) { t.Context(), metadata.Pairs("hostname", "hostname", "agent_id", "1337"), ) - agentID, err := grpc.RegisterAgent(ctx, rpc.AgentInfo{ + agentConfig, err := grpc.RegisterAgent(ctx, rpc.AgentInfo{ Version: "version", Platform: "platform", Backend: "backend", @@ -65,7 +65,7 @@ func TestRegisterAgent(t *testing.T) { }) 
require.NoError(t, err) - assert.EqualValues(t, 1337, agentID) + assert.EqualValues(t, 1337, agentConfig.AgentID) }) t.Run("When existing agent hostname is present it should not update the hostname", func(t *testing.T) { @@ -97,7 +97,7 @@ func TestRegisterAgent(t *testing.T) { t.Context(), metadata.Pairs("hostname", "newHostname", "agent_id", "1337"), ) - agentID, err := grpc.RegisterAgent(ctx, rpc.AgentInfo{ + agentConfig, err := grpc.RegisterAgent(ctx, rpc.AgentInfo{ Version: "version", Platform: "platform", Backend: "backend", @@ -105,7 +105,7 @@ func TestRegisterAgent(t *testing.T) { }) require.NoError(t, err) - assert.EqualValues(t, 1337, agentID) + assert.EqualValues(t, 1337, agentConfig.AgentID) }) } diff --git a/server/rpc/server.go b/server/rpc/server.go index c9115ede5ce..d619c0477e3 100644 --- a/server/rpc/server.go +++ b/server/rpc/server.go @@ -22,6 +22,7 @@ import ( prometheus_auto "github.com/prometheus/client_golang/prometheus/promauto" "github.com/rs/zerolog/log" + "go.woodpecker-ci.org/woodpecker/v3/pipeline/types" "go.woodpecker-ci.org/woodpecker/v3/rpc" "go.woodpecker-ci.org/woodpecker/v3/rpc/proto" "go.woodpecker-ci.org/woodpecker/v3/server/logging" @@ -37,7 +38,7 @@ type WoodpeckerServer struct { peer RPC } -func NewWoodpeckerServer(queue queue.Queue, logger logging.Log, pubsub *pubsub.Publisher, store store.Store) proto.WoodpeckerServer { +func NewWoodpeckerServer(queue queue.Queue, logger logging.Log, pubsub *pubsub.Publisher, store store.Store, recoveryEnabled bool) proto.WoodpeckerServer { pipelineTime := prometheus_auto.NewGaugeVec(prometheus.GaugeOpts{ Namespace: "woodpecker", Name: "pipeline_time", @@ -49,12 +50,13 @@ func NewWoodpeckerServer(queue queue.Queue, logger logging.Log, pubsub *pubsub.P Help: "Pipeline count.", }, []string{"repo", "branch", "status", "pipeline"}) peer := RPC{ - store: store, - queue: queue, - pubsub: pubsub, - logger: logger, - pipelineTime: pipelineTime, - pipelineCount: pipelineCount, + store: store, + 
queue: queue, + pubsub: pubsub, + logger: logger, + pipelineTime: pipelineTime, + pipelineCount: pipelineCount, + recoveryEnabled: recoveryEnabled, } return &WoodpeckerServer{peer: peer} } @@ -183,17 +185,23 @@ func (s *WoodpeckerServer) Log(c context.Context, req *proto.LogRequest) (*proto // RegisterAgent register our agent to the server. func (s *WoodpeckerServer) RegisterAgent(c context.Context, req *proto.RegisterAgentRequest) (*proto.RegisterAgentResponse, error) { - res := new(proto.RegisterAgentResponse) agentInfo := req.GetInfo() - agentID, err := s.peer.RegisterAgent(c, rpc.AgentInfo{ + agentConfig, err := s.peer.RegisterAgent(c, rpc.AgentInfo{ Version: agentInfo.GetVersion(), Platform: agentInfo.GetPlatform(), Backend: agentInfo.GetBackend(), Capacity: int(agentInfo.GetCapacity()), CustomLabels: agentInfo.GetCustomLabels(), }) - res.AgentId = agentID - return res, err + if err != nil { + return nil, err + } + return &proto.RegisterAgentResponse{ + Config: &proto.AgentConfig{ + AgentId: agentConfig.AgentID, + RecoveryEnabled: agentConfig.RecoveryEnabled, + }, + }, nil } // UnregisterAgent unregister our agent from the server. 
@@ -208,3 +216,32 @@ func (s *WoodpeckerServer) ReportHealth(c context.Context, req *proto.ReportHeal
 	err := s.peer.ReportHealth(c, req.GetStatus())
 	return res, err
 }
+
+// InitWorkflowRecovery passes the workflow ID, step UUIDs and timeout of the request to the
+// peer and converts the returned per-step recovery states into their protobuf form.
+func (s *WoodpeckerServer) InitWorkflowRecovery(c context.Context, req *proto.InitWorkflowRecoveryRequest) (*proto.InitWorkflowRecoveryResponse, error) {
+	states, err := s.peer.InitWorkflowRecovery(c, req.GetWorkflowId(), req.GetStepUuids(), req.GetTimeoutSeconds())
+	if err != nil {
+		return nil, err
+	}
+
+	// states is keyed by step UUID; map iteration order is unspecified, so the
+	// entries of the response carry no particular order.
+	protoStates := make([]*proto.StepRecoveryState, 0, len(states))
+	for stepUUID, state := range states {
+		protoStates = append(protoStates, &proto.StepRecoveryState{
+			StepUuid: stepUUID,
+			Status:   proto.RecoveryStatus(state.Status),
+			ExitCode: int32(state.ExitCode),
+		})
+	}
+	return &proto.InitWorkflowRecoveryResponse{States: protoStates}, nil
+}
+
+// UpdateStepRecoveryState forwards the recovery status and exit code of one step to the
+// peer and answers with an empty message alongside the peer's error.
+func (s *WoodpeckerServer) UpdateStepRecoveryState(c context.Context, req *proto.UpdateStepRecoveryStateRequest) (*proto.Empty, error) {
+	res := new(proto.Empty)
+	err := s.peer.UpdateStepRecoveryState(c, req.GetWorkflowId(), req.GetStepUuid(), types.RecoveryStatus(req.GetStatus()), int(req.GetExitCode()))
+	return res, err
+}
diff --git a/server/store/datastore/migration/migration.go b/server/store/datastore/migration/migration.go
index e5e72ea7896..625f03fee44 100644
--- a/server/store/datastore/migration/migration.go
+++ b/server/store/datastore/migration/migration.go
@@ -76,6 +76,7 @@ var allBeans = []any{
 	new(model.Forge),
 	new(model.Workflow),
 	new(model.Org),
+	new(model.StepRecoveryState),
 }

 // TODO: make xormigrate context aware
diff --git a/server/store/datastore/recovery_state.go b/server/store/datastore/recovery_state.go
new file mode 100644
index 00000000000..953419bd131
--- /dev/null
+++ b/server/store/datastore/recovery_state.go
@@ -0,0 +1,74 @@
+// Copyright 2026 Woodpecker Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package datastore
+
+import (
+	"time"
+
+	"go.woodpecker-ci.org/woodpecker/v3/server/model"
+)
+
+// RecoveryStateCreate inserts one recovery state per step UUID of the given workflow.
+// It does nothing when stepUUIDs is empty or when states already exist for the workflow.
+func (s storage) RecoveryStateCreate(workflowID string, stepUUIDs []string, agentID, expiresAt int64) error {
+	// xorm's Insert fails on an empty slice, so an empty step list is treated as a no-op.
+	if len(stepUUIDs) == 0 {
+		return nil
+	}
+	exists, err := s.engine.Where("workflow_id = ?", workflowID).Exist(new(model.StepRecoveryState))
+	if err != nil {
+		return err
+	}
+	if exists {
+		return nil
+	}
+
+	now := time.Now().Unix()
+	states := make([]*model.StepRecoveryState, 0, len(stepUUIDs))
+	for _, stepUUID := range stepUUIDs {
+		states = append(states, &model.StepRecoveryState{
+			WorkflowID: workflowID,
+			StepUUID:   stepUUID,
+			Status:     0,
+			AgentID:    agentID,
+			CreatedAt:  now,
+			UpdatedAt:  now,
+			ExpiresAt:  expiresAt,
+		})
+	}
+	_, err = s.engine.Insert(&states)
+	return err
+}
+
+// RecoveryStateGetAll returns every recovery state stored for the given workflow.
+func (s storage) RecoveryStateGetAll(workflowID string) ([]*model.StepRecoveryState, error) {
+	var states []*model.StepRecoveryState
+	err := s.engine.Where("workflow_id = ?", workflowID).Find(&states)
+	return states, err
+}
+
+// RecoveryStateUpdate writes status, exit code and timestamps of the row matching the state's WorkflowID and StepUUID.
+func (s storage) RecoveryStateUpdate(state *model.StepRecoveryState) error {
+	_, err := s.engine.Where("workflow_id = ? AND step_uuid = ?", state.WorkflowID, state.StepUUID).
+		Cols("status", "exit_code", "started_at", "finished_at", "updated_at").
+		Update(state)
+	return err
+}
+
+// RecoveryStateCleanExpired deletes all recovery states whose expires_at lies before the current time.
+func (s storage) RecoveryStateCleanExpired() error {
+	_, err := s.engine.Where("expires_at < ?", time.Now().Unix()).Delete(new(model.StepRecoveryState))
+	return err
+}
diff --git a/server/store/datastore/recovery_state_test.go b/server/store/datastore/recovery_state_test.go
new file mode 100644
index 00000000000..ebf622b2035
--- /dev/null
+++ b/server/store/datastore/recovery_state_test.go
@@ -0,0 +1,154 @@
+// Copyright 2026 Woodpecker Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package datastore
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"go.woodpecker-ci.org/woodpecker/v3/server/model"
+)
+
+func TestRecoveryStateCreateAndGetAll(t *testing.T) {
+	store, closer := newTestStore(t, new(model.StepRecoveryState))
+	defer closer()
+
+	workflowID := "workflow-123"
+	stepUUIDs := []string{"step-a", "step-b", "step-c"}
+	agentID := int64(42)
+	expiresAt := time.Now().Add(time.Hour).Unix()
+
+	err := store.RecoveryStateCreate(workflowID, stepUUIDs, agentID, expiresAt)
+	require.NoError(t, err)
+
+	states, err := store.RecoveryStateGetAll(workflowID)
+	require.NoError(t, err)
+	require.Len(t, states, 3)
+
+	uuids := make(map[string]bool)
+	for _, s := range states {
+		assert.Equal(t, workflowID, s.WorkflowID)
+		assert.Equal(t, 0, s.Status)
+		assert.Equal(t, agentID, s.AgentID)
+		assert.Equal(t, expiresAt, s.ExpiresAt)
+		assert.Greater(t, s.CreatedAt, int64(0), "CreatedAt should be auto-populated")
+		assert.Greater(t, s.UpdatedAt, int64(0), "UpdatedAt should be auto-populated")
+		uuids[s.StepUUID] = true
+	}
+	for _, uuid := range stepUUIDs {
+		assert.True(t, uuids[uuid], "expected step UUID %s", uuid)
+	}
+}
+
+func TestRecoveryStateCreateIdempotent(t *testing.T) {
+	store, closer := newTestStore(t, new(model.StepRecoveryState))
+	defer closer()
+
+	workflowID := "workflow-456"
+	expiresAt := time.Now().Add(time.Hour).Unix()
+
+	err := store.RecoveryStateCreate(workflowID, []string{"step-1", "step-2"}, 1, expiresAt)
+	require.NoError(t, err)
+
+	// A second call for the same workflow must be a no-op, even with different steps and agent.
+	err = store.RecoveryStateCreate(workflowID, []string{"step-3", "step-4"}, 2, expiresAt)
+	require.NoError(t, err)
+
+	states, err := store.RecoveryStateGetAll(workflowID)
+	require.NoError(t, err)
+	require.Len(t, states, 2)
+
+	uuids := make(map[string]bool)
+	for _, s := range states {
+		uuids[s.StepUUID] = true
+	}
+	assert.True(t, uuids["step-1"])
+	assert.True(t, uuids["step-2"])
+	assert.False(t, uuids["step-3"])
+	assert.False(t, uuids["step-4"])
+}
+
+func TestRecoveryStateUpdate(t *testing.T) {
+	store, closer := newTestStore(t, new(model.StepRecoveryState))
+	defer closer()
+
+	workflowID := "workflow-789"
+	expiresAt := time.Now().Add(time.Hour).Unix()
+
+	err := store.RecoveryStateCreate(workflowID, []string{"step-x", "step-y"}, 1, expiresAt)
+	require.NoError(t, err)
+
+	now := time.Now().Unix()
+	err = store.RecoveryStateUpdate(&model.StepRecoveryState{
+		WorkflowID: workflowID,
+		StepUUID:   "step-x",
+		Status:     2, // Success
+		ExitCode:   0,
+		FinishedAt: now,
+	})
+	require.NoError(t, err)
+
+	states, err := store.RecoveryStateGetAll(workflowID)
+	require.NoError(t, err)
+	require.Len(t, states, 2)
+
+	for _, s := range states {
+		if s.StepUUID == "step-x" {
+			assert.Equal(t, 2, s.Status)
+			assert.Equal(t, 0, s.ExitCode)
+			assert.Equal(t, now, s.FinishedAt)
+		} else {
+			assert.Equal(t, "step-y", s.StepUUID)
+			assert.Equal(t, 0, s.Status)
+		}
+	}
+}
+
+func TestRecoveryStateCleanExpired(t *testing.T) {
+	store, closer := newTestStore(t, new(model.StepRecoveryState))
+	defer closer()
+
+	pastExpiry := time.Now().Add(-time.Hour).Unix()
+	futureExpiry := time.Now().Add(time.Hour).Unix()
+
+	err := store.RecoveryStateCreate("expired-wf", []string{"s1", "s2"}, 1, pastExpiry)
+	require.NoError(t, err)
+
+	err = store.RecoveryStateCreate("active-wf", []string{"s3", "s4"}, 2, futureExpiry)
+	require.NoError(t, err)
+
+	err = store.RecoveryStateCleanExpired()
+	require.NoError(t, err)
+
+	expiredStates, err := store.RecoveryStateGetAll("expired-wf")
+	require.NoError(t, err)
+	assert.Empty(t, expiredStates)
+
+	activeStates, err := store.RecoveryStateGetAll("active-wf")
+	require.NoError(t, err)
+	assert.Len(t, activeStates, 2)
+}
+
+func TestRecoveryStateGetAllNonExistent(t *testing.T) {
+	store, closer := newTestStore(t, new(model.StepRecoveryState))
+	defer closer()
+
+	states, err :=
store.RecoveryStateGetAll("does-not-exist")
+	require.NoError(t, err)
+	assert.Empty(t, states)
+}
diff --git a/server/store/mocks/mock_Store.go b/server/store/mocks/mock_Store.go
index 7a0a6ba7a54..7e7370556d1 100644
--- a/server/store/mocks/mock_Store.go
+++ b/server/store/mocks/mock_Store.go
@@ -4478,6 +4478,232 @@ func (_c *MockStore_PipelineConfigCreate_Call) RunAndReturn(run func(pipelineCon
 	return _c
 }

+// RecoveryStateCleanExpired provides a mock function for the type MockStore
+func (_mock *MockStore) RecoveryStateCleanExpired() error {
+	ret := _mock.Called()
+
+	if len(ret) == 0 {
+		panic("no return value specified for RecoveryStateCleanExpired")
+	}
+
+	var r0 error
+	if returnFunc, ok := ret.Get(0).(func() error); ok {
+		r0 = returnFunc()
+	} else {
+		r0 = ret.Error(0)
+	}
+	return r0
+}
+
+// MockStore_RecoveryStateCleanExpired_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RecoveryStateCleanExpired'
+type MockStore_RecoveryStateCleanExpired_Call struct {
+	*mock.Call
+}
+
+// RecoveryStateCleanExpired is a helper method to define mock.On call
+func (_e *MockStore_Expecter) RecoveryStateCleanExpired() *MockStore_RecoveryStateCleanExpired_Call {
+	return &MockStore_RecoveryStateCleanExpired_Call{Call: _e.mock.On("RecoveryStateCleanExpired")}
+}
+
+func (_c *MockStore_RecoveryStateCleanExpired_Call) Run(run func()) *MockStore_RecoveryStateCleanExpired_Call {
+	_c.Call.Run(func(args mock.Arguments) {
+		run()
+	})
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateCleanExpired_Call) Return(err error) *MockStore_RecoveryStateCleanExpired_Call {
+	_c.Call.Return(err)
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateCleanExpired_Call) RunAndReturn(run func() error) *MockStore_RecoveryStateCleanExpired_Call {
+	_c.Call.Return(run)
+	return _c
+}
+
+// RecoveryStateCreate provides a mock function for the type MockStore
+func (_mock *MockStore) RecoveryStateCreate(workflowID string, stepUUIDs []string, agentID int64, expiresAt int64) error {
+	ret := _mock.Called(workflowID, stepUUIDs, agentID, expiresAt)
+
+	if len(ret) == 0 {
+		panic("no return value specified for RecoveryStateCreate")
+	}
+
+	var r0 error
+	if returnFunc, ok := ret.Get(0).(func(string, []string, int64, int64) error); ok {
+		r0 = returnFunc(workflowID, stepUUIDs, agentID, expiresAt)
+	} else {
+		r0 = ret.Error(0)
+	}
+	return r0
+}
+
+// MockStore_RecoveryStateCreate_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RecoveryStateCreate'
+type MockStore_RecoveryStateCreate_Call struct {
+	*mock.Call
+}
+
+// RecoveryStateCreate is a helper method to define mock.On call
+//   - workflowID string
+//   - stepUUIDs []string
+//   - agentID int64
+//   - expiresAt int64
+func (_e *MockStore_Expecter) RecoveryStateCreate(workflowID interface{}, stepUUIDs interface{}, agentID interface{}, expiresAt interface{}) *MockStore_RecoveryStateCreate_Call {
+	return &MockStore_RecoveryStateCreate_Call{Call: _e.mock.On("RecoveryStateCreate", workflowID, stepUUIDs, agentID, expiresAt)}
+}
+
+func (_c *MockStore_RecoveryStateCreate_Call) Run(run func(workflowID string, stepUUIDs []string, agentID int64, expiresAt int64)) *MockStore_RecoveryStateCreate_Call {
+	_c.Call.Run(func(args mock.Arguments) {
+		var arg0 string
+		if args[0] != nil {
+			arg0 = args[0].(string)
+		}
+		var arg1 []string
+		if args[1] != nil {
+			arg1 = args[1].([]string)
+		}
+		var arg2 int64
+		if args[2] != nil {
+			arg2 = args[2].(int64)
+		}
+		var arg3 int64
+		if args[3] != nil {
+			arg3 = args[3].(int64)
+		}
+		run(
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+		)
+	})
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateCreate_Call) Return(err error) *MockStore_RecoveryStateCreate_Call {
+	_c.Call.Return(err)
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateCreate_Call) RunAndReturn(run func(workflowID string, stepUUIDs []string, agentID int64, expiresAt int64) error) *MockStore_RecoveryStateCreate_Call {
+	_c.Call.Return(run)
+	return _c
+}
+
+// RecoveryStateGetAll provides a mock function for the type MockStore
+func (_mock *MockStore) RecoveryStateGetAll(workflowID string) ([]*model.StepRecoveryState, error) {
+	ret := _mock.Called(workflowID)
+
+	if len(ret) == 0 {
+		panic("no return value specified for RecoveryStateGetAll")
+	}
+
+	var r0 []*model.StepRecoveryState
+	var r1 error
+	if returnFunc, ok := ret.Get(0).(func(string) ([]*model.StepRecoveryState, error)); ok {
+		return returnFunc(workflowID)
+	}
+	if returnFunc, ok := ret.Get(0).(func(string) []*model.StepRecoveryState); ok {
+		r0 = returnFunc(workflowID)
+	} else {
+		if ret.Get(0) != nil {
+			r0 = ret.Get(0).([]*model.StepRecoveryState)
+		}
+	}
+	if returnFunc, ok := ret.Get(1).(func(string) error); ok {
+		r1 = returnFunc(workflowID)
+	} else {
+		r1 = ret.Error(1)
+	}
+	return r0, r1
+}
+
+// MockStore_RecoveryStateGetAll_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RecoveryStateGetAll'
+type MockStore_RecoveryStateGetAll_Call struct {
+	*mock.Call
+}
+
+// RecoveryStateGetAll is a helper method to define mock.On call
+//   - workflowID string
+func (_e *MockStore_Expecter) RecoveryStateGetAll(workflowID interface{}) *MockStore_RecoveryStateGetAll_Call {
+	return &MockStore_RecoveryStateGetAll_Call{Call: _e.mock.On("RecoveryStateGetAll", workflowID)}
+}
+
+func (_c *MockStore_RecoveryStateGetAll_Call) Run(run func(workflowID string)) *MockStore_RecoveryStateGetAll_Call {
+	_c.Call.Run(func(args mock.Arguments) {
+		var arg0 string
+		if args[0] != nil {
+			arg0 = args[0].(string)
+		}
+		run(
+			arg0,
+		)
+	})
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateGetAll_Call) Return(stepRecoveryStates []*model.StepRecoveryState, err error) *MockStore_RecoveryStateGetAll_Call {
+	_c.Call.Return(stepRecoveryStates, err)
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateGetAll_Call) RunAndReturn(run func(workflowID string) ([]*model.StepRecoveryState, error)) *MockStore_RecoveryStateGetAll_Call {
+	_c.Call.Return(run)
+	return _c
+}
+
+// RecoveryStateUpdate provides a mock function for the type MockStore
+func (_mock *MockStore) RecoveryStateUpdate(state *model.StepRecoveryState) error {
+	ret := _mock.Called(state)
+
+	if len(ret) == 0 {
+		panic("no return value specified for RecoveryStateUpdate")
+	}
+
+	var r0 error
+	if returnFunc, ok := ret.Get(0).(func(*model.StepRecoveryState) error); ok {
+		r0 = returnFunc(state)
+	} else {
+		r0 = ret.Error(0)
+	}
+	return r0
+}
+
+// MockStore_RecoveryStateUpdate_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RecoveryStateUpdate'
+type MockStore_RecoveryStateUpdate_Call struct {
+	*mock.Call
+}
+
+// RecoveryStateUpdate is a helper method to define mock.On call
+//   - state *model.StepRecoveryState
+func (_e *MockStore_Expecter) RecoveryStateUpdate(state interface{}) *MockStore_RecoveryStateUpdate_Call {
+	return &MockStore_RecoveryStateUpdate_Call{Call: _e.mock.On("RecoveryStateUpdate", state)}
+}
+
+func (_c *MockStore_RecoveryStateUpdate_Call) Run(run func(state *model.StepRecoveryState)) *MockStore_RecoveryStateUpdate_Call {
+	_c.Call.Run(func(args mock.Arguments) {
+		var arg0 *model.StepRecoveryState
+		if args[0] != nil {
+			arg0 = args[0].(*model.StepRecoveryState)
+		}
+		run(
+			arg0,
+		)
+	})
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateUpdate_Call) Return(err error) *MockStore_RecoveryStateUpdate_Call {
+	_c.Call.Return(err)
+	return _c
+}
+
+func (_c *MockStore_RecoveryStateUpdate_Call) RunAndReturn(run func(state *model.StepRecoveryState) error) *MockStore_RecoveryStateUpdate_Call {
+	_c.Call.Return(run)
+	return _c
+}
+
 // RegistryCreate provides a mock function for the type MockStore
 func (_mock *MockStore) RegistryCreate(registry *model.Registry) error {
 	ret := _mock.Called(registry)
diff --git a/server/store/store.go b/server/store/store.go
index 5041749081b..7978d8a7eb9 100644
--- a/server/store/store.go
+++
b/server/store/store.go @@ -204,6 +204,12 @@ type Store interface { // Org repos OrgRepoList(*model.Org, *model.ListOptions) ([]*model.Repo, error) + // Recovery State + RecoveryStateCreate(workflowID string, stepUUIDs []string, agentID, expiresAt int64) error + RecoveryStateGetAll(workflowID string) ([]*model.StepRecoveryState, error) + RecoveryStateUpdate(state *model.StepRecoveryState) error + RecoveryStateCleanExpired() error + // Store operations Ping() error Close() error