|
| 1 | +package jobqueue |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "time" |
| 6 | +) |
| 7 | + |
// JobStatus is the state a job is currently in while it lives in the queue.
// It is a string type, so values can be compared with and converted to their
// plain-text form directly.
type JobStatus string

// Known job states.
const (
	// JobStatusPending is the "pending" state.
	JobStatusPending = JobStatus("pending")
	// JobStatusProcessing is the "processing" state.
	JobStatusProcessing = JobStatus("processing")
	// JobStatusCompleted is the "completed" state.
	JobStatusCompleted = JobStatus("completed")
	// JobStatusFailed is the "failed" state.
	JobStatusFailed = JobStatus("failed")
)
| 17 | + |
// Jobable must be implemented by every payload type stored in the queue.
// It exposes the chain selector and message ID that the queue uses for
// database indexing and querying.
type Jobable interface {
	// JobKey reports the payload's chain selector and message ID, in that
	// order. The queue uses them for database indexing, querying, and job
	// routing.
	JobKey() (chainSelector, messageID string)
}
| 25 | + |
| 26 | +// Job wraps a payload with queue metadata. |
| 27 | +type Job[T Jobable] struct { |
| 28 | + // Unique job identifier |
| 29 | + ID string |
| 30 | + // The actual payload to process |
| 31 | + Payload T |
| 32 | + // Number of times this job has been attempted |
| 33 | + AttemptCount int |
| 34 | + // Deadline after which retries are no longer allowed |
| 35 | + RetryDeadline time.Time |
| 36 | + // When the job was created |
| 37 | + CreatedAt time.Time |
| 38 | + // When processing started (nil if not started) |
| 39 | + StartedAt *time.Time |
| 40 | + // Chain selector for routing and monitoring |
| 41 | + ChainSelector string |
| 42 | + // Message ID for deduplication and tracking |
| 43 | + MessageID string |
| 44 | +} |
| 45 | + |
| 46 | +// JobQueue defines a generic durable queue interface backed by persistent storage. |
| 47 | +// The queue supports delayed retry, dead letter handling, and concurrent processing. |
| 48 | +// Type T must implement Jobable to provide chain selector and message ID. |
| 49 | +type JobQueue[T Jobable] interface { |
| 50 | + // Publish adds one or more jobs to the queue. |
| 51 | + // Jobs are immediately available for consumption unless a delay is specified. |
| 52 | + Publish(ctx context.Context, jobs ...T) error |
| 53 | + // PublishWithDelay adds jobs that become available after the specified delay. |
| 54 | + // Useful for implementing retry backoff strategies. |
| 55 | + PublishWithDelay(ctx context.Context, delay time.Duration, jobs ...T) error |
| 56 | + // Consume retrieves and locks up to batchSize jobs for processing. |
| 57 | + // Jobs in 'pending' or 'failed' status that are past their available_at time are eligible. |
| 58 | + // Additionally, jobs stuck in 'processing' for longer than the configured LockDuration |
| 59 | + // are considered stale (e.g. from a crashed worker) and are automatically reclaimed. |
| 60 | + // Returns empty slice if no jobs are available. |
| 61 | + // |
| 62 | + // The implementation should use SELECT FOR UPDATE SKIP LOCKED to ensure |
| 63 | + // concurrent consumers don't compete for the same jobs. |
| 64 | + Consume(ctx context.Context, batchSize int) ([]Job[T], error) |
| 65 | + // Complete marks jobs as successfully processed and removes them from active queue. |
| 66 | + // Completed jobs may be moved to an archive table for audit purposes. |
| 67 | + Complete(ctx context.Context, jobIDs ...string) error |
| 68 | + // Retry schedules jobs for retry after the specified delay. |
| 69 | + // Increments attempt count and records the error message. |
| 70 | + // If max attempts is exceeded, jobs are moved to failed status. |
| 71 | + Retry(ctx context.Context, delay time.Duration, errors map[string]error, jobIDs ...string) error |
| 72 | + // Fail marks jobs as permanently failed. |
| 73 | + // These jobs will not be retried and should be investigated. |
| 74 | + Fail(ctx context.Context, errors map[string]error, jobIDs ...string) error |
| 75 | + // Cleanup archives or deletes jobs older than the retention period. |
| 76 | + // Should be called periodically to prevent unbounded table growth. |
| 77 | + Cleanup(ctx context.Context, retentionPeriod time.Duration) (int, error) |
| 78 | + // Name returns the queue name for logging and monitoring |
| 79 | + Name() string |
| 80 | +} |
| 81 | + |
// QueueConfig holds the settings that control how a queue behaves.
type QueueConfig struct {
	// Name is the queue name, used for logging and table naming.
	Name string
	// OwnerID scopes jobs so that multiple verifiers sharing the same table
	// only consume their own jobs (e.g. "CCTPVerifier", "LombardVerifier").
	OwnerID string
	// RetryDuration is how long, measured from creation, a job stays
	// eligible for retry. Once it has elapsed, a failed retry marks the job
	// as permanently failed.
	RetryDuration time.Duration
	// LockDuration is how long a job may stay in 'processing' before it is
	// considered stale and automatically reclaimed by the next Consume call.
	LockDuration time.Duration
}
0 commit comments