Skip to content

Commit 757c984

Browse files
chore: add retry functionality in the systemDb connection
1 parent f3c10f3 commit 757c984

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

dbos/dbos.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,15 @@ type DBOSContext interface {
101101
Shutdown(timeout time.Duration) // Gracefully shutdown all DBOS runtime components with ordered cleanup sequence
102102

103103
// Workflow operations
104-
RunAsStep(_ DBOSContext, fn StepFunc, opts ...StepOption) (any, error) // Execute a function as a durable step within a workflow
104+
RunAsStep(_ DBOSContext, fn StepFunc, opts ...StepOption) (any, error) // Execute a function as a durable step within a workflow
105105
RunWorkflow(_ DBOSContext, fn WorkflowFunc, input any, opts ...WorkflowOption) (WorkflowHandle[any], error) // Start a new workflow execution
106-
Send(_ DBOSContext, destinationID string, message any, topic string) error // Send a message to another workflow
107-
Recv(_ DBOSContext, topic string, timeout time.Duration) (any, error) // Receive a message sent to this workflow
108-
SetEvent(_ DBOSContext, key string, message any) error // Set a key-value event for this workflow
109-
GetEvent(_ DBOSContext, targetWorkflowID string, key string, timeout time.Duration) (any, error) // Get a key-value event from a target workflow
110-
Sleep(_ DBOSContext, duration time.Duration) (time.Duration, error) // Durable sleep that survives workflow recovery
111-
GetWorkflowID() (string, error) // Get the current workflow ID (only available within workflows)
112-
GetStepID() (int, error) // Get the current step ID (only available within workflows)
106+
Send(_ DBOSContext, destinationID string, message any, topic string) error // Send a message to another workflow
107+
Recv(_ DBOSContext, topic string, timeout time.Duration) (any, error) // Receive a message sent to this workflow
108+
SetEvent(_ DBOSContext, key string, message any) error // Set a key-value event for this workflow
109+
GetEvent(_ DBOSContext, targetWorkflowID string, key string, timeout time.Duration) (any, error) // Get a key-value event from a target workflow
110+
Sleep(_ DBOSContext, duration time.Duration) (time.Duration, error) // Durable sleep that survives workflow recovery
111+
GetWorkflowID() (string, error) // Get the current workflow ID (only available within workflows)
112+
GetStepID() (int, error) // Get the current step ID (only available within workflows)
113113

114114
// Workflow management
115115
RetrieveWorkflow(_ DBOSContext, workflowID string) (WorkflowHandle[any], error) // Get a handle to an existing workflow
@@ -317,12 +317,28 @@ func NewDBOSContext(inputConfig Config) (DBOSContext, error) {
317317
initExecutor.executorID = config.ExecutorID
318318

319319
initExecutor.applicationID = os.Getenv("DBOS__APPID")
320+
maxRetries := 5
321+
retryDelay := time.Second * 2
322+
323+
var systemDB systemDatabase
324+
325+
for attempt := 1; attempt <= maxRetries; attempt++ {
326+
systemDB, err = newSystemDatabase(initExecutor, config.DatabaseURL, initExecutor.logger)
327+
if err == nil {
328+
break
329+
}
330+
initExecutor.logger.Warn("Failed to connect to system DB (attempt %d/%d): %v", attempt, maxRetries, err)
331+
332+
if attempt < maxRetries {
333+
time.Sleep(retryDelay)
334+
retryDelay *= 2 // Exponential backoff
335+
}
336+
}
320337

321-
// Create the system database
322-
systemDB, err := newSystemDatabase(initExecutor, config.DatabaseURL, initExecutor.logger)
323338
if err != nil {
324-
return nil, newInitializationError(fmt.Sprintf("failed to create system database: %v", err))
339+
return nil, newInitializationError(fmt.Sprintf("failed to create system database after %d attempts: %v", maxRetries, err))
325340
}
341+
326342
initExecutor.systemDB = systemDB
327343
initExecutor.logger.Info("System database initialized")
328344

0 commit comments

Comments
 (0)