Skip to content

Commit c96f2bc

Browse files
authored
fix(worker): avoid worker cleanup on failure restart, remove exp. backoff (#483)
* avoid worker cleanup on failure restart, remove exponential backoff
1 parent be5ea87 commit c96f2bc

File tree

1 file changed

+3
-22
lines changed

1 file changed

+3
-22
lines changed

crates/worker/src/docker/service.rs

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@ pub struct DockerService {
2727
}
2828

2929
const TASK_PREFIX: &str = "prime-task";
30-
const INITIAL_BACKOFF_SECONDS: i64 = 5; // Start with 5 seconds
31-
const MAX_BACKOFF_SECONDS: i64 = 300; // Cap at 5 minutes
32-
const MAX_CONSECUTIVE_FAILURES: i64 = 100;
30+
const RESTART_INTERVAL_SECONDS: i64 = 10;
3331

3432
impl DockerService {
3533
pub fn new(
@@ -158,22 +156,14 @@ impl DockerService {
158156
};
159157
let elapsed = Utc::now().signed_duration_since(last_started_time).num_seconds();
160158

161-
// Calculate backoff time using exponential backoff
162-
let backoff_seconds = if consecutive_failures > 0 {
163-
// Clamp consecutive_failures to prevent overflow
164-
let clamped_failures = consecutive_failures.min(MAX_CONSECUTIVE_FAILURES);
165-
let backoff = INITIAL_BACKOFF_SECONDS.saturating_mul(2_i64.saturating_pow(clamped_failures as u32 - 1));
166-
backoff.min(MAX_BACKOFF_SECONDS)
167-
} else {
168-
INITIAL_BACKOFF_SECONDS
169-
};
159+
let backoff_seconds = RESTART_INTERVAL_SECONDS;
170160

171161
// wait for backoff period before starting a new container
172162
if elapsed < backoff_seconds {
173163
Console::info("DockerService", &format!("Waiting before starting new container ({}s remaining)...", backoff_seconds - elapsed));
174164
} else {
175165
if consecutive_failures > 0 {
176-
Console::info("DockerService", &format!("Starting new container after {} failures...", consecutive_failures));
166+
Console::info("DockerService", &format!("Starting new container after {} failures ({}s interval)...", consecutive_failures, RESTART_INTERVAL_SECONDS));
177167
} else {
178168
Console::info("DockerService", "Starting new container...");
179169
}
@@ -305,15 +295,6 @@ impl DockerService {
305295
consecutive_failures += 1;
306296
Console::info("DockerService", &format!("Task failed (attempt {}), waiting with exponential backoff before restart", consecutive_failures));
307297

308-
let terminate_manager_clone = terminate_manager.clone();
309-
let handle = tokio::spawn(async move {
310-
let termination = terminate_manager_clone.remove_container(&container_status.id).await;
311-
match termination {
312-
Ok(_) => Console::info("DockerService", "Container terminated successfully"),
313-
Err(e) => log::error!("Error terminating container: {}", e)
314-
}
315-
});
316-
terminating_container_tasks.lock().await.push(handle);
317298
} else if task_state_live == TaskState::RUNNING {
318299
// Reset failure counter when container runs successfully
319300
consecutive_failures = 0;

0 commit comments

Comments
 (0)