feat: make max_retries configurable per training env (#2025)

mikasenghaas · claude · web-flow · commit 8fa0dde0be16 · 2026-03-13T01:57:33.000-07:00
Move max_retries from EvalEnvConfig to EnvConfig so it applies to both
training and eval environments. The scheduler now looks up max_retries
per task from the env config instead of hardcoding 0.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/prime_rl/configs/orchestrator.py b/src/prime_rl/configs/orchestrator.py
@@ -287,6 +287,13 @@ class EnvConfig(BaseConfig):
             ),
         ),
     ] = {}
+    max_retries: Annotated[
+        int,
+        Field(
+            ge=0,
+            description="Maximum number of times the environment will retry a failed rollout.",
+        ),
+    ] = 0
 
     @property
     def resolved_name(self) -> str:
@@ -324,17 +331,6 @@ class EvalEnvConfig(EnvConfig):
         ),
     ] = 0
 
-    # TODO: should live on the EnvConfig and also apply to training envs but
-    # this is hard right now because we use the vf.EnvGroup which treats all
-    # envs as one. for now training envs hardcode no retries, but we should
-    # probably treat them like environment groups long-term
-    max_retries: Annotated[
-        int,
-        Field(
-            description="Maximum number of times the environment will try to retry running a rollout.",
-        ),
-    ] = 0
-
 
 class ValConfig(BaseConfig):
     """Configures the validation of the model."""
diff --git a/src/prime_rl/orchestrator/scheduler.py b/src/prime_rl/orchestrator/scheduler.py
@@ -93,6 +93,7 @@ def __init__(
         # Inference pool - used for admin operations (adapter sync) and metrics
         self.inference_pool = inference_pool
 
+        self.max_retries_by_task = {env.resolved_name: env.max_retries for env in config.env}
         self.deferred_group_scoring_tasks = set(deferred_group_scoring_tasks or ())
         if self.deferred_group_scoring_tasks:
             task_list = ", ".join(sorted(self.deferred_group_scoring_tasks))
@@ -203,7 +204,7 @@ async def schedule_rollout(self, group_id: int):
                 example=group.example,
                 model_name=self.model_name,
                 sampling_args=self.sampling_args,
-                max_retries=0,  # TODO: make configurable
+                max_retries=self.max_retries_by_task.get(group.example["task"], 0),
             )
         )
         self.inflight_requests[run_rollout_task] = InflightRolloutInfo(