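Fix pre-commit lint errors (import sorting, unused imports, `X | None` type hints, placeholder-less f-string, end-of-file newlines)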

thwu1 · thwu1 · commit 8c9414031e5f · 2026-02-11T13:43:22.000-08:00
diff --git a/rllm/experimental/fully_async/client.py b/rllm/experimental/fully_async/client.py
@@ -165,4 +165,4 @@ async def chat_completion(
         return message, output
 
     async def close(self):
-        await self.client.aclose()
+        await self.client.aclose()
diff --git a/rllm/experimental/fully_async/fully_async_trainer.py b/rllm/experimental/fully_async/fully_async_trainer.py
@@ -20,24 +20,24 @@
 import ray
 from omegaconf import OmegaConf
 from tqdm import tqdm
-
-from rllm.experimental.fully_async.message_queue import MessageQueueClient
-from rllm.experimental.fully_async.metric_utils import MetricsAggregator, ValidateMetrics
-from rllm.experimental.fully_async.utils import (
-    assemble_batch_from_trajectory_group_ls,
-    compute_grpo_outcome_advantage,
-    reduce_metrics_with_flatten,
-)
 from verl import DataProto
 from verl.experimental.fully_async_policy.ray_trainer import FullyAsyncRayPPOTrainer
 from verl.single_controller.ray import RayClassWithInitArgs, RayWorkerGroup
 from verl.trainer.ppo import core_algos
 from verl.trainer.ppo.core_algos import agg_loss
 from verl.trainer.ppo.ray_trainer import ResourcePoolManager, apply_kl_penalty, compute_response_mask
-from verl.trainer.ppo.utils import Role, WorkerType, need_critic, need_reference_policy, need_reward_model
+from verl.trainer.ppo.utils import Role, WorkerType, need_critic, need_reference_policy
 from verl.utils.checkpoint.checkpoint_manager import find_latest_ckpt_path, should_save_ckpt_esi
 from verl.utils.debug import marked_timer
 
+from rllm.experimental.fully_async.message_queue import MessageQueueClient
+from rllm.experimental.fully_async.metric_utils import MetricsAggregator, ValidateMetrics
+from rllm.experimental.fully_async.utils import (
+    assemble_batch_from_trajectory_group_ls,
+    compute_grpo_outcome_advantage,
+    reduce_metrics_with_flatten,
+)
+
 
 @ray.remote(num_cpus=10)
 class FullyAsyncTrainer(FullyAsyncRayPPOTrainer):
@@ -637,4 +637,4 @@ def compute_old_log_prob(batch):
 
             actor_output_metrics = reduce_metrics_with_flatten(actor_output.meta_info["metrics"])
             metrics.update(actor_output_metrics)
-        return batch, {}
+        return batch, {}
diff --git a/rllm/experimental/fully_async/inference_manager.py b/rllm/experimental/fully_async/inference_manager.py
@@ -12,11 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import asyncio
 import subprocess
 
 import ray
-
 from verl.experimental.fully_async_policy.ray_trainer import FullyAsyncRayPPOTrainer
 from verl.single_controller.ray import RayClassWithInitArgs, RayWorkerGroup
 from verl.trainer.ppo.ray_trainer import ResourcePoolManager
@@ -163,4 +161,4 @@ def launch_router(self, port: int = 30000):
         return self.router_url
 
     async def clear_kv_cache(self):
-        await self.async_rollout_manager.clear_kv_cache()
+        await self.async_rollout_manager.clear_kv_cache()
diff --git a/rllm/experimental/fully_async/message_queue.py b/rllm/experimental/fully_async/message_queue.py
@@ -215,4 +215,4 @@ def put_sample_sync(self, sample: Any) -> bool:
 
     def get_sample_sync(self) -> Any | None:
         """Get single sample from queue (sync - deprecated, use get_sample instead)"""
-        return ray.get(self.queue_actor.get_sample.remote())
+        return ray.get(self.queue_actor.get_sample.remote())
diff --git a/rllm/experimental/fully_async/message_utils.py b/rllm/experimental/fully_async/message_utils.py
@@ -5,7 +5,6 @@
 import json
 import re
 
-
 # Regex for thinking content: <think>...</think>
 THINK_PATTERN = re.compile(r"<think>(.*?)</think>", re.DOTALL)
 
@@ -73,4 +72,4 @@ def build_tool_message(tool_name: str, tool_output: str, tool_call_id: str | Non
     message = {"role": "tool", "name": tool_name, "content": tool_output}
     if tool_call_id:
         message["tool_call_id"] = tool_call_id
-    return message
+    return message
diff --git a/rllm/experimental/fully_async/metric_utils.py b/rllm/experimental/fully_async/metric_utils.py
@@ -22,7 +22,7 @@
 import time
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Any, Optional
+from typing import Any
 
 import numpy as np
 import torch
@@ -33,9 +33,9 @@ class ValidateMetrics:
     """Metrics for validation"""
 
     timing_raw: dict[str, Any]
-    metrics: Optional[dict[str, Any]] = None
-    global_steps: Optional[int] = None
-    param_version: Optional[int] = None
+    metrics: dict[str, Any] | None = None
+    global_steps: int | None = None
+    param_version: int | None = None
 
 
 class MetricsAggregator:
@@ -225,4 +225,4 @@ def get_current_stats(self) -> dict[str, Any]:
             "metric_count": len(self.metric_values),
             "total_samples": sum(self.sample_counts),
             "metric_names": list(self.metric_values.keys()),
-        }
+        }
diff --git a/rllm/experimental/fully_async/param_sync.py b/rllm/experimental/fully_async/param_sync.py
@@ -17,7 +17,6 @@
 
 import ray
 from ray.util.collective import collective
-
 from verl.utils.device import get_nccl_backend
 
 logger = logging.getLogger(__name__)
@@ -152,7 +151,7 @@ def sync_weights(self, version, validate=False, global_steps=0):
         # Update staleness tracking - subtracts consumed samples from enqueued count
         # This must be called AFTER resume so continue_event can be set if there's capacity
         ray.get(self.rollout_executor.update_staleness_tracking.remote())
-        print(f"[ParameterSynchronizer] update_staleness_tracking completed", flush=True)
+        print("[ParameterSynchronizer] update_staleness_tracking completed", flush=True)
 
         pause_time = time.time()
 
@@ -197,4 +196,4 @@ def rollout_executor_save_checkpoint(self, local_global_step_folder: str):
         if not hasattr(self, "rollout_executor") or self.rollout_executor is None:
             raise RuntimeError("rollout_executor is not set; call set_rollout_executor() before saving checkpoint")
         print(f"[ParameterSynchronizer] Triggering RolloutExecutor checkpoint save at {local_global_step_folder} ...")
-        return ray.get(self.rollout_executor.save_checkpoint.remote(local_global_step_folder))
+        return ray.get(self.rollout_executor.save_checkpoint.remote(local_global_step_folder))
diff --git a/rllm/experimental/fully_async/protocol.py b/rllm/experimental/fully_async/protocol.py
@@ -1,5 +1,4 @@
-from dataclasses import dataclass, field
-from typing import Optional
+from dataclasses import dataclass
 
 
 @dataclass
@@ -175,4 +174,4 @@ def merge(self):
 
 @dataclass
 class TrajectoryGroup:
-    trajectories: list[Trajectory]
+    trajectories: list[Trajectory]
diff --git a/rllm/experimental/fully_async/runner.py b/rllm/experimental/fully_async/runner.py
@@ -22,18 +22,16 @@
 
 import ray
 from omegaconf import OmegaConf
+from verl.experimental.fully_async_policy.fully_async_main import create_resource_pool_manager, create_role_worker_mapping
+from verl.trainer.ppo.utils import Role
+from verl.utils.fs import copy_to_local
 
-from rllm.experimental.fully_async.inference_manager import InferenceManager
 from rllm.experimental.fully_async.fully_async_trainer import FullyAsyncTrainer
+from rllm.experimental.fully_async.inference_manager import InferenceManager
 from rllm.experimental.fully_async.message_queue import MessageQueue, MessageQueueClient
 from rllm.experimental.fully_async.param_sync import ParameterSynchronizer
-from rllm.experimental.fully_async.protocol import Trajectory
 from rllm.experimental.fully_async.rollout_executor import RolloutExecutor
 from rllm.experimental.fully_async.utils import calculate_max_concurrency
-from verl.experimental.fully_async_policy.fully_async_main import create_resource_pool_manager, create_role_worker_mapping
-from verl.trainer.ppo.ray_trainer import ResourcePoolManager
-from verl.trainer.ppo.utils import Role, need_reference_policy
-from verl.utils.fs import copy_to_local
 
 
 def create_task_runner_with_rollout_fn(rollout_fn, val_rollout_fn=None):
@@ -294,4 +292,4 @@ def train(self):
         task_runner_class = create_task_runner_with_rollout_fn(self.rollout_fn, self.val_rollout_fn)
         run_ppo(self.config, task_runner_class=task_runner_class)
 
-        print(f"total time: {time.time() - start_time:.2f} seconds")
+        print(f"total time: {time.time() - start_time:.2f} seconds")