2020import ray
2121from omegaconf import OmegaConf
2222from tqdm import tqdm
23-
24- from rllm .experimental .fully_async .message_queue import MessageQueueClient
25- from rllm .experimental .fully_async .metric_utils import MetricsAggregator , ValidateMetrics
26- from rllm .experimental .fully_async .utils import (
27- assemble_batch_from_trajectory_group_ls ,
28- compute_grpo_outcome_advantage ,
29- reduce_metrics_with_flatten ,
30- )
3123from verl import DataProto
3224from verl .experimental .fully_async_policy .ray_trainer import FullyAsyncRayPPOTrainer
3325from verl .single_controller .ray import RayClassWithInitArgs , RayWorkerGroup
3426from verl .trainer .ppo import core_algos
3527from verl .trainer .ppo .core_algos import agg_loss
3628from verl .trainer .ppo .ray_trainer import ResourcePoolManager , apply_kl_penalty , compute_response_mask
37- from verl .trainer .ppo .utils import Role , WorkerType , need_critic , need_reference_policy , need_reward_model
29+ from verl .trainer .ppo .utils import Role , WorkerType , need_critic , need_reference_policy
3830from verl .utils .checkpoint .checkpoint_manager import find_latest_ckpt_path , should_save_ckpt_esi
3931from verl .utils .debug import marked_timer
4032
33+ from rllm .experimental .fully_async .message_queue import MessageQueueClient
34+ from rllm .experimental .fully_async .metric_utils import MetricsAggregator , ValidateMetrics
35+ from rllm .experimental .fully_async .utils import (
36+ assemble_batch_from_trajectory_group_ls ,
37+ compute_grpo_outcome_advantage ,
38+ reduce_metrics_with_flatten ,
39+ )
40+
4141
4242@ray .remote (num_cpus = 10 )
4343class FullyAsyncTrainer (FullyAsyncRayPPOTrainer ):
@@ -637,4 +637,4 @@ def compute_old_log_prob(batch):
637637
638638 actor_output_metrics = reduce_metrics_with_flatten (actor_output .meta_info ["metrics" ])
639639 metrics .update (actor_output_metrics )
640- return batch , {}
640+ return batch , {}
0 commit comments