generated from fastai/nbdev_template
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Open
Labels
Description
Reproduction
Minimal reproducible code:
The issue occurs when running the script below with multiple processes via accelerate (e.g., `accelerate launch --num_processes=2 train.py`).
import torch
from trl import GRPOConfig, GRPOTrainer
from trl.experimental.openenv import generate_rollout_completions
def rollout_func(prompts, trainer):
    """Dummy rollout hook: wraps each prompt as a single-turn chat message,
    calls the experimental rollout API (where the multi-GPU crash occurs),
    and returns placeholder tensors in the shape GRPOTrainer expects."""
    # Construct dummy messages for the experimental API
    chat_messages = []
    for prompt in prompts:
        chat_messages.append([{"role": "user", "content": prompt}])
    # 💥 CRASH HAPPENS HERE during multi-GPU execution
    rollout_outputs = generate_rollout_completions(trainer, chat_messages)[0]
    # Dummy return
    return {
        "prompt_ids": [torch.tensor([1, 2])],
        "completion_ids": [torch.tensor([3, 4])],
        "advantages": [torch.tensor(0.0)]
    }
def main():
    """Reproduction entry point.

    Configures GRPO with vLLM in server mode and starts training; the
    reported AttributeError fires when this script is launched with
    multiple processes (accelerate launch --num_processes=2).
    """
    config = GRPOConfig(
        use_vllm=True,
        vllm_mode="server",  # Triggers the bug in distributed mode
        vllm_server_base_url="http://localhost:8888",
        output_dir="./outputs",
    )
    # Dummy dataset
    dataset = [{"prompt": "Test prompt 1"}, {"prompt": "Test prompt 2"}]
    trainer = GRPOTrainer(
        model="Qwen/Qwen2.5-1.5B-Instruct",  # Placeholder model
        reward_funcs=[lambda prompts, completions, **kwargs: [0.0] * len(completions)],
        train_dataset=dataset,
        args=config,
        rollout_func=rollout_func,
    )
    trainer.train()
if __name__ == "__main__":
    main()

outputs:
[rank1]: Traceback (most recent call last):
[rank1]: File "/fs1/private/user/wyt/VSCodeWorkplace/RLtrain/train.py", line 300, in <module>
[rank1]: main()
[rank1]: File "/fs1/private/user/wyt/VSCodeWorkplace/RLtrain/train.py", line 294, in main
[rank1]: trainer.train()
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/transformers/trainer.py", line 2325, in train
[rank1]: return inner_training_loop(
[rank1]: ^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/transformers/trainer.py", line 2674, in _inner_training_loop
[rank1]: tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/trl/trainer/grpo_trainer.py", line 1090, in training_step
[rank1]: output = super().training_step(model, inputs, num_items_in_batch)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/transformers/trainer.py", line 4014, in training_step
[rank1]: inputs = self._prepare_inputs(inputs)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/trl/extras/profiling.py", line 202, in wrapper
[rank1]: return func(self, *args, **kwargs)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/trl/trainer/grpo_trainer.py", line 1119, in _prepare_inputs
[rank1]: generation_batch = self._generate_and_score_completions(generation_batch)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/fs1/private/user/wyt/VSCodeWorkplace/RLtrain/StepwiseGRPOTrainer.py", line 52, in _generate_and_score_completions
[rank1]: rollout_output = self.rollout_func(prompts_duplicated, self)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/fs1/private/user/wyt/VSCodeWorkplace/RLtrain/train.py", line 220, in rollout_func
[rank1]: trajectory = rollout_once(
[rank1]: ^^^^^^^^^^^^^
[rank1]: File "/fs1/private/user/wyt/VSCodeWorkplace/RLtrain/train.py", line 110, in rollout_once
[rank1]: rollout_outputs = generate_rollout_completions(trainer, [messages])[0]
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/trl/experimental/openenv/utils.py", line 109, in generate_rollout_completions
[rank1]: return _generate_rollout_completions_server(trainer, prompts, generation_overrides, as_chat)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/home/wyt/anaconda3/envs/trll/lib/python3.12/site-packages/trl/experimental/openenv/utils.py", line 131, in _generate_rollout_completions_server
[rank1]: output = trainer.vllm_generation.vllm_client.chat(
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: AttributeError: 'VLLMGeneration' object has no attribute 'vllm_client'
System Info
- Platform: Linux-5.4.0-204-generic-x86_64-with-glibc2.31
- Python version: 3.12.12
- TRL version: 1.0.0.dev0
- PyTorch version: 2.9.1
- accelerator(s): NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB
- Transformers version: 4.57.6
- Accelerate version: 1.13.0
- Accelerate config:
- compute_environment: LOCAL_MACHINE
- distributed_type: DEEPSPEED
- mixed_precision: bf16
- use_cpu: False
- debug: True
- num_processes: 2
- machine_rank: 0
- num_machines: 1
- rdzv_backend: static
- same_network: True
- main_training_function: main
- enable_cpu_affinity: False
- deepspeed_config: {'gradient_accumulation_steps': 4, 'offload_optimizer_device': 'none', 'offload_param_device': 'none', 'zero3_init_flag': True, 'zero_stage': 2}
- downcast_bf16: no
- tpu_use_cluster: False
- tpu_use_sudo: False
- tpu_env: []
- dynamo_config: {'dynamo_backend': 'EAGER', 'dynamo_mode': 'default', 'dynamo_use_dynamic': False, 'dynamo_use_fullgraph': False, 'dynamo_use_regional_compilation': False}
- Datasets version: 4.6.1
- HF Hub version: 0.36.2
- bitsandbytes version: 0.49.2
- DeepSpeed version: 0.18.8
- Liger-Kernel version: not installed
- LLM-Blender version: not installed
- OpenAI version: 2.26.0
- PEFT version: 0.18.1
- vLLM version: 0.14.1
Checklist
- I have checked that my issue isn't already filed (see open issues)
- I have included my system information
- Any code provided is minimal, complete, and reproducible (more on MREs)
- Any code provided is properly formatted in code blocks, (no screenshot, more on code blocks)
- Any traceback provided is complete
Reactions are currently unavailable