Skip to content

Commit 15f8a8b

Browse files
authored
Update verl to v0.4.1, vllm to v0.9.2 (#125)
1 parent 90f4e91 commit 15f8a8b

File tree

13 files changed

+215
-164
lines changed

13 files changed

+215
-164
lines changed

docs/sphinx_doc/source/tutorial/faq.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ ray start --head
9696

9797
**A:** The following parameters may be helpful:
9898

99-
- For trainer, adjust `actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu` when `actor_rollout_ref.actor.use_dynamic_bsz=false`; adjust `actor_rollout_ref.actor.ppo_max_token_len_per_gpu` and `actor_rollout_ref.actor.ulysses_sequence_parallel_size` when `actor_rollout_ref.actor.use_dynamic_bsz=true`.
99+
- For trainer, adjust `actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu` when `actor_rollout_ref.actor.use_dynamic_bsz=false`; adjust `actor_rollout_ref.actor.ppo_max_token_len_per_gpu` and `actor_rollout_ref.actor.ulysses_sequence_parallel_size` when `actor_rollout_ref.actor.use_dynamic_bsz=true`. Setting `actor_rollout_ref.actor.entropy_from_logits_with_chunking=true` may also help.
100100
- For explorer, adjust `explorer.rollout_model.tensor_parallel_size`,
101101

102102

docs/sphinx_doc/source/tutorial/trinity_configs.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,11 @@ actor_rollout_ref:
443443
ppo_epochs: 1
444444
shuffle: False
445445
ulysses_sequence_parallel_size: 1 # sp size
446+
entropy_from_logits_with_chunking: false
447+
entropy_checkpointing: false
446448
checkpoint:
447-
contents: ['model', 'hf_model', 'optimizer', 'extra'] # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
449+
load_contents: ['model', 'optimizer', 'extra']
450+
save_contents: ['model', 'optimizer', 'extra']
448451
optim:
449452
lr: 1e-6
450453
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
@@ -458,17 +461,22 @@ actor_rollout_ref:
458461
param_offload: False
459462
optimizer_offload: False
460463
fsdp_size: -1
464+
forward_prefetch: False
461465
ref:
462466
fsdp_config:
463467
param_offload: False
464468
wrap_policy:
465469
# transformer_layer_cls_to_wrap: None
466470
min_num_params: 0
471+
fsdp_size: -1
472+
forward_prefetch: False
467473
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
468474
log_prob_micro_batch_size_per_gpu: 8
469475
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
470476
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
471477
ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
478+
entropy_from_logits_with_chunking: ${actor_rollout_ref.actor.entropy_from_logits_with_chunking}
479+
entropy_checkpointing: ${actor_rollout_ref.actor.entropy_checkpointing}
472480
473481
critic:
474482
strategy: fsdp
@@ -490,6 +498,7 @@ critic:
490498
# transformer_layer_cls_to_wrap: None
491499
min_num_params: 0
492500
fsdp_size: -1
501+
forward_prefetch: False
493502
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
494503
ppo_micro_batch_size_per_gpu: 8
495504
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
@@ -523,6 +532,9 @@ trainer:
523532
- `actor_rollout_ref.actor.use_dynamic_bsz`: Whether to reorganize the batch data, specifically to splice the shorter data to reduce the batch size in the actual training process.
524533
- `actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu`: Batch size for one GPU in one forward pass.
525534
- `actor_rollout_ref.actor.ulysses_sequence_parallel_size`: Ulysses sequence parallel size.
535+
- `actor_rollout_ref.actor.entropy_from_logits_with_chunking`: Calculate entropy from the logits in chunks to reduce the peak memory usage.
536+
- `actor_rollout_ref.actor.entropy_checkpointing`: Recompute entropy via activation checkpointing instead of storing it, trading extra compute for lower memory usage.
537+
- `actor_rollout_ref.actor.checkpoint`: Contents to be loaded and saved. Including 'hf_model' saves the whole model in Hugging Face format; by default only the sharded model checkpoint is kept to save space.
526538
- `actor_rollout_ref.actor.optim.lr`: Learning rate for actor model.
527539
- `actor_rollout_ref.actor.optim.lr_warmup_steps_ratio`: Ratio of warmup steps for learning rate.
528540
- `actor_rollout_ref.actor.optim.warmup_style`: Warmup style for learning rate.

examples/ppo_countdown/train_countdown.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ actor_rollout_ref:
1515
shuffle: False
1616
ulysses_sequence_parallel_size: 1 # sp size
1717
checkpoint:
18-
contents: ['model', 'hf_model', 'optimizer', 'extra'] # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
18+
load_contents: ['model', 'hf_model', 'optimizer', 'extra'] # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
19+
save_contents: ['model', 'hf_model', 'optimizer', 'extra']
1920
optim:
2021
lr: 1e-6
2122
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ classifiers = [
2121
]
2222
requires-python = ">=3.10"
2323
dependencies = [
24-
"verl==0.4.0",
24+
"verl==0.4.1",
2525
"ray[default]>=2.45.0",
26-
"vllm==0.9.1",
26+
"vllm==0.9.2",
2727
"tensordict==0.6.2",
2828
"wandb",
2929
"omegaconf",

tests/template/verl_config.yaml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@ actor_rollout_ref:
1515
ppo_epochs: 1
1616
shuffle: False
1717
ulysses_sequence_parallel_size: 1 # sp size
18+
entropy_from_logits_with_chunking: false
19+
entropy_checkpointing: false
1820
checkpoint:
19-
contents: ["model", "optimizer", "extra"] # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
21+
load_contents: ['model', 'optimizer', 'extra'] # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
22+
save_contents: ['model', 'optimizer', 'extra']
2023
optim:
2124
lr: 1e-6
2225
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
@@ -30,16 +33,21 @@ actor_rollout_ref:
3033
param_offload: False
3134
optimizer_offload: False
3235
fsdp_size: -1
36+
forward_prefetch: False
3337
ref:
3438
fsdp_config:
3539
param_offload: False
3640
wrap_policy:
3741
# transformer_layer_cls_to_wrap: None
3842
min_num_params: 0
43+
fsdp_size: -1
44+
forward_prefetch: False
3945
log_prob_micro_batch_size_per_gpu: 1
4046
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
4147
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
4248
ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
49+
entropy_from_logits_with_chunking: ${actor_rollout_ref.actor.entropy_from_logits_with_chunking}
50+
entropy_checkpointing: ${actor_rollout_ref.actor.entropy_checkpointing}
4351

4452
critic:
4553
strategy: fsdp
@@ -61,6 +69,7 @@ critic:
6169
# transformer_layer_cls_to_wrap: None
6270
min_num_params: 0
6371
fsdp_size: -1
72+
forward_prefetch: False
6473
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
6574
ppo_micro_batch_size_per_gpu: 1
6675
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
@@ -73,7 +82,8 @@ critic:
7382
grad_clip: 1.0
7483
cliprange_value: 0.5
7584
checkpoint:
76-
contents: ["model", "optimizer", "extra"] # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
85+
load_contents: ['model', 'optimizer', 'extra'] # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
86+
save_contents: ['model', 'optimizer', 'extra']
7787

7888
trainer:
7989
balance_batch: True

trinity/buffer/reader/file_reader.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def __init__(self, meta: StorageConfig, config: BufferConfig):
111111
self.response_key = meta.format.response_key
112112
self.read_batch_size = config.batch_size
113113
self.dataset = _HFBatchReader(
114-
load_dataset(meta.path, name=subset_name, split=self.split, trust_remote_code=True),
114+
load_dataset(meta.path, name=subset_name, split=self.split),
115115
name=meta.name,
116116
default_batch_size=self.read_batch_size,
117117
total_epochs=meta.total_epochs,
@@ -193,7 +193,7 @@ def __init__(self, meta: StorageConfig, config: BufferConfig):
193193
self.rejected_key = meta.format.rejected_key
194194
self.read_batch_size = config.batch_size
195195
self.dataset = _HFBatchReader(
196-
load_dataset(meta.path, name=subset_name, split=self.split, trust_remote_code=True),
196+
load_dataset(meta.path, name=subset_name, split=self.split),
197197
name=meta.name,
198198
default_batch_size=self.read_batch_size,
199199
total_epochs=meta.total_epochs,
@@ -272,7 +272,7 @@ def __init__(self, meta: StorageConfig, config: BufferConfig):
272272
datasets.disable_caching()
273273
self.read_batch_size = config.batch_size
274274
self.dataset = _HFBatchReader(
275-
load_dataset(meta.path, name=subset_name, split=self.split, trust_remote_code=True),
275+
load_dataset(meta.path, name=subset_name, split=self.split),
276276
name=meta.name,
277277
default_batch_size=self.read_batch_size,
278278
total_epochs=self.meta.total_epochs if meta.task_type == TaskType.EXPLORE else 1,
@@ -328,9 +328,7 @@ def read(
328328
class RawDataReader(BufferReader):
329329
def __init__(self, meta: StorageConfig, config: Optional[BufferConfig]):
330330
self.returned = False
331-
self.dataset = load_dataset(
332-
meta.path, name=meta.subset_name, split=meta.split, trust_remote_code=True
333-
)
331+
self.dataset = load_dataset(meta.path, name=meta.subset_name, split=meta.split)
334332

335333
def __len__(self):
336334
return len(self.dataset)

trinity/common/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ class ModelConfig:
177177
critic_model_path: str = ""
178178
max_prompt_tokens: Optional[int] = None
179179
max_response_tokens: Optional[int] = None
180+
custom_chat_template: Optional[str] = None
180181

181182

182183
@dataclass

trinity/common/verl_config.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class ActorModel:
2424
enable_gradient_checkpointing: bool = True
2525
use_remove_padding: bool = False
2626
use_fused_kernels: bool = False
27+
custom_chat_template: Optional[str] = None
2728

2829

2930
@dataclass
@@ -49,11 +50,13 @@ class FSDPConfig:
4950
param_offload: bool = False
5051
optimizer_offload: bool = False
5152
fsdp_size: int = -1
53+
forward_prefetch: bool = False
5254

5355

5456
@dataclass
5557
class Checkpoint:
56-
contents: List[str] = field(default_factory=lambda: ["model", "hf_model", "optimizer", "extra"])
58+
load_contents: List[str] = field(default_factory=lambda: ["model", "optimizer", "extra"])
59+
save_contents: List[str] = field(default_factory=lambda: ["model", "optimizer", "extra"])
5760

5861

5962
@dataclass
@@ -70,6 +73,8 @@ class Actor:
7073
ppo_epochs: int = 1
7174
shuffle: bool = False
7275
ulysses_sequence_parallel_size: int = 1
76+
entropy_from_logits_with_chunking: bool = False
77+
entropy_checkpointing: bool = False
7378
checkpoint: Checkpoint = field(default_factory=Checkpoint)
7479
optim: Optim = field(default_factory=Optim)
7580
fsdp_config: FSDPConfig = field(default_factory=FSDPConfig)
@@ -90,6 +95,11 @@ class Ref:
9095
log_prob_use_dynamic_bsz: bool = False
9196
log_prob_max_token_len_per_gpu: int = 0
9297
ulysses_sequence_parallel_size: int = 1
98+
entropy_from_logits_with_chunking: bool = False
99+
entropy_checkpointing: bool = False
100+
checkpoint: Checkpoint = field(
101+
default_factory=lambda: Checkpoint(load_contents=["model"], save_contents=["model"])
102+
)
93103

94104

95105
@dataclass
@@ -309,6 +319,7 @@ def synchronize_config(self, config: Config) -> None: # noqa: C901
309319

310320
# Actor / Critic config
311321
self.actor_rollout_ref.model.path = config.model.model_path
322+
self.actor_rollout_ref.model.custom_chat_template = config.model.custom_chat_template
312323
self.critic.model.path = config.model.critic_model_path
313324
self.critic.model.tokenizer_path = config.model.critic_model_path
314325
self.actor_rollout_ref.actor.ppo_mini_batch_size = (

trinity/manager/config_manager.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ def _expert_verl_training_part(self):
257257

258258
self.get_configs("ppo_epochs", "training_strategy", "resume_mode")
259259

260-
self.get_configs("param_offload", "optimizer_offload")
260+
self.get_configs("param_offload", "optimizer_offload", "forward_prefetch")
261261
self.get_configs("resume_from_path")
262262

263263
with st.expander("Advanced Config"):
@@ -275,6 +275,8 @@ def _expert_verl_actor_part(self):
275275
"actor_ppo_micro_batch_size_per_gpu",
276276
"ref_log_prob_micro_batch_size_per_gpu",
277277
"actor_ulysses_sequence_parallel_size",
278+
"actor_entropy_from_logits_with_chunking",
279+
"actor_entropy_checkpointing",
278280
)
279281

280282
self.get_configs("actor_lr", "actor_warmup_style", "actor_lr_warmup_steps_ratio")
@@ -335,6 +337,7 @@ def _generate_verl_config(self):
335337
"param_offload": st.session_state["param_offload"],
336338
"optimizer_offload": st.session_state["optimizer_offload"],
337339
"fsdp_size": -1,
340+
"forward_prefetch": st.session_state["forward_prefetch"],
338341
}
339342
else:
340343
fsdp_config = {}
@@ -363,6 +366,10 @@ def _generate_verl_config(self):
363366
"ulysses_sequence_parallel_size": st.session_state[
364367
"actor_ulysses_sequence_parallel_size"
365368
],
369+
"entropy_from_logits_with_chunking": st.session_state[
370+
"actor_entropy_from_logits_with_chunking"
371+
],
372+
"entropy_checkpointing": st.session_state["actor_entropy_checkpointing"],
366373
"checkpoint": {"contents": st.session_state["actor_checkpoint"]},
367374
"optim": {
368375
"lr": st.session_state["actor_lr"],
@@ -386,6 +393,10 @@ def _generate_verl_config(self):
386393
"ulysses_sequence_parallel_size": st.session_state[
387394
"actor_ulysses_sequence_parallel_size"
388395
],
396+
"entropy_from_logits_with_chunking": st.session_state[
397+
"actor_entropy_from_logits_with_chunking"
398+
],
399+
"entropy_checkpointing": st.session_state["actor_entropy_checkpointing"],
389400
},
390401
},
391402
"critic": {},

trinity/manager/config_registry/trainer_config_manager.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ def set_optimizer_offload(**kwargs):
114114
st.checkbox("FSDP Optimizer Offload", **kwargs)
115115

116116

117+
@CONFIG_GENERATORS.register_config(default_value=False, visible=use_fsdp)
118+
def set_forward_prefetch(**kwargs):
119+
st.checkbox("FSDP Forward Prefetch", **kwargs)
120+
121+
117122
@CONFIG_GENERATORS.register_config(default_value="auto")
118123
def set_resume_mode(**kwargs):
119124
st.selectbox("Resume Mode", ["disable", "auto", "resume_path"], **kwargs)
@@ -235,6 +240,16 @@ def set_actor_ulysses_sequence_parallel_size(**kwargs):
235240
)
236241

237242

243+
@CONFIG_GENERATORS.register_config(default_value=False)
244+
def set_actor_entropy_from_logits_with_chunking(**kwargs):
245+
st.checkbox("Entropy from Logits with Chunking", **kwargs)
246+
247+
248+
@CONFIG_GENERATORS.register_config(default_value=False)
249+
def set_actor_entropy_checkpointing(**kwargs):
250+
st.checkbox("Entropy Checkpointing", **kwargs)
251+
252+
238253
@CONFIG_GENERATORS.register_config(default_value=1e-6)
239254
def set_actor_lr(**kwargs):
240255
st.number_input(

0 commit comments

Comments
 (0)