
Commit 9fcba4d

峯回daihao authored and committed

PullRequest: 933 Add training config and update extension and CLI args

Merge branch test/tmp1030 of [email protected]:inclusionAI/AReaL.git into asystem/gh
https://code.alipay.com/inclusionAI/AReaL/pull_requests/933
Reviewed-by: 楚财 <[email protected]>

* update cli args
* update extension
* add trainer
* .
1 parent 87b1dba commit 9fcba4d

File tree

15 files changed: +1014 −100 lines changed


areal/api/cli_args.py

Lines changed: 15 additions & 8 deletions
@@ -431,11 +431,15 @@ class TrainEngineConfig:
         default="lora",
         metadata={"help": "peft method type. Only LoRA is supported for now."},
     )
-    scheduling_spec: SchedulingSpec = field(
-        default_factory=lambda: SchedulingSpec(
-            cmd="python -m areal.scheduler.rpc.rpc_server"
+
+    scheduling_specs: tuple[SchedulingSpec, SchedulingSpec] = field(
+        default_factory=lambda: (
+            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
+            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
         ),
-        metadata={"help": "train engine schedule specs"},
+        metadata={
+            "help": "train engine schedule specs, first is worker, second is engine"
+        },
     )
     scheduling_strategy: SchedulingStrategy = field(default_factory=SchedulingStrategy)

@@ -908,11 +912,14 @@ class InferenceEngineConfig:
             "help": "The grace period after calling /pause_generation. Wait until all requests have been dropped."
         },
     )
-    scheduling_spec: SchedulingSpec = field(
-        default_factory=lambda: SchedulingSpec(
-            cmd="python -m areal.scheduler.rpc.rpc_server"
+    scheduling_specs: tuple[SchedulingSpec, SchedulingSpec] = field(
+        default_factory=lambda: (
+            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
+            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
         ),
-        metadata={"help": "inference engine schedule specs"},
+        metadata={
+            "help": "inference engine schedule specs, first is worker, second is engine"
+        },
     )
     scheduling_strategy: SchedulingStrategy = field(default_factory=SchedulingStrategy)
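Both hunks replace a single scheduling_spec with a two-element scheduling_specs tuple whose position encodes the role: index 0 schedules the worker, index 1 the engine. Below is a minimal sketch of the pattern; the extra SchedulingSpec fields are assumptions inferred from the YAML config in this commit, not the real class definition.

from dataclasses import dataclass, field


# Stand-in for areal's SchedulingSpec; the real class has more fields.
@dataclass
class SchedulingSpec:
    cmd: str = "python -m areal.scheduler.rpc.rpc_server"
    type: str = ""      # "worker" or "engine" (assumed, mirroring the YAML below)
    image: str = ""     # container image to launch with
    gpu: int = 0        # GPUs requested for this process
    env_vars: dict[str, str] = field(default_factory=dict)


@dataclass
class TrainEngineConfig:
    # Index 0 schedules the worker process, index 1 the engine process.
    scheduling_specs: tuple[SchedulingSpec, SchedulingSpec] = field(
        default_factory=lambda: (
            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
        )
    )


# Position, not a key, distinguishes the two specs:
worker_spec, engine_spec = TrainEngineConfig().scheduling_specs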

Lines changed: 305 additions & 0 deletions
@@ -0,0 +1,305 @@
+experiment_name: mini-model
+trial_name: on-policy
+allocation_mode: "sglang:d8t4p1+d8t1p4"
+seed: 42
+total_train_epochs: 10
+total_train_steps: 1145
+weight_update_type: "astate"
+enable_colocate_mode: true
+
+storage_prefix: "/storage/openpsi"
+
+tokenizer_path: "/storage/xukuan.xk/repos/antnlp/personal/pretrained_models/ring-moe-v2-sft-general700w_longcot200w_0725/hf_ckpts/28869_kz"
+train_dataset:
+  path: "/storage/dataset/nlp/areal/moe_lite_math_0527_merge_train_areal.jsonl"
+  shuffle: true
+  max_length: 1024
+  batch_size: 64
+  type: "rl"
+
+scheduler:
+  endpoint: "http://asystem-scheduler.asystem-my001-swift.svc.sigma-my001.ml01.sgp-ml.local:8081"
+  functioncall_service_domain: "http://110.75.237.19:8080"
+  reward_model_path: "/storage/jiulin.jl/Skywork-Reward-V2-Qwen3-8B"
+  reward_model_service_url: "http://reward-model-service.asystem-test.svc.sigma-my001.ml01.sgp-ml.local:30000/classify"
+
+stats_logger:
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  fileroot: "${storage_prefix}/experiments"
+  wandb:
+    mode: "online"
+    wandb_base_url: "https://slurm.alipay.com"
+    wandb_api_key: "local-3bca3d5f00a980f3075b3e8ff2e16adc4ef43ffe"
+  tensorboard:
+    path: "/home/admin/logs/tfevent/asystem"
+
+gconfig:
+  n_samples: 8
+  min_new_tokens: 0
+  # NOTE!!
+  # Due to the limitations of sglang, max_new_tokens + max_prompt_len must be less than the model's context_len (set in the model's config.json),
+  # and cannot be equal to it. See https://github.com/sgl-project/sglang/blob/f98366604b23e331422bf3c62d4e7410ae4fab87/python/sglang/srt/managers/tokenizer_manager.py#L638C9-L638C11
+  max_new_tokens: 15360
+  greedy: false
+  temperature: 1.0
+  top_k: 1000000
+  top_p: 1.0
+
+rollout:
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  model_path: ${tokenizer_path}
+  storage_path: "${storage_prefix}/checkpoints"
+  seed: ${seed}
+  engine_config:
+    attention_backend: "triton"
+    disable_custom_all_reduce: true
+    enable_metrics: true
+    mem_fraction_static: 0.7
+    triton_attention_num_kv_splits: 16
+    tokenizer_mode: "auto"
+    load_format: "auto"
+    is_embedding: false
+    kv_cache_dtype: "auto"
+    max_prefill_tokens: 32768
+    schedule_policy: "fcfs"
+    schedule_conservativeness: 1.0
+    disable_cuda_graph: false
+    disable_radix_cache: true
+    disable_cuda_graph_padding: false
+    enable_nccl_nvls: false
+    disable_outlines_disk_cache: false
+    disable_overlap_schedule: false
+    enable_mixed_chunk: false
+    enable_dp_attention: false
+    enable_ep_moe: false
+    enable_torch_compile: false
+    torch_compile_max_bs: 32
+    triton_attention_reduce_in_fp32: false
+    cuda_graph_bs: [ 1, 2, 4, 8, 16, 32, 64, 128, 256, 384, 512 ]
+    num_continuous_decode_steps: 1
+    enable_nan_detection: false
+    allow_auto_truncate: false
+    enable_p2p_check: false
+    enable_memory_saver: false
+    chunked_prefill_size: null
+    context_length: null
+    cpu_offload_gb: 0
+    dp_size: 1
+    dtype: "auto"
+    sampling_backend: "pytorch"
+    log_level: "info"
+    log_level_http: null
+    log_requests: false
+    log_requests_level: 0
+    max_running_requests: null
+    show_time_cost: false
+  scheduling_specs:
+    - type: worker
+      image: /storage/openpsi/images/areal-25.01-sglang-bf16-editable-metrics-xccl-20250716.sif
+    - type: engine
+      gpu: 1
+      # you can customize environment variables here
+      env_vars:
+        # if you use ling max v2, you need to set USE_MAX_V2 = 1
+        USE_MAX_V2: 1
+      image: /storage/openpsi/images/hybrid-engine-13680179-20250923154343.sif
+
+actor: &actor_ref
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  hybrid_engine:
+    experiment_name: ${experiment_name}
+    trial_name: ${trial_name}
+    group_size: ${gconfig.n_samples}
+    train_bs_n_seqs: ${train_dataset.batch_size}
+    max_tokens_per_mb: 16384
+    wrap_policy:
+  n_minibatches: 1
+  kl_ctl: 0.0
+  recompute_logp: false
+  adv_norm: false
+  discount: 1.0
+  gae_lambda: 1.0
+  eps_clip: 0.2
+  clip_ratio_low: 0.2
+  clip_ratio_high: 0.28
+  c_clip: null
+  value_eps_clip: 0.2
+  max_reward_clip: 5.0
+  disable_value: true
+  early_stop_kl: null
+  early_stop_imp_ratio: null
+  adaptive_kl_ctl: false
+  adaptive_kl_target: 6
+  adaptive_kl_horizon: 10000
+  enable_save: true
+  value_norm: true
+  value_norm_type: "exp"
+  value_norm_beta: 0.99995
+  value_norm_eps: 1e-5
+  group_size: 8
+  generation_size: null
+  mask_no_eos_with_zero: false
+  group_adv_norm: true
+  mask_too_long: false
+  use_dense_reward: false
+  reward_delta: true
+  token_normalize_scope: "global"
+  sample_reuse: 1
+  temperature: 1.0
+  reward_output_scaling: 0.5
+  reward_output_bias: -1.0
+  remote_megatron_config:
+    adam_beta1: 0.9
+    adam_beta2: 0.999
+    adam_eps: 1.0e-08
+    adaptive_layer_bias_update_strategy: sqrt
+    add_bias_linear: false
+    add_position_embedding: true
+    apply_rope_fusion: true
+    async_save: false
+    attention_backend: "flash"
+    attention_dropout: 0.0
+    attention_softmax_in_fp32: true
+    auto_detect_ckpt_format: true
+    bf16: true
+    clip_grad: 1.0
+    context_parallel_size: 1
+    cp_comm_type: "p2p"
+    cross_entropy_loss_fusion: false
+    distributed_backend: "nccl"
+    distributed_timeout_minutes: 600
+    enable_one_logger: false
+    expert_model_parallel_size: 8
+    ffn_hidden_size: 5120
+    first_k_dense_replace: 1
+    global_batch_size: 512
+    gradient_accumulation_fusion: true
+    group_query_attention: true
+    hidden_dropout: 0.0
+    hidden_size: 2048
+    init_method_std: 0.006
+    load: /storage/xukuan.xk/repos/antnlp/personal/pretrained_models/ring-moe-v2-sft-general700w_longcot200w_0725/iter_0028869
+    log_loss_scale_to_tensorboard: false
+    log_num_zeros_in_grad: true
+    log_params_norm: true
+    log_throughput: true
+    log_timers_to_tensorboard: true
+    log_validation_ppl_to_tensorboard: true
+    lr: 3.0e-06
+    lr_decay_style: constant
+    lr_warmup_iters: 10
+    make_vocab_size_divisible_by: 128
+    masked_softmax_fusion: true
+    max_position_embeddings: 16384
+    micro_batch_size: 1
+    moe_ffn_hidden_size: 512
+    moe_grouped_gemm: true
+    moe_layer_freq:
+      - 0
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+      - 1
+    moe_per_layer_logging: true
+    moe_permute_fusion: true
+    moe_router_bias_update_rate: 0.00
+    moe_router_dtype: fp32
+    moe_router_enable_expert_bias: true
+    moe_router_group_topk: 4
+    moe_router_num_groups: 8
+    moe_router_score_function: sigmoid
+    moe_router_topk: 8
+    moe_router_topk_scaling_factor: 2.5
+    moe_shared_expert_intermediate_size: 512
+    moe_shared_expert_overlap: true
+    moe_token_dispatcher_type: alltoall
+    norm_epsilon: 1.0e-06
+    normalization: "RMSNorm"
+    num_attention_heads: 16
+    num_experts: 256
+    num_layers: 20
+    num_query_groups: 4
+    optim_normhead_fwd_alltoall: true
+    optimizer: "adam"
+    overlap_grad_reduce: true
+    overlap_p2p_comm: true
+    overlap_param_gather: false
+    pipeline_model_parallel_size: 4
+    position_embedding_type: "rope"
+    qk_layernorm: true
+    recompute_granularity: "full"
+    recompute_method: "uniform"
+    recompute_num_layers: 5
+    rotary_base: 600000
+    rotary_percent: 0.5
+    save: /mnt/asystem-s3/common/users/senlin.zsl/experiments/2025-07-19_14-32-43/experiments/models/mcore_ckpt_32/asystem_moe_mini
+    save_interval: 1
+    seed: 42
+    seq_length: 16384
+    sequence_parallel: true
+    skip_casting_dtype_for_param_pattern: '^expert_bias$|.+\.expert_bias$'
+    swiglu: true
+    tensor_model_parallel_size: 1
+    tensorboard_log_interval: 1
+    tokenizer_model: ${tokenizer_path}
+    tokenizer_type: "HuggingFaceTokenizer"
+    train_iters: 100000
+    transformer_xl: false
+    unidirectional: true
+    untie_embeddings_and_output_weights: true
+    use_distributed_optimizer: true
+    use_flash_attn: true
+    use_init_chunk: true
+    use_mcore_models: true
+    use_norm_head: false
+    use_pack_lazy_loader: true
+    use_random_logits: true
+    use_rotary_position_embeddings: true
+    vocab_size: 157184
+    weight_decay: 0.01
+  loss_configs:
+    kl_ctl: 0.0
+  scheduling_specs:
+    - type: worker
+      image: /storage/openpsi/images/areal-25.01-sglang-bf16-editable-metrics-xccl-20250716.sif
+    - type: engine
+      gpu: 1
+      # you can customize environment variables here
+      env_vars:
+        # if CUDA_LAUNCH_BLOCKING = 1 is not set, the megatron engine will hang in the train phase
+        CUDA_LAUNCH_BLOCKING: 1
+        # if you use ling max v2, you need to set USE_MAX_V2 = 1
+        USE_MAX_V2: 1
+      image: /storage/openpsi/images/hybrid-engine-13680179-20250923154343.sif
+
+ref:
+  <<: *actor_ref
+
+recover:
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  recover_meta_info_path: ""
+  enable_recover: true
+  latest_disable_save_hf: true
+  periodic_disable_save_hf: false
+  latest_save_interval: 1
+  periodic_save_interval: 20
+  fileroot: "${storage_prefix}/experiments"
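A minimal sketch (not part of this commit) of how the config above behaves at load time, assuming an OmegaConf-style loader; the train_config.yaml path is hypothetical, and the loader AReaL actually uses may differ. The `<<: *actor_ref` merge key is expanded by the YAML parser itself, while the `${...}` references are interpolations resolved on access.

from omegaconf import OmegaConf

cfg = OmegaConf.load("train_config.yaml")  # hypothetical path to the file above

# YAML merge key: `ref` inherits every field of `actor`.
assert cfg.ref.remote_megatron_config.lr == cfg.actor.remote_megatron_config.lr

# ${...} interpolations follow the referenced keys.
print(cfg.rollout.model_path)              # same value as `tokenizer_path`
print(cfg.actor.hybrid_engine.group_size)  # follows gconfig.n_samples -> 8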

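The NOTE in gconfig hides some easy-to-miss arithmetic: max_new_tokens (15360) plus a prompt capped at train_dataset.max_length (1024) sums to exactly 16384, so a model whose context_len in config.json is 16384 (matching seq_length above) would reject full-length requests under sglang's strict inequality. A hedged sanity check follows; treating max_length as the maximum prompt length is an assumption.

# sglang requires max_new_tokens + max_prompt_len < context_len (strictly).
max_new_tokens = 15360  # gconfig.max_new_tokens
max_prompt_len = 1024   # assumption: prompts capped at train_dataset.max_length
context_len = 16384     # assumption: matches seq_length / max_position_embeddings

if max_new_tokens + max_prompt_len >= context_len:
    print(
        f"would be rejected: {max_new_tokens} + {max_prompt_len} = "
        f"{max_new_tokens + max_prompt_len} >= context_len {context_len}"
    )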