
Commit 638bc52

test: Update on-policy distillation release tests (#1363)
Signed-off-by: Zhaopeng Qiu <alexq@nvidia.com>
1 parent 96656c3 commit 638bc52

14 files changed: +64 additions, -401 deletions

examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml

Lines changed: 1 addition & 29 deletions
@@ -2,7 +2,7 @@ defaults: ../../distillation_math.yaml
 distillation:
   num_prompts_per_step: 64
   max_num_steps: 20
-  val_batch_size: 32
+  val_batch_size: 256
   val_period: 10
   max_val_samples: 256
 loss_fn:
@@ -11,43 +11,15 @@ checkpointing:
   checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-dynamicbatch
 policy:
   model_name: Qwen/Qwen3-4B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     context_parallel_size: 1
   make_sequence_length_divisible_by: 2
-  scheduler:
-    - name: torch.optim.lr_scheduler.LinearLR
-      kwargs:
-        start_factor: 0.1
-        end_factor: 1.0
-        total_iters: 20
-    - name: torch.optim.lr_scheduler.ConstantLR
-      kwargs:
-        factor: 1.0
-        total_iters: 10000000000
-    - milestones:
-        - 20
 teacher:
   model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     tensor_parallel_size: 8
     context_parallel_size: 1
   make_sequence_length_divisible_by: 2
-  scheduler:
-    - name: torch.optim.lr_scheduler.LinearLR
-      kwargs:
-        start_factor: 0.1
-        end_factor: 1.0
-        total_iters: 20
-    - name: torch.optim.lr_scheduler.ConstantLR
-      kwargs:
-        factor: 1.0
-        total_iters: 10000000000
-    - milestones:
-        - 20
 logger:
   log_dir: logs/distillation-qwen3-32b-to-4b-base-dynamicbatch
   wandb:
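These recipe files are thin overlays: keys listed here are merged on top of the YAML named in defaults:, so deleting train_global_batch_size, generation_batch_size, and the scheduler block means the run now falls back to whatever ../../distillation_math.yaml specifies rather than pinning those values per recipe. A minimal sketch of that behaviour, assuming the usual deep-merge semantics of the defaults: key; the base values shown are hypothetical, not taken from this diff:

# ../../distillation_math.yaml (hypothetical excerpt of the base config)
policy:
  train_global_batch_size: 64   # assumed base value, for illustration only
  generation_batch_size: 64     # assumed base value, for illustration only

# recipe file after this commit: only the keys it still lists are overridden
defaults: ../../distillation_math.yaml
policy:
  model_name: Qwen/Qwen3-4B-Base   # overrides the base
  dtensor_cfg:
    context_parallel_size: 1       # overrides the base
# effective policy.train_global_batch_size is now the base value above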

examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml

Lines changed: 9 additions & 45 deletions
@@ -2,58 +2,22 @@ defaults: ../../distillation_math.yaml
 distillation:
   num_prompts_per_step: 64
   max_num_steps: 500
-  val_batch_size: 32
+  val_batch_size: 512
   val_period: 50
-  max_val_samples: 256
+loss_fn:
+  kl_type: reverse
 checkpointing:
   checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-long
-  save_period: 50
+  save_period: 10
 policy:
   model_name: Qwen/Qwen3-4B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 32768
-  dynamic_batching:
-    enabled: false
-  make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-    - name: torch.optim.lr_scheduler.LinearLR
-      kwargs:
-        start_factor: 0.1
-        end_factor: 1.0
-        total_iters: 100
-    - name: torch.optim.lr_scheduler.CosineAnnealingLR
-      kwargs:
-        T_max: 900
-        eta_min: 1.0e-07
-    - milestones:
-        - 100
+  max_total_sequence_length: 20480
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
 teacher:
   model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 32768
-  dynamic_batching:
-    enabled: false
-  make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-    - name: torch.optim.lr_scheduler.LinearLR
-      kwargs:
-        start_factor: 0.1
-        end_factor: 1.0
-        total_iters: 100
-    - name: torch.optim.lr_scheduler.CosineAnnealingLR
-      kwargs:
-        T_max: 900
-        eta_min: 1.0e-07
-    - milestones:
-        - 100
+  max_total_sequence_length: 20480
 logger:
   log_dir: logs/distillation-qwen3-32b-to-4b-base-long
   wandb:
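Besides dropping the per-recipe optimizer and scheduler overrides, the long-run recipe now shortens the context to 20480 tokens and pins the rollout engine explicitly: vllm_cfg.tensor_parallel_size: 2 shards the vLLM generation model across two GPUs. An abridged sketch of the policy overrides that remain after this commit, taken from the added lines above (any other policy keys come from the base config; indentation assumed):

policy:
  model_name: Qwen/Qwen3-4B-Base
  max_total_sequence_length: 20480   # reduced from 32768
  generation:
    vllm_cfg:
      tensor_parallel_size: 2        # vLLM rollout sharded over 2 GPUs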
New file: Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+defaults: ../../distillation_math.yaml
+distillation:
+  num_prompts_per_step: 64
+  max_num_steps: 20
+  val_batch_size: 256
+  val_period: 10
+  max_val_samples: 256
+loss_fn:
+  kl_type: reverse
+checkpointing:
+  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-seqpack
+policy:
+  model_name: Qwen/Qwen3-4B-Base
+  dtensor_cfg:
+    context_parallel_size: 1
+  dynamic_batching:
+    enabled: false
+  sequence_packing:
+    enabled: true
+  make_sequence_length_divisible_by: 2
+teacher:
+  model_name: Qwen/Qwen3-32B
+  dtensor_cfg:
+    tensor_parallel_size: 8
+    context_parallel_size: 1
+  dynamic_batching:
+    enabled: false
+  sequence_packing:
+    enabled: true
+  make_sequence_length_divisible_by: 2
+logger:
+  log_dir: logs/distillation-qwen3-32b-to-4b-base-seqpack
+  wandb:
+    project: nemo-rl
+    name: distillation-qwen3-32b-to-4b-base-seqpack
+cluster:
+  num_nodes: 2
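This new sequence-packing recipe turns dynamic batching off and sequence packing on for both the student and the teacher, and runs on two nodes. Assuming 8 GPUs per node (the 2n8g convention used by the sibling recipe filenames; gpus_per_node is not set in this file and presumably comes from the base config), the teacher's tensor_parallel_size: 8 spans exactly one node per TP group. A sketch of the cluster section with that assumption spelled out:

cluster:
  gpus_per_node: 8   # assumed; inherited from the base config, not set in this recipe
  num_nodes: 2       # set by this recipe
# teacher.dtensor_cfg.tensor_parallel_size: 8 -> one teacher TP group fills a node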

examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml

Lines changed: 1 addition & 39 deletions
@@ -2,7 +2,7 @@ defaults: ../../distillation_math.yaml
 distillation:
   num_prompts_per_step: 64
   max_num_steps: 20
-  val_batch_size: 32
+  val_batch_size: 256
   val_period: 10
   max_val_samples: 256
 loss_fn:
@@ -12,29 +12,10 @@ checkpointing:
   save_period: 50
 policy:
   model_name: Qwen/Qwen3-4B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     tensor_parallel_size: 8
     context_parallel_size: 1
-  dynamic_batching:
-    enabled: false
   make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-    - name: torch.optim.lr_scheduler.LinearLR
-      kwargs:
-        start_factor: 0.1
-        end_factor: 1.0
-        total_iters: 100
-    - name: torch.optim.lr_scheduler.CosineAnnealingLR
-      kwargs:
-        T_max: 900
-        eta_min: 1.0e-07
-    - milestones:
-        - 100
   generation:
     colocated:
       enabled: false
@@ -43,29 +24,10 @@ policy:
         num_nodes: 1
 teacher:
   model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     tensor_parallel_size: 8
     context_parallel_size: 1
-  dynamic_batching:
-    enabled: false
   make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-    - name: torch.optim.lr_scheduler.LinearLR
-      kwargs:
-        start_factor: 0.1
-        end_factor: 1.0
-        total_iters: 100
-    - name: torch.optim.lr_scheduler.CosineAnnealingLR
-      kwargs:
-        T_max: 900
-        eta_min: 1.0e-07
-    - milestones:
-        - 100
   generation:
     colocated:
       enabled: false
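In this non-colocated recipe, generation.colocated.enabled: false keeps the generation workers on their own resources instead of time-sharing the training GPUs, and the num_nodes: 1 context line above sits inside that resources block. A sketch of how the full block presumably reads, with the one value not visible in this diff marked as an assumption:

  generation:
    colocated:
      enabled: false        # generation runs on dedicated workers, not the training GPUs
      resources:
        gpus_per_node: 8    # assumed value; not shown in this diff
        num_nodes: 1        # shown as a context line in the hunk above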

examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.yaml

Lines changed: 0 additions & 65 deletions
This file was deleted.

examples/configs/recipes/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.yaml

Lines changed: 0 additions & 57 deletions
This file was deleted.
