Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
d9cf489
sglang support:initial commit
PrinsYin Nov 23, 2025
3eace5f
sglang:manually set cuda visible to let localran=0 to manage gpus of …
PrinsYin Nov 24, 2025
6fbbbb7
sglang: add sglang setup in grpo.py, add find available port to set u…
PrinsYin Nov 25, 2025
242612c
sglang: add shutdown
PrinsYin Nov 25, 2025
a3d8ad6
sglang server: fix gpu allocation when tp =1
PrinsYin Nov 28, 2025
88971e3
generate only first request
PrinsYin Nov 25, 2025
db8b07b
fix : choose the correct gpu using base gpu id
PrinsYin Nov 26, 2025
dd0e54f
asyncio to roolout all saples
PrinsYin Nov 26, 2025
21c54e3
fix new event loop for rollout
PrinsYin Nov 26, 2025
5e24fab
added mem_fraction
PrinsYin Nov 26, 2025
50189a9
modified build_sampling_paras and stop token handling
PrinsYin Nov 28, 2025
ec35b6b
temp: prevent server overlaod with semaphore
PrinsYin Nov 28, 2025
f099caa
sglang: refactor, move async loop position
PrinsYin Nov 30, 2025
a03eba8
sglang: fix total length in generate
PrinsYin Nov 30, 2025
e08cfd6
sglang: env setup
PrinsYin Nov 30, 2025
ccc66f6
from tensor:
PrinsYin Nov 27, 2025
2ce928b
sglang refit: fix sglang import
PrinsYin Nov 27, 2025
4aa1e74
fix: match fsdp ranks correctly with sglang
PrinsYin Nov 28, 2025
9098077
flush cache before update begins
PrinsYin Nov 28, 2025
9900a33
Fix SGLang compatibility: add hasattr checks for vLLM-specific methods
PrinsYin Dec 1, 2025
5cb78e3
sglang: modified config (increase mem_fration, enable wandb)
PrinsYin Dec 1, 2025
03d9d0c
refactor(grpo): extract init logic for generation backends
PrinsYin Dec 2, 2025
7ca9776
refactor SGLangConfig
PrinsYin Dec 2, 2025
f1c26dd
refactor: generalize logger metrics for all generation backends
PrinsYin Dec 4, 2025
255dcc6
refactor sglang config loading to make it consistent with other backendw
PrinsYin Dec 4, 2025
ee01f91
resolved ai comments
PrinsYin Dec 6, 2025
e25e573
changed print to using loging
PrinsYin Dec 6, 2025
e93699f
Merge branch 'main' into sglang_server
PrinsYin Dec 9, 2025
85d6a92
Update nemo_rl/models/generation/sglang/sglang_worker.py
PrinsYin Dec 17, 2025
be1ae27
Merge branch 'main' into sglang_server
PrinsYin Dec 17, 2025
ede624f
fix comments about config defaults
PrinsYin Dec 17, 2025
f9dd700
functional test and unit tests added
RolaoDenthu Dec 20, 2025
313eaa7
nightly test added
RolaoDenthu Dec 21, 2025
d207bf3
add more unit tests
RolaoDenthu Dec 21, 2025
ceb934e
add test script
RolaoDenthu Dec 21, 2025
570584f
fix: correct comment to match gpus_per_server=2
RolaoDenthu Dec 21, 2025
1f34c61
fix: add assertion for SGLang non-colocated mode
RolaoDenthu Dec 28, 2025
a9d3d69
fix: minor bug fixes
RolaoDenthu Dec 28, 2025
72946a3
remove run.sh
RolaoDenthu Dec 28, 2025
ea2f0ab
add more unit tests
RolaoDenthu Dec 21, 2025
446e87f
add sglang test
RolaoDenthu Dec 28, 2025
c58fffb
Merge branch 'main' into add-tests
guyueh1 Dec 27, 2025
6882cb7
Merge prfork/add-tests
RolaoDenthu Dec 28, 2025
6f72efe
add test for qwen2.5-math-1.5b for sglang backend
RolaoDenthu Dec 28, 2025
1adc4a1
modify test file name
RolaoDenthu Dec 30, 2025
256c0ee
fix lint
RolaoDenthu Dec 30, 2025
c345a15
fix lints
RolaoDenthu Jan 3, 2026
02a7e1f
add sglang init
RolaoDenthu Jan 5, 2026
5ec83e1
update uv.lock
RolaoDenthu Jan 6, 2026
0513cbf
Add sglang/config.py to pyrefly
RolaoDenthu Jan 6, 2026
2338fdd
uv.lock updated
RolaoDenthu Jan 7, 2026
b846b36
fix envir
RolaoDenthu Jan 7, 2026
570f996
Merge branch 'main' into add-tests
guyueh1 Jan 8, 2026
ae4bc44
add sglang-only marker filtering
RolaoDenthu Jan 8, 2026
8adbbe5
fix sglang import
RolaoDenthu Jan 8, 2026
f511940
fix test name
RolaoDenthu Jan 8, 2026
df99998
Merge remote-tracking branch 'origin/main' into add-tests
RolaoDenthu Jan 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions examples/configs/grpo_math_1B_sglang.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# GRPO math 1B example that selects the SGLang generation backend.
# Builds on grpo_math_1B.yaml through the `defaults` key; only the keys
# listed here are overridden.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped -- confirm it against the base config schema.
defaults: grpo_math_1B.yaml

grpo:
  val_batch_size: 128

policy:
  generation:
    backend: "sglang"
    sglang_cfg:
      # SGLang specific configuration
      # model_path and dtype are interpolated from the policy section so the
      # generation server and the trained policy stay in sync.
      model_path: ${policy.model_name}
      gpus_per_server: 1
      dtype: ${policy.precision}
      context_length: 512  # Maximum context length
      allow_auto_truncate: true
      enable_memory_saver: false
      dp_size: 1
      pp_size: 1
      ep_size: 1
      max_running_requests: null
      mem_fraction_static: 0.7
      skip_server_warmup: true

logger:
  wandb_enabled: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# GRPO recipe for Qwen/Qwen2.5-Math-1.5B-Instruct using the SGLang generation
# backend with 8 GPUs per node.
# Builds on ../../grpo_math_1B.yaml through the `defaults` key; only the keys
# listed here are overridden.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped (which also produced duplicate `enabled` and
# `name` keys) -- confirm it against the base config schema.
defaults: ../../grpo_math_1B.yaml

grpo:
  max_num_steps: 450

checkpointing:
  checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang

policy:
  model_name: Qwen/Qwen2.5-Math-1.5B-Instruct
  tokenizer:
    name: Qwen/Qwen2.5-Math-1.5B-Instruct
  dynamic_batching:
    enabled: true
  sequence_packing:
    enabled: false
  make_sequence_length_divisible_by: 1
  generation:
    backend: "sglang"
    max_new_tokens: 512
    sglang_cfg:
      # model_path and dtype are interpolated from the policy section so the
      # generation server and the trained policy stay in sync.
      model_path: ${policy.model_name}
      gpus_per_server: 8
      dtype: ${policy.precision}
      # NOTE(review): context_length equals data.max_input_seq_length while
      # max_new_tokens is also 512 -- confirm that prompt + generation fit as
      # intended with allow_auto_truncate enabled.
      context_length: 512
      allow_auto_truncate: true
      enable_memory_saver: false
      dp_size: 1
      pp_size: 1
      ep_size: 1
      max_running_requests: null
      mem_fraction_static: 0.7
      skip_server_warmup: true

data:
  max_input_seq_length: 512

logger:
  log_dir: logs/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang

cluster:
  gpus_per_node: 8

49 changes: 49 additions & 0 deletions examples/configs/recipes/llm/grpo-qwen3-0.6b-1n8g-sglang.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# GRPO recipe for Qwen/Qwen3-0.6B using the SGLang generation backend with
# 8 GPUs per node.
# Builds on ../../grpo_math_1B.yaml through the `defaults` key; only the keys
# listed here are overridden.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped (which also produced duplicate `enabled` and
# `name` keys) -- confirm it against the base config schema.
defaults: ../../grpo_math_1B.yaml

grpo:
  max_num_steps: 500
  val_batch_size: 128

checkpointing:
  checkpoint_dir: results/grpo-qwen3-0.6b-1n8g-sglang

policy:
  model_name: Qwen/Qwen3-0.6B
  tokenizer:
    name: Qwen/Qwen3-0.6B
  dynamic_batching:
    enabled: true
  sequence_packing:
    enabled: false
  make_sequence_length_divisible_by: 1
  generation:
    backend: "sglang"
    max_new_tokens: 512
    sglang_cfg:
      # model_path and dtype are interpolated from the policy section so the
      # generation server and the trained policy stay in sync.
      model_path: ${policy.model_name}
      gpus_per_server: 8
      dtype: ${policy.precision}
      # NOTE(review): context_length equals data.max_input_seq_length while
      # max_new_tokens is also 512 -- confirm that prompt + generation fit as
      # intended with allow_auto_truncate enabled.
      context_length: 512
      allow_auto_truncate: true
      enable_memory_saver: false
      dp_size: 1
      pp_size: 1
      ep_size: 1
      max_running_requests: null
      mem_fraction_static: 0.7
      skip_server_warmup: true

data:
  max_input_seq_length: 512

logger:
  log_dir: logs/grpo-qwen3-0.6b-1n8g-sglang
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-qwen3-0.6b-1n8g-sglang

cluster:
  gpus_per_node: 8

Loading