Skip to content

Commit a84d93b

Browse files
authored
[VLM] fix: fix non true-on-policy vlm regression (#1093)
1 parent 6d43f53 commit a84d93b

File tree

5 files changed

+18
-51
lines changed

5 files changed

+18
-51
lines changed

docker/patch/latest/sglang.patch

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ index 932f52aeb..79c6b664f 100644
215215

216216
hidden_states = self._communicate_simple_fn(
217217
diff --git a/python/sglang/srt/layers/layernorm.py b/python/sglang/srt/layers/layernorm.py
218-
index 3293a8a59..02999afd0 100644
218+
index 3293a8a59..a075b71ce 100644
219219
--- a/python/sglang/srt/layers/layernorm.py
220220
+++ b/python/sglang/srt/layers/layernorm.py
221221
@@ -84,15 +84,12 @@ class RMSNorm(CustomOp):
@@ -236,7 +236,7 @@ index 3293a8a59..02999afd0 100644
236236
self.variance_epsilon = eps
237237
self.hidden_size = hidden_size
238238
self.variance_size_override = (
239-
@@ -105,15 +102,16 @@ class RMSNorm(CustomOp):
239+
@@ -105,21 +102,26 @@ class RMSNorm(CustomOp):
240240
self,
241241
x: torch.Tensor,
242242
residual: Optional[torch.Tensor] = None,
@@ -255,7 +255,17 @@ index 3293a8a59..02999afd0 100644
255255
return rms_norm_batch_invariant(
256256
x,
257257
self.weight.data,
258-
@@ -179,17 +177,35 @@ class RMSNorm(CustomOp):
258+
self.variance_epsilon,
259+
)
260+
if residual is not None:
261+
+ # TODO: Ideally we want to compute (a+b)+c, but right now we can only compute a+(b+c).
262+
+ # Since (a+b)+c != a+(b+c) in floating point, we probably need to add another parameter to fused_add_rmsnorm.
263+
+ if post_residual_addition is not None:
264+
+ residual = residual + post_residual_addition
265+
fused_add_rmsnorm(x, residual, self.weight.data, self.variance_epsilon)
266+
return x, residual
267+
out = rmsnorm(x, self.weight.data, self.variance_epsilon)
268+
@@ -179,17 +181,35 @@ class RMSNorm(CustomOp):
259269
self,
260270
x: torch.Tensor,
261271
residual: Optional[torch.Tensor] = None,

examples/geo3k_vlm/run_geo3k_vlm.py

Lines changed: 1 addition & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88

99
NUM_GPUS = int(os.environ.get("SLIME_SCRIPT_NUM_GPUS", "1"))
1010
EXTERNAL_RAY = int(os.environ.get("SLIME_SCRIPT_EXTERNAL_RAY", "0"))
11-
MASTER_ADDR = os.environ.get("MASTER_ADDR", "127.0.0.1")
1211

1312

1413
def prepare():
@@ -40,7 +39,7 @@ def execute():
4039
)
4140

4241
eval_args = (
43-
# "--eval-interval 20 "
42+
"--eval-interval 20 "
4443
"--eval-prompt-data geo3k /root/datasets/geo3k_imgurl/test.parquet "
4544
"--n-samples-per-eval-prompt 1 "
4645
"--eval-max-response-len 4096 "
@@ -119,27 +118,6 @@ def execute():
119118
# f"{true_on_policy_args} "
120119
)
121120

122-
# Kill existing processes
123-
U.exec_command(
124-
"pkill -9 sglang; "
125-
"sleep 3; "
126-
f"{'' if EXTERNAL_RAY else 'ray stop --force; '}"
127-
f"{'' if EXTERNAL_RAY else 'pkill -9 ray; '}"
128-
"pkill -9 slime; "
129-
"sleep 3; "
130-
f"{'' if EXTERNAL_RAY else 'pkill -9 ray; '}"
131-
"pkill -9 slime; "
132-
"pkill -9 redis; "
133-
"true; "
134-
)
135-
136-
if not EXTERNAL_RAY:
137-
# Start Ray
138-
U.exec_command(
139-
f"export PYTHONBUFFERED=16 && "
140-
f"ray start --head --node-ip-address {MASTER_ADDR} --num-gpus {NUM_GPUS} "
141-
f"--disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265"
142-
)
143121
# Submit Ray job
144122
execute_train(
145123
train_args=train_args,

examples/true_on_policy_vlm/README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ This example demonstrates true on-policy training with Qwen3-VL dense model on F
55
<p align="center">
66
<img src="diff.png" alt="Training Inference Log Prob Diff" width="800">
77
</p>
8+
89
## Usage
910

1011
```bash

examples/true_on_policy_vlm/run_simple.py

Lines changed: 1 addition & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88

99
NUM_GPUS = int(os.environ.get("SLIME_SCRIPT_NUM_GPUS", "1"))
1010
EXTERNAL_RAY = int(os.environ.get("SLIME_SCRIPT_EXTERNAL_RAY", "0"))
11-
MASTER_ADDR = os.environ.get("MASTER_ADDR", "127.0.0.1")
1211

1312

1413
def prepare():
@@ -39,7 +38,7 @@ def execute():
3938
)
4039

4140
eval_args = (
42-
# "--eval-interval 20 "
41+
"--eval-interval 20 "
4342
"--eval-prompt-data geo3k /root/datasets/geo3k_imgurl/test.parquet "
4443
"--n-samples-per-eval-prompt 1 "
4544
"--eval-max-response-len 4096 "
@@ -127,28 +126,6 @@ def execute():
127126
f"{true_on_policy_args} "
128127
)
129128

130-
# Kill existing processes
131-
U.exec_command(
132-
"pkill -9 sglang; "
133-
"sleep 3; "
134-
f"{'' if EXTERNAL_RAY else 'ray stop --force; '}"
135-
f"{'' if EXTERNAL_RAY else 'pkill -9 ray; '}"
136-
"pkill -9 slime; "
137-
"sleep 3; "
138-
f"{'' if EXTERNAL_RAY else 'pkill -9 ray; '}"
139-
"pkill -9 slime; "
140-
"pkill -9 redis; "
141-
"true; "
142-
)
143-
144-
if not EXTERNAL_RAY:
145-
# Start Ray
146-
U.exec_command(
147-
f"export PYTHONBUFFERED=16 && "
148-
f"ray start --head --node-ip-address {MASTER_ADDR} --num-gpus {NUM_GPUS} "
149-
f"--disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265"
150-
)
151-
152129
# Submit Ray job
153130
execute_train(
154131
train_args=train_args,

slime/backends/fsdp_utils/actor.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,8 @@ def init(self, args: Namespace, role: str, with_ref: bool = False) -> int: # ty
8080
if i == dist.get_rank():
8181
self.hf_config = AutoConfig.from_pretrained(self.args.hf_checkpoint, trust_remote_code=True)
8282
self.tokenizer = load_tokenizer(self.args.hf_checkpoint, trust_remote_code=True)
83-
if self.args.multimodal_keys:
83+
# Vision models have `vision_config` in the config
84+
if hasattr(self.hf_config, "vision_config"):
8485
self.processor = load_processor(self.args.hf_checkpoint, trust_remote_code=True)
8586
dist.barrier(group=get_gloo_group())
8687

0 commit comments

Comments
 (0)