Commit fd3574c

update test
Signed-off-by: HuiyingLi <willwin.lee@gmail.com>
1 parent 27a5452 commit fd3574c

File tree

1 file changed: +65 −0 lines changed


tests/unit_tests/models/qwen3_vl_moe/test_qwen3_vl_moe_state_dict_adapter.py

Lines changed: 65 additions & 0 deletions
@@ -120,6 +120,68 @@ def test_respects_exclude_regex(self, adapter):
        assert "exclude.me" not in out


+    def test_aggregates_with_device_mesh_non_dtensor(self, adapter, monkeypatch):
+        local_experts = torch.tensor(
+            [
+                [[1.0, 2.0], [3.0, 4.0]],
+                [[5.0, 6.0], [7.0, 8.0]],
+            ],
+            dtype=adapter.dtype,
+        )  # shape: [2, 2, 2]
+
+        # Only experts 1 and 2 live on this rank
+        monkeypatch.setattr(
+            "nemo_automodel.components.moe.state_dict_utils.get_expert_range_for_rank_from_mesh",
+            lambda mesh, n_experts: (1, 3),
+        )
+        # No distributed init => skip all_gather branch
+        monkeypatch.setattr("torch.distributed.is_initialized", lambda: False)
+
+        device_mesh = Mock()
+        device_mesh.mesh_dim_names = ["ep"]
+
+        state_dict = {
+            "model.language_model.layers.0.mlp.experts.gate_and_up_projs": local_experts,
+        }
+
+        out = adapter.to_hf(state_dict, device_mesh=device_mesh)
+        gate_key = "model.language_model.layers.0.mlp.experts.gate_up_proj"
+        global_gate = out[gate_key]
+
+        assert global_gate.shape == (adapter.moe_config.n_routed_experts, 2, 2)
+        # Experts 1 and 2 should be populated from local_experts; others remain zero
+        torch.testing.assert_close(global_gate[1:3], local_experts)
+        assert torch.all(global_gate[0] == 0)
+        assert torch.all(global_gate[3] == 0)
+
+
+    def test_aggregates_dtensor_path_uses_split_helper(self, adapter, monkeypatch):
+        local_slice = torch.tensor([[9.0, 10.0]], dtype=adapter.dtype)  # shape: [1, 2]
+
+        monkeypatch.setattr(
+            "nemo_automodel.components.moe.state_dict_utils.is_dtensor", lambda tensor: True
+        )
+        monkeypatch.setattr(
+            "nemo_automodel.components.moe.state_dict_utils.split_experts_weights_dtensor_aware",
+            lambda weight, n_experts: ([local_slice], [2]),
+        )
+        monkeypatch.setattr("torch.distributed.is_initialized", lambda: False)
+
+        device_mesh = Mock()
+        device_mesh.mesh_dim_names = ["ep"]
+
+        state_dict = {
+            "model.language_model.layers.0.mlp.experts.down_projs": torch.empty(1, 1, 2),
+        }
+
+        out = adapter.to_hf(state_dict, device_mesh=device_mesh)
+        down_key = "model.language_model.layers.0.mlp.experts.down_proj"
+        global_down = out[down_key]
+
+        assert global_down.shape[0] == adapter.moe_config.n_routed_experts
+        torch.testing.assert_close(global_down[2], local_slice)
+
+
 class TestFromHF:
     def test_detects_model_prefix(self, adapter):
         hf_state = {
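
For reference, the non-DTensor branch exercised above amounts to scattering the rank-local expert slice into a zero-filled global tensor at the expert range reported for this rank; the DTensor branch instead splits the weight via split_experts_weights_dtensor_aware and places each returned slice at its returned expert index. The standalone sketch below illustrates the non-DTensor behavior only, under stated assumptions: the helper name aggregate_local_experts and n_routed_experts=8 are illustrative, not taken from the adapter.

import torch

# Minimal sketch (not the adapter's implementation): copy a rank-local
# [n_local, ...] expert tensor into a zero-initialized global
# [n_routed_experts, ...] tensor at the given expert range.
def aggregate_local_experts(local_experts, n_routed_experts, expert_range):
    start, end = expert_range  # e.g. (1, 3): this rank owns experts 1 and 2
    global_experts = torch.zeros(
        (n_routed_experts, *local_experts.shape[1:]), dtype=local_experts.dtype
    )
    global_experts[start:end] = local_experts  # rows for other experts stay zero
    return global_experts

# Mirrors the assertions in test_aggregates_with_device_mesh_non_dtensor,
# assuming n_routed_experts=8 purely for the sake of the example.
local = torch.tensor([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
out = aggregate_local_experts(local, n_routed_experts=8, expert_range=(1, 3))
assert out.shape == (8, 2, 2)
assert torch.equal(out[1:3], local) and torch.all(out[0] == 0)
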
@@ -173,6 +235,9 @@ def __init__(self, data):
            def to_local(self):
                return self._data

+            def __getitem__(self, idx):
+                return self._data[idx]
+
        captured = {"locals": []}

        monkeypatch.setattr(
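
The second hunk gives the fake DTensor used in this test an __getitem__, so code under test can index it like a real tensor in addition to calling to_local(). A minimal sketch of such a test double follows; the class name FakeDTensor and the usage lines are illustrative assumptions, not taken from the test file.

import torch

# Hypothetical stand-in for a DTensor in tests: wraps a local shard and
# exposes both to_local() and tensor-style indexing.
class FakeDTensor:
    def __init__(self, data):
        self._data = data

    def to_local(self):
        # Return the wrapped local shard, mimicking DTensor.to_local()
        return self._data

    def __getitem__(self, idx):
        # Allow fake[idx] to behave like indexing the underlying tensor
        return self._data[idx]

fake = FakeDTensor(torch.arange(6).reshape(2, 3))
assert torch.equal(fake.to_local(), torch.arange(6).reshape(2, 3))
assert torch.equal(fake[0], torch.tensor([0, 1, 2]))
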
