Skip to content

Commit d523b4d

Browse files
zhang-cheng09 and chanchzhang
authored and committed
fix hf_path
Signed-off-by: zhangcheng <chzhang_bj@163.com>
1 parent 8cc497f commit d523b4d

File tree

6 files changed

+45
-33
lines changed

6 files changed

+45
-33
lines changed

src/megatron/bridge/models/qwen_omni/context_parallel_utils.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,19 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
115
import torch
16+
from typing import Optional
217

318
from megatron.core import parallel_state as mpu
419

@@ -191,7 +206,7 @@ def backward(ctx, grad_output):
191206

192207

193208
def split_data_cp_rank(
194-
val: torch.Tensor, cp_size: int, seq_dim: int, cp_rank: int = None
209+
val: Optional[torch.Tensor], cp_size: int, seq_dim: int, cp_rank: int = None
195210
):
196211
assert cp_size > 1
197212
assert 0 == val.shape[seq_dim] % (2 * cp_size), f"{val.shape=} {cp_size=}"

src/megatron/bridge/models/qwen_omni/modelling_qwen3_omni.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -118,25 +118,25 @@ def forward(
118118
**kwargs,
119119
) -> torch.Tensor:
120120
return self.thinker(
121-
input_ids,
122-
input_features,
123-
position_ids,
124-
attention_mask,
125-
labels,
126-
loss_mask,
127-
inference_params,
128-
packed_seq_params,
129-
extra_block_kwargs,
130-
pixel_values,
131-
pixel_values_videos,
132-
image_grid_thw,
133-
video_grid_thw,
134-
image_input_mask,
135-
video_input_mask,
136-
feature_attention_mask,
137-
audio_feature_lengths,
138-
cp_img_num,
139-
use_audio_in_video,
140-
video_second_per_grid,
121+
input_ids=input_ids,
122+
input_features=input_features,
123+
position_ids=position_ids,
124+
attention_mask=attention_mask,
125+
labels=labels,
126+
loss_mask=loss_mask,
127+
inference_params=inference_params,
128+
packed_seq_params=packed_seq_params,
129+
extra_block_kwargs=extra_block_kwargs,
130+
pixel_values=pixel_values,
131+
pixel_values_videos=pixel_values_videos,
132+
image_grid_thw=image_grid_thw,
133+
video_grid_thw=video_grid_thw,
134+
image_input_mask=image_input_mask,
135+
video_input_mask=video_input_mask,
136+
feature_attention_mask=feature_attention_mask,
137+
audio_feature_lengths=audio_feature_lengths,
138+
cp_img_num=cp_img_num,
139+
use_audio_in_video=use_audio_in_video,
140+
video_second_per_grid=video_second_per_grid,
141141
**kwargs,
142142
)

src/megatron/bridge/models/qwen_omni/qwen3_omni_bridge.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import logging
16-
from typing import Dict, Mapping, Union
17-
1815
import torch
19-
import torch.nn as nn
20-
from megatron.core import parallel_state
2116
from transformers import Qwen3OmniMoeForConditionalGeneration
2217

2318
from megatron.bridge.models.conversion.mapping_registry import MegatronMappingRegistry
2419
from megatron.bridge.models.qwen_omni.modelling_qwen3_omni import Qwen3OmniMoeModel
25-
from megatron.bridge.models.conversion.model_bridge import MegatronModelBridge, WeightConversionTask
20+
from megatron.bridge.models.conversion.model_bridge import MegatronModelBridge
2621
from megatron.bridge.models.conversion.param_mapping import (
2722
AutoMapping,
2823
ConcatenatedQKVMapping,
@@ -32,7 +27,6 @@
3227
)
3328
from megatron.bridge.models.hf_pretrained.vlm import PreTrainedVLM
3429
from megatron.bridge.models.qwen_omni.qwen3_omni_provider import Qwen3OmniMoeModelProvider
35-
from megatron.bridge.utils.common_utils import extract_expert_number_from_param
3630

3731

3832
@MegatronModelBridge.register_bridge(source=Qwen3OmniMoeForConditionalGeneration, target=Qwen3OmniMoeModel)

src/megatron/bridge/models/qwen_omni/qwen3_omni_provider.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class Qwen3OmniMoeModelProvider(Qwen3MoEModelProvider):
4545

4646
pretrained_model_name: str = "Qwen/Qwen3-Omni-30B-A3B-Instruct"
4747

48-
# Vision-specific token IDs matching Qwen3VL MoE configuration
48+
# Vision-specific token IDs matching Qwen3-Omni-MoE configuration
4949
# Based on HuggingFace Qwen3-Omni-MoE configs
5050
# Token ID for image placeholder in text
5151
image_token_id: int = 151655
@@ -143,7 +143,7 @@ def finalize(self) -> None:
143143

144144
def provide(self, pre_process=None, post_process=None, vp_stage=None):
145145
"""
146-
Provide a Qwen3VL MoE model instance with vision and language components.
146+
Provide a Qwen3 Omni MoE model instance with vision and language components.
147147
"""
148148
language_transformer_config = self
149149

src/megatron/bridge/models/qwen_omni/thinker_model.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
# http://www.apache.org/licenses/LICENSE-2.0
88
#
99
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1012
# See the License for the specific language governing permissions and
1113
# limitations under the License.
1214

@@ -139,7 +141,7 @@ def __init__(
139141
pg_collection=pg_collection,
140142
)
141143
assert len(vision_transformer_config.vision_config.deepstack_visual_indexes) < len(self.language_model.decoder.layers), (
142-
"the deepstack_visual_embeds should on the first pp-stage",
144+
f"the deepstack_visual_embeds should on the first pp-stage",
143145
f"got {len(vision_transformer_config.vision_config.deepstack_visual_indexes)} deepstack_visual_indexes, "
144146
f" {len(self.language_model.decoder.layers)} language model layers",
145147
)
@@ -208,7 +210,8 @@ def get_audio_features(
208210
else:
209211
audio_feature_lengths = None
210212

211-
feature_lens = audio_feature_lengths if audio_feature_lengths is not None else feature_attention_mask.sum(-1)
213+
# feature_lens = audio_feature_lengths if audio_feature_lengths is not None else feature_attention_mask.sum(-1)
214+
feature_lens = audio_feature_lengths
212215
audio_outputs = self.audio_model(
213216
input_features,
214217
feature_lens=feature_lens,

src/megatron/bridge/recipes/qwen_vl/qwen3_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def qwen3_omni_30b_a3b_finetune_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs
248248
is_full_sft = peft_value is None or (isinstance(peft_value, str) and peft_value.lower() == "none")
249249

250250
recommended_kwargs: Qwen3VLCommonKwargs = {
251-
"hf_path": "../hf-hub/Qwen/Qwen3-Omni-30B-A3B-Instruct",
251+
"hf_path": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
252252
"tensor_model_parallel_size": 1,
253253
"pipeline_model_parallel_size": 1,
254254
"pipeline_dtype": torch.bfloat16,

0 commit comments

Comments (0)