1 file changed: +6 -3 lines changed
vllm/model_executor/models: 1 file changed, +6 -3 lines changed
Columns: original file line number | diff line number | diff line change
@@ -170,8 +170,9 @@ def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
170
170
return quant_config
171
171
172
172
def forward (self , hidden_states : torch .Tensor ) -> torch .Tensor :
173
- # NOTE: hidden_states can have either 1D or 2D shape.
174
- orig_shape = hidden_states .shape
173
+ assert hidden_states .dim (
174
+ ) <= 2 , "Qwen3MoeSparseMoeBlock only supports 1D or 2D inputs"
175
+ is_input_1d = hidden_states .dim () == 1
175
176
hidden_dim = hidden_states .shape [- 1 ]
176
177
hidden_states = hidden_states .view (- 1 , hidden_dim )
177
178
@@ -180,7 +181,9 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
180
181
final_hidden_states = self .experts (hidden_states = hidden_states ,
181
182
router_logits = router_logits )
182
183
183
- return final_hidden_states .view (orig_shape )
184
+ # return to 1d if input is 1d
185
+ return final_hidden_states .squeeze (0 ) if is_input_1d else \
186
+ final_hidden_states
184
187
185
188
186
189
class Qwen3MoeAttention (nn .Module ):
You can’t perform that action at this time.
0 commit comments