2
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
3
"""Inference-only Qwen3Next model."""
4
4
from collections .abc import Iterable
5
+ from itertools import islice
5
6
from typing import Optional
6
7
7
8
import torch
@@ -917,8 +918,11 @@ def get_layer(prefix: str):
917
918
make_empty_intermediate_tensors_factory (
918
919
["hidden_states" , "residual" ], config .hidden_size ))
919
920
920
- self .norm = Qwen3NextRMSNorm (config .hidden_size ,
921
- eps = config .rms_norm_eps )
921
+ if get_pp_group ().is_last_rank :
922
+ self .norm = Qwen3NextRMSNorm (config .hidden_size ,
923
+ eps = config .rms_norm_eps )
924
+ else :
925
+ self .norm = PPMissingLayer ()
922
926
923
927
def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    """Return the embeddings produced for ``input_ids``.

    This is a thin delegation: ``input_ids`` is handed to
    ``self.embed_tokens`` and that call's result is returned unchanged.

    Args:
        input_ids: Tensor forwarded as-is to ``self.embed_tokens``
            (presumably token ids; shape/dtype are not visible here --
            TODO confirm against the caller).

    Returns:
        Whatever ``self.embed_tokens(input_ids)`` returns.
    """
    return self.embed_tokens(input_ids)
@@ -941,7 +945,7 @@ def forward(
941
945
hidden_states = intermediate_tensors ["hidden_states" ]
942
946
residual = intermediate_tensors ["residual" ]
943
947
944
- for layer in self .layers :
948
+ for layer in islice ( self .layers , self . start_layer , self . end_layer ) :
945
949
hidden_states , residual = layer (
946
950
positions = positions ,
947
951
hidden_states = hidden_states ,
0 commit comments