@@ -265,8 +265,6 @@ def verify_with_parallel_config(
                 " must be divisible by tensor parallel size "
                 f"({tensor_parallel_size}).")
 
-        total_num_hidden_layers = getattr(self.hf_text_config,
-                                          "num_hidden_layers", 0)
         pipeline_parallel_size = parallel_config.pipeline_parallel_size
         architectures = getattr(self.hf_config, "architectures", [])
         if not all(arch in _PP_SUPPORTED_MODELS
@@ -275,12 +273,6 @@ def verify_with_parallel_config(
                 "Pipeline parallelism is only supported for the following "
                 f"architectures: {_PP_SUPPORTED_MODELS}.")
 
-        if total_num_hidden_layers % pipeline_parallel_size != 0:
-            raise ValueError(
-                f"Total number of hidden layers ({total_num_hidden_layers}) "
-                "must be divisible by pipeline parallel size "
-                f"({pipeline_parallel_size}).")
-
         if self.quantization == "bitsandbytes" and (
                 parallel_config.tensor_parallel_size > 1
                 or parallel_config.pipeline_parallel_size > 1):
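Note: the divisibility check is removed because get_num_layers (next hunk) now delegates to get_pp_indices, which assigns each pipeline stage its own layer range, so the layer count no longer has to split evenly across stages. A minimal sketch of one plausible partitioning policy, an assumption rather than the real vllm/distributed/utils implementation:

from typing import Tuple

def get_pp_indices_sketch(num_layers: int, pp_rank: int,
                          pp_size: int) -> Tuple[int, int]:
    # Assumed policy: every stage gets floor(num_layers / pp_size) layers,
    # and the remainder is spread over the first `extra` stages, so no
    # divisibility between layers and stages is required.
    base, extra = divmod(num_layers, pp_size)
    start = pp_rank * base + min(pp_rank, extra)
    end = start + base + (1 if pp_rank < extra else 0)
    return start, end

# 26 layers over 4 stages -> [0, 7), [7, 14), [14, 20), [20, 26): a layout
# that the removed ValueError above would have rejected.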
@@ -385,9 +377,13 @@ def get_num_attention_heads(self,
         return num_heads // parallel_config.tensor_parallel_size
 
     def get_num_layers(self, parallel_config: "ParallelConfig") -> int:
+        from vllm.distributed.utils import get_pp_indices
         total_num_hidden_layers = getattr(self.hf_text_config,
                                           "num_hidden_layers", 0)
-        return total_num_hidden_layers // parallel_config.pipeline_parallel_size
+        pp_rank = parallel_config.rank // parallel_config.tensor_parallel_size
+        pp_size = parallel_config.pipeline_parallel_size
+        start, end = get_pp_indices(total_num_hidden_layers, pp_rank, pp_size)
+        return end - start
 
     def contains_seqlen_agnostic_layers(
             self, parallel_config: "ParallelConfig") -> bool:
@@ -709,6 +705,7 @@ def __init__(
             {"CUDA_VISIBLE_DEVICES": envs.CUDA_VISIBLE_DEVICES})
 
         self._verify_args()
+        self.rank = 0
 
     def _verify_args(self) -> None:
         if (self.pipeline_parallel_size > 1
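The new rank field defaults to 0, so a process that never overwrites it (e.g. the driver, or a single-worker run) resolves to pipeline stage 0 in get_num_layers. A hypothetical sketch of the per-worker override, assuming each worker holds a reference to the shared ParallelConfig (the attribute name comes from the diff; the call site is assumed):

def init_worker(parallel_config: "ParallelConfig", rank: int) -> None:
    # Hypothetical call site: stamp this worker's global rank onto the
    # config so get_num_layers picks out the worker's own pipeline stage.
    parallel_config.rank = rank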