@@ -244,8 +244,22 @@ async def estimate_with_mbridge(config: MBridgeEstimateConfig):
     args.moe_grouped_gemm = True  # Default
     args.qk_layernorm = tf_config.qk_layernorm
     args.multi_latent_attention = "deepseek" in getattr(hf_config, "model_type", "")
-    args.padded_vocab_size = getattr(hf_config, "vocab_size")
-    args.max_position_embeddings = getattr(hf_config, "max_position_embeddings")
+    if hasattr(hf_config, "vocab_size"):
+        args.padded_vocab_size = hf_config.vocab_size
+    elif hasattr(hf_config, "text_config") and hasattr(hf_config.text_config, "vocab_size"):
+        args.padded_vocab_size = hf_config.text_config.vocab_size
+    else:
+        raise ValueError(f"Vocab size attribute not found for the current model configuration {type(hf_config).__name__}. "
+                         f"It should be either `vocab_size` (for text-only models) or `text_config.vocab_size` (for multimodal models).")
+
+    if hasattr(hf_config, "max_position_embeddings"):
+        args.max_position_embeddings = hf_config.max_position_embeddings
+    elif hasattr(hf_config, "text_config") and hasattr(hf_config.text_config, "max_position_embeddings"):
+        args.max_position_embeddings = hf_config.text_config.max_position_embeddings
+    else:
+        raise ValueError(f"Max position embeddings attribute not found for the current model configuration {type(hf_config).__name__}. "
+                         f"It should be either `max_position_embeddings` (for text-only models) or `text_config.max_position_embeddings` (for multimodal models).")
+
     args.tie_word_embeddings = getattr(hf_config, "tie_word_embeddings", False)
     args.world_size = config.num_gpus
 
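The added branches boil down to one lookup rule: prefer the attribute on the top-level config, fall back to a nested `text_config` (as many multimodal HF configs carry), and fail loudly otherwise. Below is a minimal, self-contained sketch of that rule; `resolve_text_attr` and the `SimpleNamespace` stand-in configs are illustrative only and are not part of this PR or of `transformers`.

```python
# Hypothetical helper reproducing the fallback logic added in the diff,
# exercised with SimpleNamespace stand-ins instead of real HF config classes.
from types import SimpleNamespace


def resolve_text_attr(hf_config, name: str):
    """Return `name` from the config, falling back to its nested text_config."""
    if hasattr(hf_config, name):
        return getattr(hf_config, name)
    text_config = getattr(hf_config, "text_config", None)
    if text_config is not None and hasattr(text_config, name):
        return getattr(text_config, name)
    raise ValueError(
        f"{name} not found on {type(hf_config).__name__}: expected `{name}` "
        f"(text-only models) or `text_config.{name}` (multimodal models)."
    )


# Text-only style config: attributes live at the top level.
text_only = SimpleNamespace(vocab_size=32000, max_position_embeddings=4096)
# Multimodal style config: text attributes nested under text_config.
multimodal = SimpleNamespace(
    text_config=SimpleNamespace(vocab_size=151936, max_position_embeddings=32768)
)

assert resolve_text_attr(text_only, "vocab_size") == 32000
assert resolve_text_attr(multimodal, "max_position_embeddings") == 32768
```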