We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a62de9e · commit 898285c — Copy full SHA for 898285c
vllm/config.py
@@ -114,8 +114,9 @@ def get_num_heads(self, parallel_config: "ParallelConfig") -> int:
114
# Note: for falcon, when new_decoder_architecture is True, the
115
# multi_query flag is ignored and we use n_head_kv for the number of
116
# KV heads.
117
+ falcon_model_types = ["falcon", "RefinedWeb", "RefinedWebModel"]
118
new_decoder_arch_falcon = (
- self.hf_config.model_type == "falcon"
119
+ self.hf_config.model_type in falcon_model_types
120
and getattr(self.hf_config, "new_decoder_architecture", False))
121
if not new_decoder_arch_falcon and getattr(self.hf_config,
122
"multi_query", False):
0 commit comments