-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Resolved half-rope and multi-EOS issues in convert_hf_to_gguf.py for GLM4Z Model #12957
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
b928f8c
1dd934b
4928b30
b2a3eaa
285fe3a
498ed97
9ecf1bd
5592c08
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -281,8 +281,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter | |
return [(self.map_tensor_name(name), data_torch)] | ||
|
||
def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool: | ||
# Delete the unused parameters to avoid warnings or errors | ||
del name, new_name, bid, n_dims # unused | ||
|
||
# Return False, meaning no quantization is performed | ||
return False | ||
|
||
# some models need extra generated tensors (like rope_freqs) | ||
|
@@ -669,7 +671,8 @@ def get_vocab_base_pre(self, tokenizer) -> str: | |
if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a": | ||
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code | ||
res = "jina-v2-code" | ||
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516": | ||
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516" or chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2": | ||
# ref: https://huggingface.co/THUDM/glm-4-9b-hf | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you make this change with
I've never run that myself, so I don't know how to use it, but the style here looks inconsistent with other models, which makes me think it wasn't used? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No, I modified it based on GLM4, chatglm4, and glmedge, because the chkhsh values of these models are all different. I haven’t used this script either. |
||
# ref: https://huggingface.co/THUDM/glm-4-9b-chat | ||
res = "chatglm-bpe" | ||
if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee": | ||
|
@@ -735,9 +738,6 @@ def get_vocab_base_pre(self, tokenizer) -> str: | |
if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406": | ||
# ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct | ||
res = "llama4" | ||
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2": | ||
# ref: https://huggingface.co/THUDM/glm-4-9b-hf | ||
res = "glm4" | ||
|
||
if res is None: | ||
logger.warning("\n") | ||
|
@@ -4929,23 +4929,7 @@ def prepare_tensors(self): | |
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias) | ||
|
||
|
||
@Model.register("Glm4ForCausalLM") | ||
class Glm4Model(Model): | ||
model_arch = gguf.MODEL_ARCH.GLM4 | ||
|
||
def set_vocab(self): | ||
self._set_vocab_gpt2() | ||
|
||
def set_gguf_parameters(self): | ||
super().set_gguf_parameters() | ||
if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: | ||
if self.hparams["rope_scaling"].get("type") == "yarn": | ||
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) | ||
self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) | ||
self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) | ||
|
||
|
||
@Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration") | ||
@Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration", "Glm4ForCausalLM") | ||
class ChatGLMModel(Model): | ||
model_arch = gguf.MODEL_ARCH.CHATGLM | ||
|
||
|
@@ -5065,13 +5049,20 @@ def set_vocab(self): | |
special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"]) | ||
# this one is usually not in config.json anyway | ||
special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"]) | ||
# exclude glm-edge 1.5B & 4B | ||
if self.hparams.get("partial_rotary_factor", 1.0) == 0.5: | ||
print("add bos in model") | ||
piDack marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["[gMASK]"]) | ||
special_vocab.add_to_gguf(self.gguf_writer) | ||
|
||
def set_gguf_parameters(self): | ||
n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed")) | ||
n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads")) | ||
n_head_kv = self.hparams.get("multi_query_group_num", self.hparams.get("num_key_value_heads", n_head)) | ||
self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed)) | ||
if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx", "seq_length"], optional=True)) is not None: | ||
self.gguf_writer.add_context_length(n_ctx) | ||
else: | ||
self.gguf_writer.add_context_length(n_embed) | ||
self.gguf_writer.add_embedding_length(n_embed) | ||
self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", self.hparams.get("intermediate_size", 4 * n_embed))) | ||
self.gguf_writer.add_block_count(self.hparams.get("num_layers", self.hparams["num_hidden_layers"])) | ||
|
@@ -5085,6 +5076,11 @@ def set_gguf_parameters(self): | |
rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"] | ||
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))) | ||
self.gguf_writer.add_add_bos_token(False) | ||
piDack marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: | ||
if self.hparams["rope_scaling"].get("type") == "yarn": | ||
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) | ||
self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) | ||
self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) | ||
rope_freq = 10000 | ||
if "rope_ratio" in self.hparams: | ||
rope_freq = rope_freq * self.hparams["rope_ratio"] | ||
|
Uh oh!
There was an error while loading. Please reload this page.