
Commit 4928b30

add max seqlen for glm4z
1 parent 1dd934b commit 4928b30

File tree

convert_hf_to_gguf.py
src/llama-model.cpp

2 files changed: +8 −2 lines changed

convert_hf_to_gguf.py

Lines changed: 7 additions & 1 deletion
@@ -281,8 +281,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
     def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool:
+        # drop the unused parameters to avoid warnings or errors
         del name, new_name, bid, n_dims  # unused
 
+        # return False, meaning no forced quantization
         return False
 
     # some models need extra generated tensors (like rope_freqs)
@@ -5053,14 +5055,18 @@ def set_gguf_parameters(self):
         n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
         n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
         n_head_kv = self.hparams.get("multi_query_group_num", self.hparams.get("num_key_value_heads", n_head))
-        self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
+        if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx", "seq_length"], optional=True)) is not None:
+            self.gguf_writer.add_context_length(n_ctx)
+        else:
+            self.gguf_writer.add_context_length(n_embed)
         self.gguf_writer.add_embedding_length(n_embed)
         self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", self.hparams.get("intermediate_size", 4 * n_embed)))
         self.gguf_writer.add_block_count(self.hparams.get("num_layers", self.hparams["num_hidden_layers"]))
         self.gguf_writer.add_head_count(n_head)
         self.gguf_writer.add_head_count_kv(n_head_kv)
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams.get("layernorm_epsilon",1e-5))
         self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.set_gguf_parameters(self)
         if "attention_dim" in self.hparams:
             rope_dim = self.hparams["attention_dim"]
         else:
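
Note: the change above makes the converter prefer an explicit sequence-length field (max_position_embeddings, n_ctx, or seq_length) and only fall back to n_embed when none is present. The standalone sketch below, which is not part of the commit, mirrors that selection logic over a plain hparams dict; find_first and pick_context_length are hypothetical helpers standing in for the converter's find_hparam.

from typing import Any, Optional


def find_first(hparams: dict[str, Any], keys: list[str]) -> Optional[Any]:
    # Return the value for the first key present in hparams, else None
    # (roughly what find_hparam(..., optional=True) does in the converter).
    for key in keys:
        if key in hparams:
            return hparams[key]
    return None


def pick_context_length(hparams: dict[str, Any]) -> int:
    n_embed = hparams.get("hidden_size", hparams.get("n_embed"))
    n_ctx = find_first(hparams, ["max_position_embeddings", "n_ctx", "seq_length"])
    # Fall back to n_embed only when no sequence-length field exists,
    # matching the else branch in the diff above.
    return n_ctx if n_ctx is not None else n_embed


if __name__ == "__main__":
    print(pick_context_length({"hidden_size": 4096, "max_position_embeddings": 131072}))  # 131072
    print(pick_context_length({"hidden_size": 4096, "seq_length": 8192}))                 # 8192
    print(pick_context_length({"hidden_size": 4096}))                                     # 4096 (fallback)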

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
@@ -10868,7 +10868,7 @@ struct llm_build_chatglm : public llm_graph_context {
                 inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
             }
 
-            // Post-attention norm (new!)
+            // Post-attention norm (Glm4-Z)
             if (model.layers[il].attn_post_norm){
                 cur = build_norm(cur,
                         model.layers[il].attn_post_norm,
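
Note: the guarded block applies an extra norm to the attention output only when the model provides an attn_post_norm weight, as Glm4-Z does. Below is a rough NumPy sketch of that behaviour, included only as an illustration; it is not the ggml graph code, it assumes an RMS-style norm, and rms_norm / maybe_post_attn_norm are made-up names.

import numpy as np


def rms_norm(x: np.ndarray, weight: np.ndarray, eps: float = 1e-5) -> np.ndarray:
    # Normalize by the root-mean-square over the last (embedding) axis, then scale.
    rms = np.sqrt(np.mean(x * x, axis=-1, keepdims=True) + eps)
    return (x / rms) * weight


def maybe_post_attn_norm(cur: np.ndarray, attn_post_norm: np.ndarray | None) -> np.ndarray:
    # Mirrors the guarded block: only normalize when the weight tensor exists.
    if attn_post_norm is not None:
        return rms_norm(cur, attn_post_norm)
    return cur


if __name__ == "__main__":
    x = np.random.randn(2, 8).astype(np.float32)         # (tokens, n_embd)
    w = np.ones(8, dtype=np.float32)                      # per-channel norm weight
    print(maybe_post_attn_norm(x, w).shape)               # (2, 8)
    print(np.allclose(maybe_post_attn_norm(x, None), x))  # True: passthrough when absent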
