@@ -2910,7 +2910,6 @@ def set_gguf_parameters(self):
29102910 n_kv_head if (i - attn_offset ) % attn_period == 0 else 0 for i in range (attn_offset , self .block_count )
29112911 ]
29122912
2913- self .gguf_writer .add_name (self .dir_model .name )
29142913 self .gguf_writer .add_block_count (self .block_count )
29152914 self .gguf_writer .add_context_length (self .find_hparam (["max_position_embeddings" , "n_ctx" ]))
29162915 self .gguf_writer .add_embedding_length (d_model )
@@ -2979,29 +2978,15 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
29792978
29802979 yield new_name , data_torch
29812980
# Patch hunk: the method `write_tensors` is renamed to `prepare_tensors`, and the
# call into the parent class is renamed to match (`-` lines are the old form,
# `+` lines the new one).
# After the parent-class step runs, `self._experts` (when it is not None) is
# flattened into a list of its dict keys; if any key is left, a ValueError
# "Unprocessed experts: ..." is raised listing them.
2982- def write_tensors (self ):
2983- super ().write_tensors ()
2981+ def prepare_tensors (self ):
2982+ super ().prepare_tensors ()
29842983
29852984 if self ._experts is not None :
29862985 # flatten `list[dict[str, Tensor]]` into `list[str]`
29872986 experts = [k for d in self ._experts for k in d .keys ()]
29882987 if len (experts ) > 0 :
29892988 raise ValueError (f"Unprocessed experts: { experts } " )
29902989
# Patch hunk: every line below is a `-` line, i.e. this `extra_f32_tensors`
# override is deleted from the class by this change.
# The removed method returned True only when `bid` is not None and `new_name`
# equals one of the SSM_CONV1D / SSM_X / SSM_DT / SSM_A / SSM_D tensor names
# formatted for block `bid` (with a ".weight" suffix when `name` ends in ".weight").
# NOTE(review): presumably this selected tensors to keep in F32 - inferred from
# the method name only; confirm against the base class.
2991- # same as Mamba
2992- def extra_f32_tensors (self , name : str , new_name : str , bid : int | None , n_dims : int ) -> bool :
2993- del n_dims # unused
2994-
2995- return bid is not None and new_name in (
2996- self .format_tensor_name (n , bid , ".weight" if name .endswith (".weight" ) else "" ) for n in [
2997- gguf .MODEL_TENSOR .SSM_CONV1D ,
2998- gguf .MODEL_TENSOR .SSM_X ,
2999- gguf .MODEL_TENSOR .SSM_DT ,
3000- gguf .MODEL_TENSOR .SSM_A ,
3001- gguf .MODEL_TENSOR .SSM_D ,
3002- ]
3003- )
3004-
30052990
30062991@Model .register ("CohereForCausalLM" )
30072992class CommandR2Model (Model ):
0 commit comments