@@ -519,7 +519,7 @@ def prepare_metadata(self, vocab_only: bool):
     def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(self.block_count)
 
-        if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx", "n_positions"], optional=True)) is not None:
+        if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx", "n_positions", "max_length"], optional=True)) is not None:
             self.gguf_writer.add_context_length(n_ctx)
             logger.info(f"gguf: context length = {n_ctx}")
 
@@ -2020,6 +2020,20 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("ArceeForCausalLM")
+class ArceeModel(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.ARCEE
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self._try_set_pooling_type()
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+
 @ModelBase.register(
     "LlavaForConditionalGeneration",  # pixtral
     "Mistral3ForConditionalGeneration",  # mistral small 3.1
@@ -4062,6 +4076,34 @@ def _is_tokenizer_xlmroberta(self) -> bool:
         raise ValueError(f"unknown tokenizer: {toktyp}")
 
 
+@ModelBase.register("NeoBERT", "NeoBERTLMHead", "NeoBERTForSequenceClassification")
+class NeoBert(BertModel):
+    model_arch = gguf.MODEL_ARCH.NEO_BERT
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        # NeoBERT uses 2/3 of the intermediate size as feed forward length
+        self.gguf_writer.add_feed_forward_length(int(2 * self.hparams["intermediate_size"] / 3))
+        self.gguf_writer.add_rope_freq_base(10000.0)  # default value for NeoBERT
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+
+        f_rms_eps = self.hparams.get("norm_eps", 1e-6)  # default value for NeoBERT
+        self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
+        logger.info(f"gguf: rms norm epsilon = {f_rms_eps}")
+
+        self.gguf_writer.add_pooling_type(gguf.PoolingType.CLS)  # https://huggingface.co/chandar-lab/NeoBERT#how-to-use
+
+    def modify_tensors(self, data_torch, name, bid):
+        if name.startswith("decoder."):
+            return []
+
+        if name.startswith("model."):
+            name = name[6:]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
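# Illustrative sketch (not part of the diff): worked example of the NeoBERT
# feed-forward length computed above; the intermediate_size value is hypothetical.
intermediate_size = 3072
assert int(2 * intermediate_size / 3) == 2048  # value written via add_feed_forward_length()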
@@ -5262,6 +5304,34 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("Dots1ForCausalLM")
+class Dots1Model(Qwen2MoeModel):
+    model_arch = gguf.MODEL_ARCH.DOTS1
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.hparams["num_experts"] = self.hparams["n_routed_experts"]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_leading_dense_block_count(self.hparams["first_k_dense_replace"])
+        self.gguf_writer.add_expert_shared_count(self.hparams["n_shared_experts"])
+        self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
+        self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
+
+        if self.hparams["scoring_func"] == "noaux_tc":
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+        else:
+            raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        if name.endswith("e_score_correction_bias"):
+            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
+        if "shared_experts" in name:
+            return [(self.map_tensor_name(name), data_torch)]
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("PLMForCausalLM")
 class PLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.PLM
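# Illustrative sketch (not part of the diff): the tensor-name rewrite Dots1Model
# applies before the usual Qwen2MoE mapping; the tensor name below is hypothetical
# but follows the dots.llm1 checkpoint layout.
name = "model.layers.3.mlp.gate.e_score_correction_bias"
if name.endswith("e_score_correction_bias"):
    name = name.replace("e_score_correction_bias", "e_score_correction.bias")
assert name == "model.layers.3.mlp.gate.e_score_correction.bias"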