@@ -423,7 +423,7 @@ def load_hparams(dir_model: Path):
         try:
             # for security reason, we don't allow loading remote code by default
             # if a model need remote code, we will fallback to config.json
-            return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
+            return AutoConfig.from_pretrained(dir_model, trust_remote_code=True).to_dict()
         except Exception as e:
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")
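The warnings above fall through to reading `config.json` directly; that fallback body lies outside this hunk. A minimal sketch of what that path amounts to, assuming the plain JSON file is enough (illustration only, not the function's actual code):

```python
import json
from pathlib import Path

def load_hparams_fallback(dir_model: Path) -> dict:
    # Assumed fallback: read the raw config.json when AutoConfig cannot
    # parse the model directory (e.g. the architecture needs remote code).
    with open(dir_model / "config.json", "r", encoding="utf-8") as f:
        return json.load(f)
```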
@@ -2187,6 +2187,49 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
         return super().modify_tensors(data_torch, name, bid)


+@ModelBase.register("Plamo2ForCausalLM")
+class Plamo2Model(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.PLAMO2
+
+    def set_vocab(self):
+        # Plamo2 uses sentencepiece tokenizer similar to Llama
+        self._set_vocab_sentencepiece()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        # Plamo2 specific parameters - hybrid attention/Mamba architecture
+        # Mamba parameters
+        if hparams.get("mamba_enabled", False):
+            self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
+            self.gguf_writer.add_ssm_inner_size(hparams.get("mamba_d_state", 64) * hparams.get("intermediate_size", 13312) // hparams.get("hidden_size", 4096))
+            self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
+            self.gguf_writer.add_ssm_time_step_rank(hparams.get("mamba_d_state", 64) // 16)  # Commonly d_state/16
+
+        # Attention window parameters
+        if "attention_window_size" in hparams:
+            self.gguf_writer.add_sliding_window(hparams["attention_window_size"])
+
+        # Full attention layer indices
+        if "full_attention_idx" in hparams and hparams["full_attention_idx"]:
+            # Store which layers use full attention vs sliding window
+            # This may need custom handling in llama.cpp
+            pass
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # Handle Plamo2 specific tensor naming
+        # The model has both attention and Mamba layers
+
+        # Handle Mamba-specific tensors if present
+        if "mamba" in name:
+            # Mamba layers might need special handling
+            # For now, pass through with standard naming
+            pass
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("DeciLMForCausalLM")
 class DeciModel(TextModel):
     model_arch = gguf.MODEL_ARCH.DECI