Skip to content

Commit 8a3d814

Browse files
committed
Add Plamo2Model to convert scripts
1 parent 5289f4f commit 8a3d814

File tree

1 file changed

+44
-1
lines changed

1 file changed

+44
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ def load_hparams(dir_model: Path):
423423
try:
424424
# for security reason, we don't allow loading remote code by default
425425
# if a model need remote code, we will fallback to config.json
426-
return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
426+
return AutoConfig.from_pretrained(dir_model, trust_remote_code=True).to_dict()
427427
except Exception as e:
428428
logger.warning(f"Failed to load model config from {dir_model}: {e}")
429429
logger.warning("Trying to load config.json instead")
@@ -2187,6 +2187,49 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
21872187
return super().modify_tensors(data_torch, name, bid)
21882188

21892189

2190+
@ModelBase.register("Plamo2ForCausalLM")
2191+
class Plamo2Model(LlamaModel):
2192+
model_arch = gguf.MODEL_ARCH.PLAMO2
2193+
2194+
def set_vocab(self):
2195+
# Plamo2 uses sentencepiece tokenizer similar to Llama
2196+
self._set_vocab_sentencepiece()
2197+
2198+
def set_gguf_parameters(self):
2199+
super().set_gguf_parameters()
2200+
hparams = self.hparams
2201+
2202+
# Plamo2 specific parameters - hybrid attention/Mamba architecture
2203+
# Mamba parameters
2204+
if hparams.get("mamba_enabled", False):
2205+
self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
2206+
self.gguf_writer.add_ssm_inner_size(hparams.get("mamba_d_state", 64) * hparams.get("intermediate_size", 13312) // hparams.get("hidden_size", 4096))
2207+
self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
2208+
self.gguf_writer.add_ssm_time_step_rank(hparams.get("mamba_d_state", 64) // 16) # Commonly d_state/16
2209+
2210+
# Attention window parameters
2211+
if "attention_window_size" in hparams:
2212+
self.gguf_writer.add_sliding_window(hparams["attention_window_size"])
2213+
2214+
# Full attention layer indices
2215+
if "full_attention_idx" in hparams and hparams["full_attention_idx"]:
2216+
# Store which layers use full attention vs sliding window
2217+
# This may need custom handling in llama.cpp
2218+
pass
2219+
2220+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
2221+
# Handle Plamo2 specific tensor naming
2222+
# The model has both attention and Mamba layers
2223+
2224+
# Handle Mamba-specific tensors if present
2225+
if "mamba" in name:
2226+
# Mamba layers might need special handling
2227+
# For now, pass through with standard naming
2228+
pass
2229+
2230+
return super().modify_tensors(data_torch, name, bid)
2231+
2232+
21902233
@ModelBase.register("DeciLMForCausalLM")
21912234
class DeciModel(TextModel):
21922235
model_arch = gguf.MODEL_ARCH.DECI

0 commit comments

Comments
 (0)