@@ -508,12 +508,7 @@ def load_hparams(dir_model: Path):
         with open(dir_model / "config.json", "r", encoding="utf-8") as f:
             hparams = json.load(f)
         if "text_config" in hparams:
-            text_config = hparams["text_config"]
-            model_id = text_config.get("_name_or_path", None)
-            # for example, llava-1.5-7b-hf misses the language model config, need to retrieve it via model ID
-            if model_id is not None and model_id != "None" and model_id != "":
-                text_config = AutoConfig.from_pretrained(text_config["_name_or_path"]).to_dict()
-            hparams = {**text_config, **hparams}
+            hparams = {**hparams["text_config"], **hparams}
         return hparams

     @staticmethod
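
The one-liner above relies on dict-unpacking precedence: on a key collision the later unpacked mapping wins, so top-level entries from config.json still override anything pulled up from text_config. A minimal standalone sketch (values are made up, not from the patch):

hparams = {
    "model_type": "llava",  # top-level key
    "text_config": {"model_type": "llama", "hidden_size": 4096},
}
merged = {**hparams["text_config"], **hparams}
assert merged["hidden_size"] == 4096    # pulled up from text_config
assert merged["model_type"] == "llava"  # top-level value wins on collision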
@@ -1646,14 +1641,14 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed norms: {norms}")


-@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM", "LlavaForConditionalGeneration", "MobileLlamaForCausalLM", "Idefics3ForConditionalGeneration")
+@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM", "MobileLlamaForCausalLM", "Idefics3ForConditionalGeneration")
 class LlamaModel(Model):
     model_arch = gguf.MODEL_ARCH.LLAMA

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-        model_type = self.hparams.get("model_type", None)
+        model_type = self.hparams.get("model_type")
         self.vision_arch = None

         # only tested with https://huggingface.co/llava-hf/llava-1.5-7b-hf
@@ -1762,15 +1757,14 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         is_vision_tensor = "vision_tower" in name or "vision_model" in name

         if is_vision_tensor:
-            if name.startswith("model.text_model"):
-                name = name.replace("text_model.", "")  # for SmolVLM
-            else:
-                name = name.replace("model.vision_tower.", "")
+            name = name.replace("model.vision_tower.", "")
             if "post_layernorm" in name and self.vision_arch != gguf.MODEL_ARCH.VISION_IDEFICS3:
                 return []  # skip post_layernorm

         if not is_vision_tensor:
-            if name.startswith("language_model"):
+            if name.startswith("model.text_model"):
+                name = name.replace("text_model.", "")  # for SmolVLM
+            elif name.startswith("language_model"):
                 # language model tensors, remove the prefix
                 name = name.replace("language_model.", "")
             if name.endswith(("q_proj.weight", "q_proj.bias")):
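
With this reordering, SmolVLM's "model.text_model.*" tensors are treated as language-model tensors instead of falling into the vision branch. A minimal sketch of the resulting prefix handling (the helper name is hypothetical, not from the patch):

def strip_prefix(name: str) -> str:
    # vision tensors keep their own namespace
    if "vision_tower" in name or "vision_model" in name:
        return name.replace("model.vision_tower.", "")
    # SmolVLM language tensors arrive as "model.text_model.*"
    if name.startswith("model.text_model"):
        return name.replace("text_model.", "")
    # LLaVA-style language tensors arrive as "language_model.*"
    if name.startswith("language_model"):
        return name.replace("language_model.", "")
    return name

assert strip_prefix("model.text_model.norm.weight") == "model.norm.weight"
assert strip_prefix("language_model.model.norm.weight") == "model.norm.weight"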
@@ -1853,6 +1847,22 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")


+@Model.register("LlavaForConditionalGeneration")
+class LlavaModel(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.LLAMA
+
+    def __init__(self, *args, **kwargs):
+        # quick fix for llava model
+        # see: https://huggingface.co/llava-hf/llava-1.5-7b-hf/discussions/34
+        hparams = Model.load_hparams(kwargs["dir_model"])
+        if "vision_config" in hparams and hparams.get("model_type") == "llava":
+            text_config = hparams["text_config"]
+            text_config = AutoConfig.from_pretrained(text_config["_name_or_path"]).to_dict()
+            kwargs["hparams"] = {**text_config, **hparams}
+
+        super().__init__(*args, **kwargs)
+
+
 @Model.register("DeciLMForCausalLM")
 class DeciModel(Model):
     model_arch = gguf.MODEL_ARCH.DECI
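
For context: the new LlavaModel.__init__ pre-merges the full language-model config fetched from the Hub before the base class reads hparams, working around llava-1.5-7b-hf shipping an incomplete text_config (see the linked discussion). A standalone sketch of that lookup, assuming the transformers package and network access; the model ID shown is illustrative:

from transformers import AutoConfig

hparams = {"model_type": "llava",
           "text_config": {"_name_or_path": "lmsys/vicuna-7b-v1.5"}}  # example ID

# fetch the full language-model config referenced by _name_or_path ...
text_config = AutoConfig.from_pretrained(hparams["text_config"]["_name_or_path"]).to_dict()
# ... and merge it in; top-level keys from config.json still take precedence
hparams = {**text_config, **hparams}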