Skip to content

Commit 992fde1

Browse files
committed
give up on the model's bundled chat template; hard-code a built-in "janus-pro" template instead
1 parent a26c4cc commit 992fde1

File tree

3 files changed

+49
-0
lines changed

3 files changed

+49
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,9 @@ def load_hparams(dir_model: Path):
432432
if "llm_config" in config:
433433
# rename for InternVL
434434
config["text_config"] = config["llm_config"]
435+
if "language_config" in config:
436+
# rename for Janus Pro
437+
config["text_config"] = config["language_config"]
435438
return config
436439

437440
@classmethod
@@ -1975,6 +1978,31 @@ def prepare_tensors(self):
19751978
raise ValueError(f"Unprocessed experts: {experts}")
19761979

19771980

1981+
@ModelBase.register("JanusProForCausalLM")
class JanusProModel(TextModel):
    """Text-only conversion for Janus Pro: exports the language model as a
    LLaMA-architecture GGUF and drops every non-language tensor."""
    model_arch = gguf.MODEL_ARCH.LLAMA
    undo_permute = True

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Janus Pro configs may omit these text-model hyperparameters;
        # fill in the fallback values the original conversion assumed.
        fallback_hparams = {
            "num_attention_heads": 32,
            "num_key_value_heads": 32,
            "hidden_size": 4096,
            "intermediate_size": 11008,
            "rms_norm_eps": 1e-6,
        }
        for key, value in fallback_hparams.items():
            self.hparams.setdefault(key, value)

    def set_gguf_parameters(self):
        """Write the standard text-model parameters, then pin the built-in
        "janus-pro" chat template (the model repo's own template is not used)."""
        super().set_gguf_parameters()
        self.gguf_writer.add_chat_template("janus-pro")

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Strip the "language_model." prefix and delegate to the base class;
        tensors outside the language model (vision tower, aligner) are skipped."""
        if "language_model." not in name:
            return []
        stripped = name.replace("language_model.", "")
        return super().modify_tensors(data_torch, stripped, bid)
2004+
2005+
19782006
@ModelBase.register(
19792007
"LlavaForConditionalGeneration", # pixtral
19802008
"Mistral3ForConditionalGeneration", # mistral small 3.1
@@ -6222,6 +6250,9 @@ def split_str_to_n_bytes(split_str: str) -> int:
62226250

62236251

62246252
def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> str:
6253+
# exception: Janus Pro
6254+
if "aligner_config" in hparams:
6255+
return "JanusProForCausalLM"
62256256
# TODO @ngxson : this won't work correctly if the model has both audio & vision encoders
62266257
# maybe we should fallback to text model's arch in that case, since not many models have both
62276258
text_config = hparams.get("text_config", {})

src/llama-chat.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
6464
{ "bailing", LLM_CHAT_TEMPLATE_BAILING },
6565
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
6666
{ "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
67+
{ "janus-pro", LLM_CHAT_TEMPLATE_JANUS_PRO },
6768
};
6869

6970
llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -508,6 +509,22 @@ int32_t llm_chat_apply_template(
508509
if (add_ass) {
509510
ss << LU8("<|Assistant|>");
510511
}
512+
} else if (tmpl == LLM_CHAT_TEMPLATE_JANUS_PRO) {
513+
// variant of DeepSeek-V2, used by Janus Pro
514+
for (auto message : chat) {
515+
std::string role(message->role);
516+
ss << LU8("<|begin▁of▁sentence|>");
517+
if (role == "system") {
518+
//ss << message->content << "\n\n";
519+
} else if (role == "user") {
520+
ss << "<|User|>" << message->content << "\n\n";
521+
} else if (role == "assistant") {
522+
ss << "<|Assistant|>" << message->content << LU8("<|end▁of▁sentence|>");
523+
}
524+
}
525+
if (add_ass) {
526+
ss << "<|Assistant|>";
527+
}
511528
} else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
512529
// ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
513530
// EXAONE-3.0-7.8B-Instruct

src/llama-chat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ enum llm_chat_template {
4343
LLM_CHAT_TEMPLATE_BAILING,
4444
LLM_CHAT_TEMPLATE_LLAMA4,
4545
LLM_CHAT_TEMPLATE_SMOLVLM,
46+
LLM_CHAT_TEMPLATE_JANUS_PRO,
4647
LLM_CHAT_TEMPLATE_UNKNOWN,
4748
};
4849

0 commit comments

Comments (0)