diff --git a/requirements.txt b/requirements.txt
index 3b19883..27e1db5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
torch>=1.13.1
-huggingface-hub==0.29.1
-transformers==4.49.0
+huggingface-hub>=0.34.0,<1.0
+transformers>=4.51.0
datasets>=2.14.3
accelerate>=0.27.2
loguru==0.7.0
diff --git a/src/core/constant.py b/src/core/constant.py
index 056bbb1..29738bc 100644
--- a/src/core/constant.py
+++ b/src/core/constant.py
@@ -14,6 +14,7 @@
"Qwen/Qwen2.5-32B-Instruct",
"Qwen/Qwen2.5-72B",
"Qwen/Qwen2.5-72B-Instruct",
+ "Qwen/Qwen3-4B-Instruct-2507",
# yi 1.5
"01-ai/Yi-1.5-6B",
"01-ai/Yi-1.5-6B-Chat",
@@ -50,3 +51,59 @@
"microsoft/Phi-4-mini-instruct",
"microsoft/phi-4",
]
+
+# Lookup from model name to the name of the chat template that model uses.
+# Each group is built with dict.fromkeys so a template name is written once;
+# groups are unpacked in order, so key order follows the listing below.
+MODEL_TEMPLATE_MAP = {
+    # Qwen 2.5
+    **dict.fromkeys(
+        (
+            "Qwen/Qwen2.5-0.5B",
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            "Qwen/Qwen2.5-1.5B",
+            "Qwen/Qwen2.5-1.5B-Instruct",
+            "Qwen/Qwen2.5-3B",
+            "Qwen/Qwen2.5-3B-Instruct",
+            "Qwen/Qwen2.5-7B",
+            "Qwen/Qwen2.5-7B-Instruct",
+            "Qwen/Qwen2.5-14B",
+            "Qwen/Qwen2.5-14B-Instruct",
+            "Qwen/Qwen2.5-32B",
+            "Qwen/Qwen2.5-32B-Instruct",
+            "Qwen/Qwen2.5-72B",
+            "Qwen/Qwen2.5-72B-Instruct",
+        ),
+        "qwen1.5",
+    ),
+    # Qwen 3
+    **dict.fromkeys(
+        ("Qwen/Qwen3-4B-Instruct-2507",),
+        "qwen3",
+    ),
+    # Yi
+    **dict.fromkeys(
+        (
+            "01-ai/Yi-1.5-6B",
+            "01-ai/Yi-1.5-6B-Chat",
+            "01-ai/Yi-1.5-9B",
+            "01-ai/Yi-1.5-9B-Chat",
+            "01-ai/Yi-1.5-34B",
+            "01-ai/Yi-1.5-34B-Chat",
+        ),
+        "yi",
+    ),
+    # Mistral
+    **dict.fromkeys(
+        (
+            "mistralai/Mistral-7B-v0.3",
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "mistralai/Ministral-8B-Instruct-2410",
+        ),
+        "mistral",
+    ),
+    # Mixtral
+    **dict.fromkeys(
+        (
+            "mistralai/Mixtral-8x7B-v0.1",
+            "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        ),
+        "mixtral",
+    ),
+    # Gemma 2
+    **dict.fromkeys(
+        (
+            "google/gemma-2-2b",
+            "google/gemma-2-9b",
+            "google/gemma-2-27b",
+            "google/gemma-2-2b-it",
+            "google/gemma-2-9b-it",
+            "google/gemma-2-27b-it",
+        ),
+        "gemma",
+    ),
+    # LLaMA 3 + 3.1
+    **dict.fromkeys(
+        (
+            "meta-llama/Meta-Llama-3-8B",
+            "meta-llama/Meta-Llama-3-8B-Instruct",
+            "meta-llama/Meta-Llama-3-70B",
+            "meta-llama/Meta-Llama-3-70B-Instruct",
+            "meta-llama/Meta-Llama-3.1-8B",
+            "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "meta-llama/Meta-Llama-3.1-70B",
+            "meta-llama/Meta-Llama-3.1-70B-Instruct",
+        ),
+        "llama3",
+    ),
+    # Phi 3
+    **dict.fromkeys(
+        (
+            "microsoft/Phi-3.5-mini-instruct",
+            "microsoft/Phi-3-mini-4k-instruct",
+            "microsoft/Phi-3-medium-4k-instruct",
+        ),
+        "phi3",
+    ),
+    # Phi 4
+    **dict.fromkeys(
+        (
+            "microsoft/Phi-4-mini-instruct",
+            "microsoft/phi-4",
+        ),
+        "phi4",
+    ),
+}
diff --git a/src/core/template.py b/src/core/template.py
index 862c52f..cbb2db1 100644
--- a/src/core/template.py
+++ b/src/core/template.py
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from typing import Dict
+from constant import MODEL_TEMPLATE_MAP
@dataclass
@@ -67,6 +68,25 @@ def register_template(
stop_word="<|im_end|>",
)
+# Qwen3 template: ChatML-style turn markers plus tool-calling sections.
+# The tool-related strings must carry the literal <tools>, <tool_call> and
+# <tool_response> tags; without them the prompt says "within XML tags" but
+# never names a tag, and the JSON example has no placeholders.
+# NOTE(review): if these formats are rendered with str.format, the literal
+# JSON braces in tool_format would need doubling ({{ }}) - confirm against
+# the code that consumes tool_format.
+register_template(
+    template_name="qwen3",
+    system_format="<|im_start|>system\n{content}<|im_end|>\n",
+    user_format="<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n",
+    assistant_format="{content}<|im_end|>\n",
+    tool_format=(
+        "# Tools\n\n"
+        "You may call one or more functions to assist with the user query.\n\n"
+        "You are provided with function signatures within <tools></tools> XML tags:\n"
+        "<tools>\n{content}\n</tools>\n\n"
+        "For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n"
+        '<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call>'
+    ),
+    # An assistant tool invocation is wrapped in <tool_call> tags.
+    function_format="<tool_call>\n{content}\n</tool_call><|im_end|>\n",
+    # A tool result is sent back as a user turn wrapped in <tool_response> tags.
+    observation_format="<|im_start|>user\n<tool_response>\n{content}\n</tool_response><|im_end|>\n<|im_start|>assistant\n",
+    system="You are a helpful assistant.",
+    stop_word="<|im_end|>",
+)
+
register_template(
template_name="yi",
system_format="<|im_start|>system\n{content}<|im_end|>\n",
@@ -182,3 +202,6 @@ def register_template(
system=None,
stop_word="<|end|>",
)
+
+def _alias_models_to_templates(aliases, registry):
+    """Register every model name in *aliases* as an extra key of *registry*.
+
+    Each model name is bound to the object already stored under its template
+    name, so looking up by model name and by template name yields the same
+    object.
+
+    Args:
+        aliases: Mapping of model name -> template name.
+        registry: Mapping of template name -> template; updated in place.
+
+    Raises:
+        KeyError: If a template name is not present in *registry*. The
+            message names both the model and the missing template.
+    """
+    for model, name in aliases.items():
+        try:
+            template = registry[name]
+        except KeyError:
+            raise KeyError(
+                f"model {model!r} maps to unregistered template {name!r}"
+            ) from None
+        registry[model] = template
+
+
+# Run inside a helper so the loop variables do not become module attributes.
+_alias_models_to_templates(MODEL_TEMPLATE_MAP, template_dict)