Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
torch>=1.13.1
huggingface-hub==0.29.1
transformers==4.49.0
huggingface-hub>=0.34.0,<1.0
transformers>=4.51.0
Comment on lines +2 to +3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify dependency compatibility and check for breaking changes.

The dependency versions have been updated with significant changes:

  • huggingface-hub: from exact pin 0.29.1 to range >=0.34.0,<1.0 (several minor-version jumps; note that 0.x minor bumps may contain breaking changes)
  • transformers: from exact pin 4.49.0 to minimum >=4.51.0 (minor version jump)

These changes relax version constraints, which improves flexibility but may introduce compatibility issues. Verify that:

  • The new versions are compatible with the codebase and don't introduce breaking changes.
  • The version ranges are tested and known to work.
  • Other dependencies (torch, peft, etc.) remain compatible with the new versions.

Run the following script to check for known security vulnerabilities and verify the latest stable versions:


🏁 Script executed:

#!/bin/bash
# Description: Check for security advisories and latest versions

# Check for security advisories in huggingface-hub
gh api graphql -f query='
{
  securityVulnerabilities(first: 5, ecosystem: PIP, package: "huggingface-hub") {
    nodes {
      advisory {
        summary
        severity
        publishedAt
      }
      vulnerableVersionRange
      firstPatchedVersion {
        identifier
      }
    }
  }
}'

# Check for security advisories in transformers
gh api graphql -f query='
{
  securityVulnerabilities(first: 5, ecosystem: PIP, package: "transformers") {
    nodes {
      advisory {
        summary
        severity
        publishedAt
      }
      vulnerableVersionRange
      firstPatchedVersion {
        identifier
      }
    }
  }
}'

# Verify latest versions
curl -s https://pypi.org/pypi/huggingface-hub/json | jq -r '.info.version'
curl -s https://pypi.org/pypi/transformers/json | jq -r '.info.version'

Length of output: 1382


Restrict the transformers version range to avoid ReDoS vulnerabilities and breaking changes
Transformers versions earlier than 4.53.0 are vulnerable to multiple ReDoS issues; update the requirement to transformers>=4.53.0,<5.0.0.
huggingface-hub >=0.34.0,<1.0 has no known advisories.

🤖 Prompt for AI Agents
In requirements.txt around lines 2 to 3, the transformers pin is too permissive
and includes vulnerable versions; update the transformers requirement to
"transformers>=4.53.0,<5.0.0" (leave huggingface-hub as
"huggingface-hub>=0.34.0,<1.0") so the package range excludes known
ReDoS-vulnerable releases while preventing major breaking upgrades.

datasets>=2.14.3
accelerate>=0.27.2
loguru==0.7.0
Expand Down
57 changes: 57 additions & 0 deletions src/core/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"Qwen/Qwen2.5-32B-Instruct",
"Qwen/Qwen2.5-72B",
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen3-4B-Instruct-2507",
# yi 1.5
"01-ai/Yi-1.5-6B",
"01-ai/Yi-1.5-6B-Chat",
Expand Down Expand Up @@ -50,3 +51,59 @@
"microsoft/Phi-4-mini-instruct",
"microsoft/phi-4",
]

# Maps each supported Hugging Face model repo ID to the name of the chat
# template it uses.  Grouped per model family so every size/variant of a
# family stays in sync; the flat mapping is derived from the groups below.

# Qwen2.5 ships the same chat template for every parameter count.
_QWEN25_SIZES = ("0.5B", "1.5B", "3B", "7B", "14B", "32B", "72B")

_FAMILY_MODELS = {
    "qwen1.5": [
        name
        for size in _QWEN25_SIZES
        for name in (f"Qwen/Qwen2.5-{size}", f"Qwen/Qwen2.5-{size}-Instruct")
    ],
    "qwen3": ["Qwen/Qwen3-4B-Instruct-2507"],
    "yi": [
        "01-ai/Yi-1.5-6B",
        "01-ai/Yi-1.5-6B-Chat",
        "01-ai/Yi-1.5-9B",
        "01-ai/Yi-1.5-9B-Chat",
        "01-ai/Yi-1.5-34B",
        "01-ai/Yi-1.5-34B-Chat",
    ],
    "mistral": [
        "mistralai/Mistral-7B-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Ministral-8B-Instruct-2410",
    ],
    "mixtral": [
        "mistralai/Mixtral-8x7B-v0.1",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
    ],
    "gemma": [
        "google/gemma-2-2b",
        "google/gemma-2-9b",
        "google/gemma-2-27b",
        "google/gemma-2-2b-it",
        "google/gemma-2-9b-it",
        "google/gemma-2-27b-it",
    ],
    "llama3": [
        "meta-llama/Meta-Llama-3-8B",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "meta-llama/Meta-Llama-3-70B",
        "meta-llama/Meta-Llama-3-70B-Instruct",
        "meta-llama/Meta-Llama-3.1-8B",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "meta-llama/Meta-Llama-3.1-70B",
        "meta-llama/Meta-Llama-3.1-70B-Instruct",
    ],
    "phi3": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
        "microsoft/Phi-3-medium-4k-instruct",
    ],
    "phi4": [
        "microsoft/Phi-4-mini-instruct",
        "microsoft/phi-4",
    ],
}

# Flatten: one entry per model ID, pointing at its family's template name.
MODEL_TEMPLATE_MAP = {
    model: template
    for template, models in _FAMILY_MODELS.items()
    for model in models
}
17 changes: 10 additions & 7 deletions src/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,15 @@ def __getitem__(self, index):
target_mask = [0] * len(input_ids)

# setting tool information
if "tools" in data.keys() and data["tools"]:
if "tools" in data.keys():
tools = json.loads(data["tools"])
tool_prompt = tool_formater(tools)
tool_text = self.tool_format.format(content=tool_prompt)
tool_tokens = self.tokenizer.encode(tool_text, add_special_tokens=False)
input_ids = input_ids + tool_tokens
target_mask = target_mask + [0] * len(tool_tokens)
if tools:
# tool_prompt = tool_formater(tools)
tool_prompt = json.dumps(tools)
tool_text = self.tool_format.format(content=tool_prompt)
tool_tokens = self.tokenizer.encode(tool_text, add_special_tokens=False)
input_ids = input_ids + tool_tokens
target_mask = target_mask + [0] * len(tool_tokens)
Comment on lines +46 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify the behavioral changes in tool formatting.

The tool processing logic has been modified:

  1. Added a nested if tools: check (line 48) - this is good defensive coding to skip processing when tools is null/empty.
  2. Replaced tool_formater(tools) with json.dumps(tools) (line 50) - this changes how tools are serialized.

The second change alters the output format. Ensure that:

  • The new JSON format is compatible with the qwen3 template expectations.
  • Existing datasets or models relying on tool_formater output are not broken.
  • Unit tests cover this behavior change.

Run the following script to check if tool_formater is still used elsewhere:


🏁 Script executed:

#!/bin/bash
# Description: Verify if tool_formater is used in other parts of the codebase

# Search for tool_formater usage
rg -n "tool_formater" --type=py

Length of output: 236


🏁 Script executed:

#!/bin/bash
echo "Occurrences of function_formatter:"
rg -n "function_formatter" --type=py

echo
echo "Listing test files:"
find . -type f -name "*test*.py"

Length of output: 399


Add tests and validate JSON tool serialization

  • Confirm json.dumps(tools) output aligns with qwen3 template requirements and existing pipelines
  • Add unit tests for the tools branch in src/core/dataset.py (cover both JSON serialization and the commented‐out function_formatter path)
  • Remove unused tool_formater/function_formatter imports if no longer needed
🤖 Prompt for AI Agents
In src/core/dataset.py around lines 46 to 54, the current handling of the
"tools" branch simply json.dumps the tools and uses that text which may not
match the qwen3 template and the commented function_formatter path; update the
code and tests as follows: 1) ensure the serialized tool output matches the
qwen3 template (either adapt json.dumps with the required keys/format or
re-enable/use the function_formatter/tool_formater path to produce the exact
template output), 2) add unit tests that cover the "tools" branch for both
JSON-serialized output and the function_formatter path (include validation that
the resulting string tokens and masks match expected shapes and contents), and
3) remove any unused imports (tool_formater/function_formatter) if you choose to
keep the json.dumps approach, or restore and use the formatter and its tests if
you opt for that path.


conversations = data["conversations"]

Expand All @@ -65,7 +67,8 @@ def __getitem__(self, index):
input_buffer += human

elif role == "function_call":
tool_calls = function_formatter(json.loads(content))
# tool_calls = function_formatter(json.loads(content))
tool_calls = content
function = self.function_format.format(content=tool_calls)
input_buffer += function

Expand Down
23 changes: 23 additions & 0 deletions src/core/template.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from typing import Dict
from .constant import MODEL_TEMPLATE_MAP


@dataclass
Expand Down Expand Up @@ -67,6 +68,25 @@ def register_template(
stop_word="<|im_end|>",
)

# Qwen3 chat template: ChatML-style <|im_start|>/<|im_end|> turn markers,
# with tool signatures injected in the system prompt inside <tools> XML tags
# and tool calls/results wrapped in <tool_call>/<tool_response> tags.
# NOTE: the doubled braces in the tool_format example line are literal braces
# escaped for str.format(); only {content} is a substitution placeholder.
register_template(
    template_name="qwen3",
    system_format="<|im_start|>system\n{content}<|im_end|>\n",
    user_format="<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n",
    assistant_format="{content}<|im_end|>\n",
    tool_format=(
        "# Tools\n\n"
        "You may call one or more functions to assist with the user query.\n\n"
        "You are provided with function signatures within <tools></tools> XML tags:\n"
        "<tools>\n{content}\n</tools>\n\n"
        "For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n"
        '<tool_call>\n{{"name": <function-name>, "arguments": <args-json-object>}}\n</tool_call>'
    ),
    function_format="<tool_call>\n{content}\n</tool_call><|im_end|>\n",
    observation_format="<|im_start|>user\n<tool_response>\n{content}\n</tool_response><|im_end|>\n<|im_start|>assistant\n",
    system="You are a helpful assistant.",
    stop_word="<|im_end|>",
)

register_template(
template_name="yi",
system_format="<|im_start|>system\n{content}<|im_end|>\n",
Expand Down Expand Up @@ -182,3 +202,6 @@ def register_template(
system=None,
stop_word="<|end|>",
)

# Alias each concrete model ID to its family's Template object, so lookups in
# template_dict work by either the template name or the full model repo ID.
# A bad entry in MODEL_TEMPLATE_MAP would otherwise surface as a bare
# KeyError naming only the missing key; re-raise with enough context to
# point straight at the misconfigured mapping.
for model_name, template_name in MODEL_TEMPLATE_MAP.items():
    try:
        template_dict[model_name] = template_dict[template_name]
    except KeyError as err:
        raise KeyError(
            f"MODEL_TEMPLATE_MAP maps {model_name!r} to template "
            f"{template_name!r}, which has not been registered via "
            "register_template()"
        ) from err