From 795c156f4ac7c214f10c3fc8615147f79193ce1d Mon Sep 17 00:00:00 2001
From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com>
Date: Thu, 11 Sep 2025 15:57:48 +0100
Subject: [PATCH 1/6] Add support for Qwen3-4B-Instruct and add model-template
 mappings

---
 src/core/constant.py | 64 ++++++++++++++++++++++++++++++++++++++++++++
 src/core/template.py |  4 +++
 2 files changed, 68 insertions(+)

diff --git a/src/core/constant.py b/src/core/constant.py
index 056bbb1..dfc44d1 100644
--- a/src/core/constant.py
+++ b/src/core/constant.py
@@ -14,6 +14,7 @@
     "Qwen/Qwen2.5-32B-Instruct",
     "Qwen/Qwen2.5-72B",
     "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen3-4B-Instruct",
     # yi 1.5
     "01-ai/Yi-1.5-6B",
     "01-ai/Yi-1.5-6B-Chat",
@@ -50,3 +51,66 @@
     "microsoft/Phi-4-mini-instruct",
     "microsoft/phi-4",
 ]
+
+MODEL_TEMPLATE_MAP = {
+    # Qwen
+    "Qwen/Qwen2.5-0.5B": "qwen1.5",
+    "Qwen/Qwen2.5-0.5B-Instruct": "qwen1.5",
+    "Qwen/Qwen2.5-1.5B": "qwen1.5",
+    "Qwen/Qwen2.5-1.5B-Instruct": "qwen1.5",
+    "Qwen/Qwen2.5-3B": "qwen1.5",
+    "Qwen/Qwen2.5-3B-Instruct": "qwen1.5",
+    "Qwen/Qwen2.5-7B": "qwen1.5",
+    "Qwen/Qwen2.5-7B-Instruct": "qwen1.5",
+    "Qwen/Qwen2.5-14B": "qwen1.5",
+    "Qwen/Qwen2.5-14B-Instruct": "qwen1.5",
+    "Qwen/Qwen2.5-32B": "qwen1.5",
+    "Qwen/Qwen2.5-32B-Instruct": "qwen1.5",
+    "Qwen/Qwen2.5-72B": "qwen1.5",
+    "Qwen/Qwen2.5-72B-Instruct": "qwen1.5",
+    "Qwen/Qwen3-4B-Instruct": "qwen1.5",
+
+    # Yi
+    "01-ai/Yi-1.5-6B": "yi",
+    "01-ai/Yi-1.5-6B-Chat": "yi",
+    "01-ai/Yi-1.5-9B": "yi",
+    "01-ai/Yi-1.5-9B-Chat": "yi",
+    "01-ai/Yi-1.5-34B": "yi",
+    "01-ai/Yi-1.5-34B-Chat": "yi",
+
+    # Mistral
+    "mistralai/Mistral-7B-v0.3": "mistral",
+    "mistralai/Mistral-7B-Instruct-v0.3": "mistral",
+    "mistralai/Ministral-8B-Instruct-2410": "mistral",
+
+    # Mixtral
+    "mistralai/Mixtral-8x7B-v0.1": "mixtral",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": "mixtral",
+
+    # Gemma 2
+    "google/gemma-2-2b": "gemma",
+    "google/gemma-2-9b": "gemma",
+    "google/gemma-2-27b": "gemma",
+    "google/gemma-2-2b-it": "gemma",
+    "google/gemma-2-9b-it": "gemma",
+    "google/gemma-2-27b-it": "gemma",
+
+    # LLaMA 3 + 3.1
+    "meta-llama/Meta-Llama-3-8B": "llama3",
+    "meta-llama/Meta-Llama-3-8B-Instruct": "llama3",
+    "meta-llama/Meta-Llama-3-70B": "llama3",
+    "meta-llama/Meta-Llama-3-70B-Instruct": "llama3",
+    "meta-llama/Meta-Llama-3.1-8B": "llama3",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct": "llama3",
+    "meta-llama/Meta-Llama-3.1-70B": "llama3",
+    "meta-llama/Meta-Llama-3.1-70B-Instruct": "llama3",
+
+    # Phi 3
+    "microsoft/Phi-3.5-mini-instruct": "phi3",
+    "microsoft/Phi-3-mini-4k-instruct": "phi3",
+    "microsoft/Phi-3-medium-4k-instruct": "phi3",
+
+    # Phi 4
+    "microsoft/Phi-4-mini-instruct": "phi4",
+    "microsoft/phi-4": "phi4",
+}
diff --git a/src/core/template.py b/src/core/template.py
index 862c52f..edbfaa7 100644
--- a/src/core/template.py
+++ b/src/core/template.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 from typing import Dict
+from constant import MODEL_TEMPLATE_MAP
 
 
 @dataclass
@@ -182,3 +183,6 @@ def register_template(
     system=None,
     stop_word="<|end|>",
 )
+
+for model_name, template_name in MODEL_TEMPLATE_MAP.items():
+    template_dict[model_name] = template_dict[template_name]
\ No newline at end of file

From 33e1251c327a35823c468c6d71b4b44234def1ad Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 11 Sep 2025 15:02:01 +0000
Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/core/constant.py | 7 -------
 src/core/template.py | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/core/constant.py b/src/core/constant.py
index dfc44d1..5513f78 100644
--- a/src/core/constant.py
+++ b/src/core/constant.py
@@ -69,7 +69,6 @@
     "Qwen/Qwen2.5-72B": "qwen1.5",
     "Qwen/Qwen2.5-72B-Instruct": "qwen1.5",
     "Qwen/Qwen3-4B-Instruct": "qwen1.5",
-
     # Yi
     "01-ai/Yi-1.5-6B": "yi",
     "01-ai/Yi-1.5-6B-Chat": "yi",
@@ -77,16 +76,13 @@
     "01-ai/Yi-1.5-9B-Chat": "yi",
     "01-ai/Yi-1.5-34B": "yi",
     "01-ai/Yi-1.5-34B-Chat": "yi",
-
     # Mistral
     "mistralai/Mistral-7B-v0.3": "mistral",
     "mistralai/Mistral-7B-Instruct-v0.3": "mistral",
     "mistralai/Ministral-8B-Instruct-2410": "mistral",
-
     # Mixtral
     "mistralai/Mixtral-8x7B-v0.1": "mixtral",
     "mistralai/Mixtral-8x7B-Instruct-v0.1": "mixtral",
-
     # Gemma 2
     "google/gemma-2-2b": "gemma",
     "google/gemma-2-9b": "gemma",
@@ -94,7 +90,6 @@
     "google/gemma-2-2b-it": "gemma",
     "google/gemma-2-9b-it": "gemma",
     "google/gemma-2-27b-it": "gemma",
-
     # LLaMA 3 + 3.1
     "meta-llama/Meta-Llama-3-8B": "llama3",
     "meta-llama/Meta-Llama-3-8B-Instruct": "llama3",
@@ -104,12 +99,10 @@
     "meta-llama/Meta-Llama-3.1-8B-Instruct": "llama3",
     "meta-llama/Meta-Llama-3.1-70B": "llama3",
     "meta-llama/Meta-Llama-3.1-70B-Instruct": "llama3",
-
     # Phi 3
     "microsoft/Phi-3.5-mini-instruct": "phi3",
     "microsoft/Phi-3-mini-4k-instruct": "phi3",
     "microsoft/Phi-3-medium-4k-instruct": "phi3",
-
     # Phi 4
     "microsoft/Phi-4-mini-instruct": "phi4",
     "microsoft/phi-4": "phi4",
diff --git a/src/core/template.py b/src/core/template.py
index edbfaa7..5563a8b 100644
--- a/src/core/template.py
+++ b/src/core/template.py
@@ -185,4 +185,4 @@ def register_template(
 )
 
 for model_name, template_name in MODEL_TEMPLATE_MAP.items():
-    template_dict[model_name] = template_dict[template_name]
\ No newline at end of file
+    template_dict[model_name] = template_dict[template_name]

From b3f44928a39c57a2fbb3103020da3a9e9f7cd2c6 Mon Sep 17 00:00:00 2001
From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com>
Date: Fri, 12 Sep 2025 12:33:55 +0100
Subject: [PATCH 3/6] Add Support for Qwen3

---
 src/core/constant.py |  2 +-
 src/core/template.py | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/core/constant.py b/src/core/constant.py
index 5513f78..2805dcc 100644
--- a/src/core/constant.py
+++ b/src/core/constant.py
@@ -68,7 +68,7 @@
     "Qwen/Qwen2.5-32B-Instruct": "qwen1.5",
     "Qwen/Qwen2.5-72B": "qwen1.5",
     "Qwen/Qwen2.5-72B-Instruct": "qwen1.5",
-    "Qwen/Qwen3-4B-Instruct": "qwen1.5",
+    "Qwen/Qwen3-4B-Instruct": "qwen3",
     # Yi
     "01-ai/Yi-1.5-6B": "yi",
     "01-ai/Yi-1.5-6B-Chat": "yi",
diff --git a/src/core/template.py b/src/core/template.py
index 5563a8b..d8152a8 100644
--- a/src/core/template.py
+++ b/src/core/template.py
@@ -68,6 +68,25 @@ def register_template(
     stop_word="<|im_end|>",
 )
 
+register_template(
+    template_name="qwen3",
+    system_format="<|im_start|>system\n{content}<|im_end|>\n",
+    user_format="<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n",
+    assistant_format="{content}<|im_end|>\n",
+    tool_format = (
+        "# Tools\n\n"
+        "You may call one or more functions to assist with the user query.\n\n"
+        "You are provided with function signatures within <tools></tools> XML tags:\n"
+        "<tools>\n{content}\n</tools>\n\n"
+        "For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n"
+        "<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>"
+    ),
+    function_format="<tool_call>\n{content}\n</tool_call><|im_end|>\n",
+    observation_format="<|im_start|>user\n<tool_response>\n{content}\n</tool_response><|im_end|>\n<|im_start|>assistant\n",
+    system="You are a helpful assistant.",
+    stop_word="<|im_end|>",
+)
+
 register_template(
     template_name="yi",
     system_format="<|im_start|>system\n{content}<|im_end|>\n",

From c445ff1def1b0c744f5c16c1bdab359523624866 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 12 Sep 2025 11:36:30 +0000
Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/core/template.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/core/template.py b/src/core/template.py
index d8152a8..cbb2db1 100644
--- a/src/core/template.py
+++ b/src/core/template.py
@@ -73,13 +73,13 @@ def register_template(
     system_format="<|im_start|>system\n{content}<|im_end|>\n",
     user_format="<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n",
     assistant_format="{content}<|im_end|>\n",
-    tool_format = (
+    tool_format=(
         "# Tools\n\n"
         "You may call one or more functions to assist with the user query.\n\n"
         "You are provided with function signatures within <tools></tools> XML tags:\n"
         "<tools>\n{content}\n</tools>\n\n"
         "For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n"
-        "<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>"
+        '<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call>'
     ),
     function_format="<tool_call>\n{content}\n</tool_call><|im_end|>\n",
     observation_format="<|im_start|>user\n<tool_response>\n{content}\n</tool_response><|im_end|>\n<|im_start|>assistant\n",

From bc26deaf07cb18c60dc82f27ea94713d8ca841e5 Mon Sep 17 00:00:00 2001
From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com>
Date: Tue, 16 Sep 2025 20:11:07 +0100
Subject: [PATCH 5/6] modify dependency to support Qwen3

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 3b19883..490590a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 torch>=1.13.1
 huggingface-hub==0.29.1
-transformers==4.49.0
+transformers>=4.51.0
 datasets>=2.14.3
 accelerate>=0.27.2
 loguru==0.7.0

From 45bfb7bdc9739b240a6018852857d7ae75967c75 Mon Sep 17 00:00:00 2001
From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com>
Date: Tue, 16 Sep 2025 21:03:40 +0100
Subject: [PATCH 6/6] Update repo names and update dependencies in
 requirements.txt

---
 requirements.txt     | 2 +-
 src/core/constant.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 490590a..27e1db5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 torch>=1.13.1
-huggingface-hub==0.29.1
+huggingface-hub>=0.34.0,<1.0
 transformers>=4.51.0
 datasets>=2.14.3
 accelerate>=0.27.2
diff --git a/src/core/constant.py b/src/core/constant.py
index 2805dcc..29738bc 100644
--- a/src/core/constant.py
+++ b/src/core/constant.py
@@ -14,7 +14,7 @@
     "Qwen/Qwen2.5-32B-Instruct",
     "Qwen/Qwen2.5-72B",
     "Qwen/Qwen2.5-72B-Instruct",
-    "Qwen/Qwen3-4B-Instruct",
+    "Qwen/Qwen3-4B-Instruct-2507",
     # yi 1.5
     "01-ai/Yi-1.5-6B",
     "01-ai/Yi-1.5-6B-Chat",
@@ -68,7 +68,7 @@
     "Qwen/Qwen2.5-32B-Instruct": "qwen1.5",
     "Qwen/Qwen2.5-72B": "qwen1.5",
     "Qwen/Qwen2.5-72B-Instruct": "qwen1.5",
-    "Qwen/Qwen3-4B-Instruct": "qwen3",
+    "Qwen/Qwen3-4B-Instruct-2507": "qwen3",
     # Yi
     "01-ai/Yi-1.5-6B": "yi",
     "01-ai/Yi-1.5-6B-Chat": "yi",