From 795c156f4ac7c214f10c3fc8615147f79193ce1d Mon Sep 17 00:00:00 2001 From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com> Date: Thu, 11 Sep 2025 15:57:48 +0100 Subject: [PATCH 1/6] Add support for Qwen3-4B-Instruct and add model-template mappings --- src/core/constant.py | 64 ++++++++++++++++++++++++++++++++++++++++++++ src/core/template.py | 4 +++ 2 files changed, 68 insertions(+) diff --git a/src/core/constant.py b/src/core/constant.py index 056bbb1..dfc44d1 100644 --- a/src/core/constant.py +++ b/src/core/constant.py @@ -14,6 +14,7 @@ "Qwen/Qwen2.5-32B-Instruct", "Qwen/Qwen2.5-72B", "Qwen/Qwen2.5-72B-Instruct", + "Qwen/Qwen3-4B-Instruct", # yi 1.5 "01-ai/Yi-1.5-6B", "01-ai/Yi-1.5-6B-Chat", @@ -50,3 +51,66 @@ "microsoft/Phi-4-mini-instruct", "microsoft/phi-4", ] + +MODEL_TEMPLATE_MAP = { + # Qwen + "Qwen/Qwen2.5-0.5B": "qwen1.5", + "Qwen/Qwen2.5-0.5B-Instruct": "qwen1.5", + "Qwen/Qwen2.5-1.5B": "qwen1.5", + "Qwen/Qwen2.5-1.5B-Instruct": "qwen1.5", + "Qwen/Qwen2.5-3B": "qwen1.5", + "Qwen/Qwen2.5-3B-Instruct": "qwen1.5", + "Qwen/Qwen2.5-7B": "qwen1.5", + "Qwen/Qwen2.5-7B-Instruct": "qwen1.5", + "Qwen/Qwen2.5-14B": "qwen1.5", + "Qwen/Qwen2.5-14B-Instruct": "qwen1.5", + "Qwen/Qwen2.5-32B": "qwen1.5", + "Qwen/Qwen2.5-32B-Instruct": "qwen1.5", + "Qwen/Qwen2.5-72B": "qwen1.5", + "Qwen/Qwen2.5-72B-Instruct": "qwen1.5", + "Qwen/Qwen3-4B-Instruct": "qwen1.5", + + # Yi + "01-ai/Yi-1.5-6B": "yi", + "01-ai/Yi-1.5-6B-Chat": "yi", + "01-ai/Yi-1.5-9B": "yi", + "01-ai/Yi-1.5-9B-Chat": "yi", + "01-ai/Yi-1.5-34B": "yi", + "01-ai/Yi-1.5-34B-Chat": "yi", + + # Mistral + "mistralai/Mistral-7B-v0.3": "mistral", + "mistralai/Mistral-7B-Instruct-v0.3": "mistral", + "mistralai/Ministral-8B-Instruct-2410": "mistral", + + # Mixtral + "mistralai/Mixtral-8x7B-v0.1": "mixtral", + "mistralai/Mixtral-8x7B-Instruct-v0.1": "mixtral", + + # Gemma 2 + "google/gemma-2-2b": "gemma", + "google/gemma-2-9b": "gemma", + "google/gemma-2-27b": "gemma", + "google/gemma-2-2b-it": 
"gemma", + "google/gemma-2-9b-it": "gemma", + "google/gemma-2-27b-it": "gemma", + + # LLaMA 3 + 3.1 + "meta-llama/Meta-Llama-3-8B": "llama3", + "meta-llama/Meta-Llama-3-8B-Instruct": "llama3", + "meta-llama/Meta-Llama-3-70B": "llama3", + "meta-llama/Meta-Llama-3-70B-Instruct": "llama3", + "meta-llama/Meta-Llama-3.1-8B": "llama3", + "meta-llama/Meta-Llama-3.1-8B-Instruct": "llama3", + "meta-llama/Meta-Llama-3.1-70B": "llama3", + "meta-llama/Meta-Llama-3.1-70B-Instruct": "llama3", + + # Phi 3 + "microsoft/Phi-3.5-mini-instruct": "phi3", + "microsoft/Phi-3-mini-4k-instruct": "phi3", + "microsoft/Phi-3-medium-4k-instruct": "phi3", + + # Phi 4 + "microsoft/Phi-4-mini-instruct": "phi4", + "microsoft/phi-4": "phi4", +} diff --git a/src/core/template.py b/src/core/template.py index 862c52f..edbfaa7 100644 --- a/src/core/template.py +++ b/src/core/template.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from typing import Dict +from constant import MODEL_TEMPLATE_MAP @dataclass @@ -182,3 +183,6 @@ def register_template( system=None, stop_word="<|end|>", ) + +for model_name, template_name in MODEL_TEMPLATE_MAP.items(): + template_dict[model_name] = template_dict[template_name] \ No newline at end of file From 33e1251c327a35823c468c6d71b4b44234def1ad Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 15:02:01 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/core/constant.py | 7 ------- src/core/template.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/core/constant.py b/src/core/constant.py index dfc44d1..5513f78 100644 --- a/src/core/constant.py +++ b/src/core/constant.py @@ -69,7 +69,6 @@ "Qwen/Qwen2.5-72B": "qwen1.5", "Qwen/Qwen2.5-72B-Instruct": "qwen1.5", "Qwen/Qwen3-4B-Instruct": "qwen1.5", - # Yi "01-ai/Yi-1.5-6B": "yi", "01-ai/Yi-1.5-6B-Chat": "yi", @@ -77,16 +76,13 @@ 
"01-ai/Yi-1.5-9B-Chat": "yi", "01-ai/Yi-1.5-34B": "yi", "01-ai/Yi-1.5-34B-Chat": "yi", - # Mistral "mistralai/Mistral-7B-v0.3": "mistral", "mistralai/Mistral-7B-Instruct-v0.3": "mistral", "mistralai/Ministral-8B-Instruct-2410": "mistral", - # Mixtral "mistralai/Mixtral-8x7B-v0.1": "mixtral", "mistralai/Mixtral-8x7B-Instruct-v0.1": "mixtral", - # Gemma 2 "google/gemma-2-2b": "gemma", "google/gemma-2-9b": "gemma", @@ -94,7 +90,6 @@ "google/gemma-2-2b-it": "gemma", "google/gemma-2-9b-it": "gemma", "google/gemma-2-27b-it": "gemma", - # LLaMA 3 + 3.1 "meta-llama/Meta-Llama-3-8B": "llama3", "meta-llama/Meta-Llama-3-8B-Instruct": "llama3", @@ -104,12 +99,10 @@ "meta-llama/Meta-Llama-3.1-8B-Instruct": "llama3", "meta-llama/Meta-Llama-3.1-70B": "llama3", "meta-llama/Meta-Llama-3.1-70B-Instruct": "llama3", - # Phi 3 "microsoft/Phi-3.5-mini-instruct": "phi3", "microsoft/Phi-3-mini-4k-instruct": "phi3", "microsoft/Phi-3-medium-4k-instruct": "phi3", - # Phi 4 "microsoft/Phi-4-mini-instruct": "phi4", "microsoft/phi-4": "phi4", diff --git a/src/core/template.py b/src/core/template.py index edbfaa7..5563a8b 100644 --- a/src/core/template.py +++ b/src/core/template.py @@ -185,4 +185,4 @@ def register_template( ) for model_name, template_name in MODEL_TEMPLATE_MAP.items(): - template_dict[model_name] = template_dict[template_name] \ No newline at end of file + template_dict[model_name] = template_dict[template_name] From b3f44928a39c57a2fbb3103020da3a9e9f7cd2c6 Mon Sep 17 00:00:00 2001 From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com> Date: Fri, 12 Sep 2025 12:33:55 +0100 Subject: [PATCH 3/6] Add Support for Qwen3 --- src/core/constant.py | 2 +- src/core/template.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/core/constant.py b/src/core/constant.py index 5513f78..2805dcc 100644 --- a/src/core/constant.py +++ b/src/core/constant.py @@ -68,7 +68,7 @@ "Qwen/Qwen2.5-32B-Instruct": "qwen1.5", "Qwen/Qwen2.5-72B": 
"qwen1.5", "Qwen/Qwen2.5-72B-Instruct": "qwen1.5", - "Qwen/Qwen3-4B-Instruct": "qwen1.5", + "Qwen/Qwen3-4B-Instruct": "qwen3", # Yi "01-ai/Yi-1.5-6B": "yi", "01-ai/Yi-1.5-6B-Chat": "yi", diff --git a/src/core/template.py b/src/core/template.py index 5563a8b..d8152a8 100644 --- a/src/core/template.py +++ b/src/core/template.py @@ -68,6 +68,25 @@ def register_template( stop_word="<|im_end|>", ) +register_template( + template_name="qwen3", + system_format="<|im_start|>system\n{content}<|im_end|>\n", + user_format="<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n", + assistant_format="{content}<|im_end|>\n", + tool_format = ( + "# Tools\n\n" + "You may call one or more functions to assist with the user query.\n\n" + "You are provided with function signatures within XML tags:\n" + "\n{content}\n\n\n" + "For each function call, return a json object with function name and arguments within XML tags:\n" + "\n{\"name\": , \"arguments\": }\n" + ), + function_format="\n{content}\n<|im_end|>\n", + observation_format="<|im_start|>user\n\n{content}\n<|im_end|>\n<|im_start|>assistant\n", + system="You are a helpful assistant.", + stop_word="<|im_end|>", +) + register_template( template_name="yi", system_format="<|im_start|>system\n{content}<|im_end|>\n", From c445ff1def1b0c744f5c16c1bdab359523624866 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 11:36:30 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/core/template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/template.py b/src/core/template.py index d8152a8..cbb2db1 100644 --- a/src/core/template.py +++ b/src/core/template.py @@ -73,13 +73,13 @@ def register_template( system_format="<|im_start|>system\n{content}<|im_end|>\n", user_format="<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n", 
assistant_format="{content}<|im_end|>\n", - tool_format = ( + tool_format=( "# Tools\n\n" "You may call one or more functions to assist with the user query.\n\n" "You are provided with function signatures within XML tags:\n" "\n{content}\n\n\n" "For each function call, return a json object with function name and arguments within XML tags:\n" - "\n{\"name\": , \"arguments\": }\n" + '\n{"name": , "arguments": }\n' ), function_format="\n{content}\n<|im_end|>\n", observation_format="<|im_start|>user\n\n{content}\n<|im_end|>\n<|im_start|>assistant\n", From bc26deaf07cb18c60dc82f27ea94713d8ca841e5 Mon Sep 17 00:00:00 2001 From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com> Date: Tue, 16 Sep 2025 20:11:07 +0100 Subject: [PATCH 5/6] modify dependency to support Qwen3 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3b19883..490590a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ torch>=1.13.1 huggingface-hub==0.29.1 -transformers==4.49.0 +transformers>=4.51.0 datasets>=2.14.3 accelerate>=0.27.2 loguru==0.7.0 From 45bfb7bdc9739b240a6018852857d7ae75967c75 Mon Sep 17 00:00:00 2001 From: Mohammed-Faizzzz <110959467+Mohammed-Faizzzz@users.noreply.github.com> Date: Tue, 16 Sep 2025 21:03:40 +0100 Subject: [PATCH 6/6] Update repo names and update dependencies in requirements.txt --- requirements.txt | 2 +- src/core/constant.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 490590a..27e1db5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ torch>=1.13.1 -huggingface-hub==0.29.1 +huggingface-hub>=0.34.0,<1.0 transformers>=4.51.0 datasets>=2.14.3 accelerate>=0.27.2 diff --git a/src/core/constant.py b/src/core/constant.py index 2805dcc..29738bc 100644 --- a/src/core/constant.py +++ b/src/core/constant.py @@ -14,7 +14,7 @@ "Qwen/Qwen2.5-32B-Instruct", "Qwen/Qwen2.5-72B", 
"Qwen/Qwen2.5-72B-Instruct", - "Qwen/Qwen3-4B-Instruct", + "Qwen/Qwen3-4B-Instruct-2507", # yi 1.5 "01-ai/Yi-1.5-6B", "01-ai/Yi-1.5-6B-Chat", @@ -68,7 +68,7 @@ "Qwen/Qwen2.5-32B-Instruct": "qwen1.5", "Qwen/Qwen2.5-72B": "qwen1.5", "Qwen/Qwen2.5-72B-Instruct": "qwen1.5", - "Qwen/Qwen3-4B-Instruct": "qwen3", + "Qwen/Qwen3-4B-Instruct-2507": "qwen3", # Yi "01-ai/Yi-1.5-6B": "yi", "01-ai/Yi-1.5-6B-Chat": "yi",