Skip to content

Commit 38b32da

Browse files
Add reflection model (#1973)
1 parent 97ce55e commit 38b32da

File tree

6 files changed

+24
-0
lines changed

6 files changed

+24
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ You can contact us and communicate with us by adding our group:
5555
<img src="asset/discord_qr.jpg" width="200" height="200"> | <img src="asset/wechat.png" width="200" height="200">
5656

5757
## 🎉 News
58+
- 2024.09.07: Support the `Reflection-llama3-70b` model, use it with `swift sft/infer --model_type reflection-llama_3_1-70b`.
5859
- 2024.09.06: Support fine-tuning and inference for mplug-owl3. Best practices can be found [here](https://github.com/modelscope/ms-swift/issues/1969).
5960
- 2024.09.05: Support for the minicpm3-4b model. Experience it using `swift infer --model_type minicpm3-4b`.
6061
- 2024.09.05: Support for the yi-coder series models. Experience it using `swift infer --model_type yi-coder-1_5b-chat`.

README_CN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ SWIFT具有丰富全面的文档,请查看我们的文档网站:
5656

5757

5858
## 🎉 新闻
59+
- 2024.09.07: 支持`Reflection-llama3-70b`模型, 使用`swift sft/infer --model_type reflection-llama_3_1-70b`命令即可训练和推理.
5960
- 2024.09.06: 支持mplug-owl3的微调和推理, 最佳实践可以查看[这里](https://github.com/modelscope/ms-swift/issues/1969).
6061
- 2024.09.05: 支持minicpm3-4b模型. 使用`swift infer --model_type minicpm3-4b`进行体验.
6162
- 2024.09.05: 支持yi-coder系列模型. 使用`swift infer --model_type yi-coder-1_5b-chat`进行体验.

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@
154154
|llama3_1-405b-instruct-awq|[LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4)|
155155
|llama3_1-405b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4)|
156156
|llama3_1-405b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4)|
157+
|reflection-llama_3_1-70b|[LLM-Research/Reflection-Llama-3.1-70B](https://modelscope.cn/models/LLM-Research/Reflection-Llama-3.1-70B/summary)|q_proj, k_proj, v_proj|reflection|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.43|-|[mattshumer/Reflection-Llama-3.1-70B](https://huggingface.co/mattshumer/Reflection-Llama-3.1-70B)|
157158
|longwriter-glm4-9b|[ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b/summary)|query_key_value|chatglm4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.42|-|[THUDM/LongWriter-glm4-9b](https://huggingface.co/THUDM/LongWriter-glm4-9b)|
158159
|longwriter-llama3_1-8b|[ZhipuAI/LongWriter-llama3.1-8b](https://modelscope.cn/models/ZhipuAI/LongWriter-llama3.1-8b/summary)|q_proj, k_proj, v_proj|longwriter-llama3|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43|-|[THUDM/LongWriter-llama3.1-8b](https://huggingface.co/THUDM/LongWriter-llama3.1-8b)|
159160
|chinese-llama-2-1_3b|[AI-ModelScope/chinese-llama-2-1.3b](https://modelscope.cn/models/AI-ModelScope/chinese-llama-2-1.3b/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[hfl/chinese-llama-2-1.3b](https://huggingface.co/hfl/chinese-llama-2-1.3b)|

docs/source_en/Instruction/Supported-models-datasets.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ The table below introduces all models supported by SWIFT:
154154
|llama3_1-405b-instruct-awq|[LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4)|
155155
|llama3_1-405b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4)|
156156
|llama3_1-405b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4)|
157+
|reflection-llama_3_1-70b|[LLM-Research/Reflection-Llama-3.1-70B](https://modelscope.cn/models/LLM-Research/Reflection-Llama-3.1-70B/summary)|q_proj, k_proj, v_proj|reflection|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.43|-|[mattshumer/Reflection-Llama-3.1-70B](https://huggingface.co/mattshumer/Reflection-Llama-3.1-70B)|
157158
|longwriter-glm4-9b|[ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b/summary)|query_key_value|chatglm4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.42|-|[THUDM/LongWriter-glm4-9b](https://huggingface.co/THUDM/LongWriter-glm4-9b)|
158159
|longwriter-llama3_1-8b|[ZhipuAI/LongWriter-llama3.1-8b](https://modelscope.cn/models/ZhipuAI/LongWriter-llama3.1-8b/summary)|q_proj, k_proj, v_proj|longwriter-llama3|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43|-|[THUDM/LongWriter-llama3.1-8b](https://huggingface.co/THUDM/LongWriter-llama3.1-8b)|
159160
|chinese-llama-2-1_3b|[AI-ModelScope/chinese-llama-2-1.3b](https://modelscope.cn/models/AI-ModelScope/chinese-llama-2-1.3b/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[hfl/chinese-llama-2-1.3b](https://huggingface.co/hfl/chinese-llama-2-1.3b)|

swift/llm/utils/model.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ class ModelType:
208208
llama3_1_405b_instruct_awq = 'llama3_1-405b-instruct-awq'
209209
llama3_1_405b_instruct_gptq_int4 = 'llama3_1-405b-instruct-gptq-int4'
210210
llama3_1_405b_instruct_bnb = 'llama3_1-405b-instruct-bnb'
211+
# reflection
212+
reflection_llama_3_1_70b = 'reflection-llama_3_1-70b'
211213
# long writer
212214
longwriter_glm4_9b = 'longwriter-glm4-9b'
213215
longwriter_llama3_1_8b = 'longwriter-llama3_1-8b'
@@ -4720,6 +4722,15 @@ def get_model_tokenizer_deepseek_vl(model_dir: str,
47204722
support_vllm=True,
47214723
requires=['transformers>=4.43', 'bitsandbytes'],
47224724
hf_model_id='unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit')
4725+
@register_model(
4726+
ModelType.reflection_llama_3_1_70b,
4727+
'LLM-Research/Reflection-Llama-3.1-70B',
4728+
LoRATM.llama,
4729+
TemplateType.reflection,
4730+
support_flash_attn=True,
4731+
support_vllm=True,
4732+
requires=['transformers>=4.43'],
4733+
hf_model_id='mattshumer/Reflection-Llama-3.1-70B')
47234734
@register_model(
47244735
ModelType.llama3_1_70b_instruct_gptq_int4,
47254736
'LLM-Research/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4',

swift/llm/utils/template.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class TemplateType:
5858
codegeex4 = 'codegeex4'
5959
llama = 'llama' # llama2
6060
llama3 = 'llama3'
61+
reflection = 'reflection'
6162
longwriter_llama3 = 'longwriter-llama3'
6263
# llava-hf
6364
llava1_5 = 'llava1_5'
@@ -1622,6 +1623,14 @@ class Llama3Template(Llama3TemplateMixin, Template):
16221623
pass
16231624

16241625

1626+
class ReflectionTemplate(Llama3TemplateMixin, Template):
1627+
system = ('You are a world-class AI system, capable of complex reasoning and reflection. '
1628+
'Reason through the query inside <thinking> tags, and then provide your final '
1629+
'response inside <output> tags. If you detect that you made a mistake in your reasoning '
1630+
'at any point, correct yourself inside <reflection> tags.')
1631+
1632+
1633+
register_template(TemplateType.reflection, ReflectionTemplate())
16251634
register_template(TemplateType.llama3, Llama3Template())
16261635

16271636
OPENBUDDY_DEFAULT_SYSTEM = (

0 commit comments

Comments
 (0)