
Commit 742d16b

Merge branch 'main' into release/2.3
2 parents: e4e4c04 + e457716

File tree

15 files changed: +229 -154 lines

docs/source/LLM/支持的模型和数据集.md

Lines changed: 6 additions & 3 deletions
@@ -373,9 +373,12 @@
 |llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
 |llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
 |llava1_6-yi-34b-instruct|[swift/llava-v1.6-34b-hf](https://modelscope.cn/models/swift/llava-v1.6-34b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-yi|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf)|
-|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
-|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen-instruct|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen-instruct|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|llama3-llava-next-8b-hf|[swift/llama3-llava-next-8b-hf](https://modelscope.cn/models/swift/llama3-llava-next-8b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-llava-next-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llama3-llava-next-8b-hf](https://huggingface.co/llava-hf/llama3-llava-next-8b-hf)|
+|llava-next-72b-hf|[AI-ModelScope/llava-next-72b-hf](https://modelscope.cn/models/AI-ModelScope/llava-next-72b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-qwen-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-next-72b-hf](https://huggingface.co/llava-hf/llava-next-72b-hf)|
+|llava-next-110b-hf|[AI-ModelScope/llava-next-110b-hf](https://modelscope.cn/models/AI-ModelScope/llava-next-110b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-qwen-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-next-110b-hf](https://huggingface.co/llava-hf/llava-next-110b-hf)|
+|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
 |llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
 |llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
 |llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|

docs/source_en/LLM/Supported-models-datasets.md

Lines changed: 6 additions & 3 deletions
@@ -373,9 +373,12 @@ The table below introduces all models supported by SWIFT:
 |llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
 |llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
 |llava1_6-yi-34b-instruct|[swift/llava-v1.6-34b-hf](https://modelscope.cn/models/swift/llava-v1.6-34b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-yi|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf)|
-|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
-|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen-instruct|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen-instruct|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|llama3-llava-next-8b-hf|[swift/llama3-llava-next-8b-hf](https://modelscope.cn/models/swift/llama3-llava-next-8b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-llava-next-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llama3-llava-next-8b-hf](https://huggingface.co/llava-hf/llama3-llava-next-8b-hf)|
+|llava-next-72b-hf|[AI-ModelScope/llava-next-72b-hf](https://modelscope.cn/models/AI-ModelScope/llava-next-72b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-qwen-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-next-72b-hf](https://huggingface.co/llava-hf/llava-next-72b-hf)|
+|llava-next-110b-hf|[AI-ModelScope/llava-next-110b-hf](https://modelscope.cn/models/AI-ModelScope/llava-next-110b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama-qwen-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-next-110b-hf](https://huggingface.co/llava-hf/llava-next-110b-hf)|
+|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
 |llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
 |llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
 |llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|

swift/cli/main.py

Lines changed: 2 additions & 10 deletions
@@ -53,19 +53,11 @@ def cli_main() -> None:
         argv = ['--rlhf_type', method_name] + argv
     file_path = importlib.util.find_spec(ROUTE_MAPPING[method_name]).origin
     torchrun_args = get_torchrun_args()
+    python_cmd = sys.executable
     if torchrun_args is None or method_name not in ('pt', 'sft', 'dpo', 'orpo', 'simpo', 'rlhf'):
-        try:
-            python_cmd = 'python'
-            subprocess.run(
-                [python_cmd, '--version'],
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-            )
-        except FileNotFoundError:
-            python_cmd = 'python3'
         args = [python_cmd, file_path, *argv]
     else:
-        args = ['torchrun', *torchrun_args, file_path, *argv]
+        args = [python_cmd, '-m', 'torch.distributed.run', *torchrun_args, file_path, *argv]
     print(f"run sh: `{' '.join(args)}`", flush=True)
     result = subprocess.run(args)
     if result.returncode != 0:
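Two things change here: the `python`/`python3` probing is replaced by `sys.executable`, and distributed jobs are launched through the `torch.distributed.run` module instead of the `torchrun` console script. Since `torchrun` is essentially an entry point for that module, running it via the current interpreter should start the same launcher while guaranteeing the command uses the interpreter that is already running SWIFT. A rough sketch of the relationship, with placeholder paths and arguments rather than the values cli_main() actually computes:

```python
import subprocess
import sys

# Illustrative stand-ins only; cli_main() builds these from the CLI arguments.
torchrun_args = ['--nproc_per_node', '2']
file_path = '/path/to/swift/llm/sft.py'  # hypothetical script path
argv = ['--model_type', 'qwen2-7b-instruct']

# Old behavior: rely on a `torchrun` console script being found on PATH.
old_cmd = ['torchrun', *torchrun_args, file_path, *argv]

# New behavior: invoke the same launcher module with the interpreter that is
# executing swift itself, so the environment is guaranteed to match.
new_cmd = [sys.executable, '-m', 'torch.distributed.run', *torchrun_args, file_path, *argv]

print(' '.join(old_cmd))
print(' '.join(new_cmd))
# Either list could then be passed to subprocess.run(...), as cli_main() does.
```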

swift/llm/rlhf.py

Lines changed: 1 addition & 0 deletions
@@ -191,6 +191,7 @@ def llm_rlhf(args: RLHFArguments) -> Dict[str, Any]:
             You can also use the --model_type parameter to specify the template.')
     template: Template = get_template(
         'chatml', tokenizer, args.system, args.max_length, args.truncation_strategy, model=model)
+    template._is_training = True
     args.system = template.default_system
     logger.info(f'system: {args.system}')

swift/llm/sft.py

Lines changed: 1 addition & 0 deletions
@@ -296,6 +296,7 @@ def llm_sft(args: SftArguments) -> Dict[str, Any]:
         args.truncation_strategy,
         model=model,
         **template_kwargs)
+    template._is_training = True
     if streaming:
         template.encode = partial(template.encode, streaming=streaming)
     args.system = template.default_system
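Both llm_rlhf and llm_sft now flip the same private `_is_training` flag on the template immediately after constructing it, so encoding can behave differently at training time than at inference. The sketch below uses a simplified stand-in Template class, not the real one from swift's template module, just to illustrate the pattern:

```python
class Template:
    """Simplified stand-in for swift's Template; illustrative only."""

    def __init__(self) -> None:
        self._is_training = False  # inference-style encoding by default

    def encode(self, example: dict) -> dict:
        if self._is_training:
            # e.g. keep labels so a loss can be computed
            return {'input_ids': [1, 2, 3], 'labels': [1, 2, 3]}
        # e.g. generation-style encoding without labels
        return {'input_ids': [1, 2, 3]}


template = Template()
template._is_training = True  # what llm_sft / llm_rlhf now do after get_template(...)
print(template.encode({'query': 'hi'}))
```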

swift/llm/utils/argument.py

Lines changed: 4 additions & 0 deletions
@@ -988,6 +988,10 @@ def __post_init__(self) -> None:
                 self.learning_rate = 1e-5
             if self.save_only_model is None:
                 self.save_only_model = True
+                logger.warning(
+                    'Due to the adoption of full-parameter training, '
+                    'in order to avoid saving excessive weights, we set save_only_model to True. '
+                    'If you want to resume training from a checkpoint, please manually pass `--save_only_model false`.')
             if self.eval_steps is None:
                 self.eval_steps = 200
         else:
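The new warning only fires when `save_only_model` was left unset and the full-parameter defaults apply; passing `--save_only_model false` keeps optimizer state in checkpoints so training can be resumed. A self-contained sketch of that defaulting logic, assuming the block sits under a full-parameter (`sft_type == 'full'`) branch and with the dataclass heavily simplified:

```python
import logging
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)


@dataclass
class SftArguments:
    """Simplified stand-in for swift's SftArguments; illustrative only."""
    sft_type: str = 'full'
    learning_rate: Optional[float] = None
    save_only_model: Optional[bool] = None

    def __post_init__(self) -> None:
        if self.sft_type == 'full':
            if self.learning_rate is None:
                self.learning_rate = 1e-5
            if self.save_only_model is None:
                # Default chosen to avoid huge checkpoints with optimizer state.
                self.save_only_model = True
                logger.warning(
                    'Full-parameter training defaults save_only_model to True; '
                    'pass --save_only_model false if you need to resume from a checkpoint.')


args = SftArguments()
print(args.save_only_model)  # True unless explicitly set by the user
```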
