Commit 585f6a1: llm support multimodal (#32)
1 parent 9240897

File tree: 21 files changed, +343, -39 lines

examples/pytorch/llm/README.md
Lines changed: 2 additions & 2 deletions

@@ -16,9 +16,9 @@
 
 ## Features
 1. supported sft methods: [lora](https://arxiv.org/abs/2106.09685), [qlora](https://arxiv.org/abs/2305.14314), full (full-parameter fine-tuning), ...
-2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b
+2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, qwen-vl, **qwen-vl-chat**, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b
 3. supported features: quantization, ddp, model parallelism (device map), gradient checkpointing, gradient accumulation steps, push to modelscope hub, custom datasets, ...
-4. supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, cot-en, cot-zh
+4. supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, cot-en, cot-zh, coco-en
 5. supported templates: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default
 
 ## Prepare the Environment
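The feature list above maps directly onto launch flags. As a quick orientation, a minimal single-GPU QLoRA launch might look like the following (a sketch assembled from flags that appear in this commit's scripts, trimmed down; not a snippet from the repo):

# Minimal single-GPU QLoRA fine-tune; flags taken from the sft.sh scripts
# in this diff, other arguments left at their defaults.
CUDA_VISIBLE_DEVICES=0 \
python src/llm_sft.py \
    --model_type qwen-7b-chat \
    --sft_type lora \
    --quantization_bit 4 \
    --bnb_4bit_comp_dtype bf16 \
    --dataset alpaca-en \
    --output_dir runs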

examples/pytorch/llm/README_CN.md
Lines changed: 2 additions & 2 deletions

@@ -17,9 +17,9 @@
 
 ## Features
 1. [lora](https://arxiv.org/abs/2106.09685), [qlora](https://arxiv.org/abs/2305.14314), full-parameter fine-tuning, ...
-2. Supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b
+2. Supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, qwen-vl, **qwen-vl-chat**, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b
 3. Supported features: model quantization, DDP, model parallelism (device_map), gradient checkpointing, gradient accumulation, push to ModelScope Hub, custom datasets, ...
-4. Supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, cot-en, cot-zh
+4. Supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, cot-en, cot-zh, coco-en
 5. Supported templates: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default
 
 ## Prepare the Environment

examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 32 \
+    --lora_alpha 16 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --batch_size 1 \

examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ python src/llm_sft.py \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 32 \
+    --lora_alpha 16 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --batch_size 1 \

examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@ torchrun \
    --quantization_bit 4 \
    --bnb_4bit_comp_dtype bf16 \
    --lora_rank 64 \
-   --lora_alpha 32 \
+   --lora_alpha 16 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --batch_size 1 \
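The lora_alpha 32 -> 16 change recurs in all three QLoRA scripts above, and the new qwen-vl script below uses 16 as well. Its effect, going by the usual LoRA convention (e.g. HF PEFT; not spelled out in this commit): the adapter update is scaled by lora_alpha / lora_rank, so at lora_rank 64 the effective scale halves.

# LoRA scaling factor = lora_alpha / lora_rank (HF PEFT convention)
echo "scale=4; 32 / 64" | bc   # before: .5000
echo "scale=4; 16 / 64" | bc   # after:  .2500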
Lines changed: 18 additions & 0 deletions (new file)

@@ -0,0 +1,18 @@
+# 10G
+CUDA_VISIBLE_DEVICES=0 \
+python src/llm_infer.py \
+    --model_type qwen-vl \
+    --sft_type lora \
+    --template_type chatml \
+    --dtype bf16 \
+    --ckpt_dir "runs/qwen-vl/vx_xxx/checkpoint-xxx" \
+    --eval_human false \
+    --dataset coco-en \
+    --dataset_sample 20000 \
+    --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
+    --max_new_tokens 1024 \
+    --temperature 0.9 \
+    --top_k 50 \
+    --top_p 0.9 \
+    --do_sample true \
Lines changed: 38 additions & 0 deletions (new file)

@@ -0,0 +1,38 @@
+# 4 * 16GB VRAM
+nproc_per_node=4
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+torchrun \
+    --nproc_per_node=$nproc_per_node \
+    --master_port 29500 \
+    src/llm_sft.py \
+    --model_type qwen-vl \
+    --sft_type lora \
+    --template_type chatml \
+    --dtype bf16 \
+    --output_dir runs \
+    --ddp_backend nccl \
+    --dataset coco-en \
+    --dataset_sample 20000 \
+    --num_train_epochs 1 \
+    --max_length 1024 \
+    --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
+    --lora_rank 64 \
+    --lora_alpha 16 \
+    --lora_dropout_p 0.05 \
+    --lora_target_modules ALL \
+    --batch_size 1 \
+    --weight_decay 0. \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --use_flash_attn false \
+    --push_to_hub false \
+    --hub_model_id qwen-vl-qlora \
+    --hub_private_repo true \
+    --hub_token 'your-sdk-token' \
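In the DDP scripts, gradient accumulation is tied to the GPU count so the effective global batch size stays fixed at 16 regardless of nproc_per_node (my arithmetic; the commit does not state this):

# effective batch = batch_size * gradient_accumulation_steps * nproc_per_node
nproc_per_node=4
batch_size=1
grad_accum=$(expr 16 / $nproc_per_node)              # 4
expr $batch_size \* $grad_accum \* $nproc_per_node   # prints 16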
Lines changed: 16 additions & 0 deletions (new file)

@@ -0,0 +1,16 @@
+# 19G
+CUDA_VISIBLE_DEVICES=0 \
+python src/llm_infer.py \
+    --model_type qwen-vl-chat \
+    --sft_type lora \
+    --template_type chatml \
+    --dtype bf16 \
+    --ckpt_dir "runs/qwen-vl-chat/vx_xxx/checkpoint-xxx" \
+    --eval_human false \
+    --dataset coco-en \
+    --dataset_sample 20000 \
+    --max_new_tokens 1024 \
+    --temperature 0.9 \
+    --top_k 50 \
+    --top_p 0.9 \
+    --do_sample true \
Lines changed: 35 additions & 0 deletions (new file)

@@ -0,0 +1,35 @@
+# 4 * 23GB VRAM
+nproc_per_node=4
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+torchrun \
+    --nproc_per_node=$nproc_per_node \
+    --master_port 29500 \
+    src/llm_sft.py \
+    --model_type qwen-vl-chat \
+    --sft_type lora \
+    --template_type chatml \
+    --dtype bf16 \
+    --output_dir runs \
+    --ddp_backend nccl \
+    --dataset coco-en \
+    --dataset_sample 20000 \
+    --num_train_epochs 1 \
+    --max_length 1024 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0.05 \
+    --batch_size 1 \
+    --weight_decay 0. \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --use_flash_attn false \
+    --push_to_hub false \
+    --hub_model_id qwen-vl-chat-lora \
+    --hub_private_repo true \
+    --hub_token 'your-sdk-token' \
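Compared with the QLoRA scripts above, this qwen-vl-chat recipe trains in bf16 without 4-bit quantization (hence 23 GB rather than 16 GB per GPU) and swaps the adapter shape: lora_rank 8 with lora_alpha 32, i.e. an alpha/rank scale of 4 versus 16/64 = 0.25, with --lora_target_modules left at its default.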
Lines changed: 18 additions & 0 deletions (new file)

@@ -0,0 +1,18 @@
+# 10G
+CUDA_VISIBLE_DEVICES=0 \
+python src/llm_infer.py \
+    --model_type qwen-vl-chat \
+    --sft_type lora \
+    --template_type chatml \
+    --dtype bf16 \
+    --ckpt_dir "runs/qwen-vl-chat/vx_xxx/checkpoint-xxx" \
+    --eval_human false \
+    --dataset coco-en \
+    --dataset_sample 20000 \
+    --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
+    --max_new_tokens 1024 \
+    --temperature 0.9 \
+    --top_k 50 \
+    --top_p 0.9 \
+    --do_sample true \
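This final script is identical to the 16-line qwen-vl-chat inference script above except for the added --quantization_bit 4 and --bnb_4bit_comp_dtype bf16; per the header comments, 4-bit loading cuts inference VRAM from roughly 19 GB to roughly 10 GB.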
