Skip to content

Commit 9424cfa

Browse files
committed
[XPU]optimize Qwen2_vl for xpu
1 parent 3ec0419 commit 9424cfa

File tree

4 files changed

+177
-72
lines changed

4 files changed

+177
-72
lines changed

paddlemix/examples/qwen2_vl/qwen2vl_finetune.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import sys
2121
import traceback
2222
from dataclasses import dataclass, field
23-
from typing import Dict, Optional, Sequence, Any
23+
from typing import Any, Dict, Optional, Sequence
2424

2525
import numpy as np
2626
import paddle
@@ -31,6 +31,7 @@
3131
from paddlenlp.trainer import PdArgumentParser, TrainingArguments, set_seed
3232
from paddlenlp.trainer.trainer import Trainer
3333
from paddlenlp.trainer.trainer_utils import get_last_checkpoint
34+
from paddlenlp.transformers.processing_utils import ProcessorMixin
3435
from PIL import Image, ImageFile, PngImagePlugin, UnidentifiedImageError
3536

3637
from paddlemix.datasets.internvl_dataset import ConcatDataset, WeightedConcatDataset
@@ -42,7 +43,6 @@
4243
Qwen2VLImageProcessor,
4344
Qwen2VLProcessor,
4445
)
45-
from paddlenlp.transformers.processing_utils import ProcessorMixin
4646

4747
Image.MAX_IMAGE_PIXELS = None
4848
ImageFile.LOAD_TRUNCATED_IMAGES = True
@@ -355,7 +355,7 @@ def pure_text_get_item(self, data_item):
355355
attention_mask=attention_mask,
356356
images=[],
357357
)
358-
358+
359359
return ret
360360

361361
def __getitem__(self, i) -> Dict[str, paddle.Tensor]:
@@ -460,7 +460,7 @@ def __post_init__(self):
460460

461461
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, "paddle.Tensor"]:
462462
batch_images, batch_videos, batch_imglens, batch_vidlens, batch_input_ids = [], [], [], [], []
463-
463+
464464
for feature in features:
465465
images = feature.pop("images", None) or []
466466
videos = feature.pop("videos", None) or []
@@ -470,17 +470,15 @@ def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, "paddle.Tens
470470
batch_vidlens.append(len(videos))
471471
batch_input_ids.append(feature["input_ids"])
472472

473-
if (
474-
self.processor is not None and sum(batch_imglens) == 0 and sum(batch_vidlens) == 0
475-
):
473+
if self.processor is not None and sum(batch_imglens) == 0 and sum(batch_vidlens) == 0:
476474
fake_messages = [{"role": "user", "content": IMAGE_PLACEHOLDER}]
477475
fake_images = [Image.new("RGB", (64, 64), (255, 255, 255))]
478476
fake_messages = self.template.mm_plugin.process_messages(fake_messages, fake_images, [], self.processor)
479477
fake_input_ids = self.tokenizer.encode(fake_messages[0]["content"], add_special_tokens=False)
480478
fake_input_ids, _ = self.template.mm_plugin.process_token_ids(
481479
fake_input_ids, None, fake_images, [], self.tokenizer, self.processor
482480
)
483-
481+
484482
if self.tokenizer.padding_side == "right":
485483
features[0]["input_ids"] = features[0]["input_ids"] + fake_input_ids
486484
features[0]["attention_mask"] = features[0]["attention_mask"] + [0] * len(fake_input_ids)
@@ -530,7 +528,6 @@ def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, "paddle.Tens
530528
return features
531529

532530

533-
534531
def main():
535532
parser = PdArgumentParser((ModelArguments, DataTrainingArguments, PreTrainingArguments))
536533
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
@@ -565,6 +562,16 @@ def main():
565562
"the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
566563
)
567564

565+
if paddle.is_compiled_with_xpu() and training_args.gradient_accumulation_steps > 1:
566+
try:
567+
from paddle_xpu.layers.nn.linear import LinearConfig # noqa: F401
568+
569+
LinearConfig.enable_accumulate_steps_opt()
570+
LinearConfig.set_accumulate_steps(training_args.gradient_accumulation_steps)
571+
except ImportError:
572+
# It's OK: the import failed, so run without the accumulate_steps optimization.
573+
pass
574+
568575
# Load model
569576
if "npu" in paddle.get_device():
570577
is_bfloat16_supported = True

paddlemix/examples/qwen2_vl/shell/baseline_7b_bs32_1e8.sh

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2-
#
2+
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
55
# You may obtain a copy of the License at
6-
#
6+
#
77
# http://www.apache.org/licenses/LICENSE-2.0
8-
#
8+
#
99
# Unless required by applicable law or agreed to in writing, software
1010
# distributed under the License is distributed on an "AS IS" BASIS,
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -38,6 +38,25 @@ MASTER='127.0.0.1:8080'
3838

3939
meta_path="paddlemix/examples/qwen2_vl/configs/baseline_6data_330k.json"
4040

41+
### XPU ###
42+
export XPU_CDNN_CLUSTER_PARALLEL=1
43+
export XPU_CDNN_CLUSTER_PARALLEL_STREAM_NUMBER=2
44+
export XPU_PADDLE_FUSE_SHARDING_BUFFER=1
45+
export FLAGS_use_stride_kernel="0"
46+
# export XPU_PADDLE_L3_SIZE=98566144 # 94 MB
47+
# export XBLAS_FC_AUTOTUNE_FILE="/zhangyikun02/PaddleMIX/autotune_qwen2_vl_7b"
48+
export BKCL_TREE_THRESHOLD=0
49+
50+
export XPU_FUSE_RMSNorm=1
51+
export XPU_FUSE_ATTN_QKV=1
52+
export XPU_FUSE_FFN=1
53+
export XPU_FUSE_ROPE=1
54+
# export PRINT_TIMMER=1
55+
# export PROFILER=1
56+
57+
# export XPUAPI_DEBUG=1
58+
# export XPURT_DISPATCH_MODE=PROFILING
59+
4160
TRAINING_PYTHON="python -m paddle.distributed.launch --master ${MASTER} --nnodes 1 --nproc_per_node ${GPUS} --rank 0 --ips ${TRAINER_INSTANCES} --run_mode=collective"
4261
${TRAINING_PYTHON} --log_dir ${OUTPUT_DIR}/paddle_distributed_logs \
4362
paddlemix/examples/qwen2_vl/qwen2vl_finetune.py \
@@ -73,6 +92,7 @@ ${TRAINING_PYTHON} --log_dir ${OUTPUT_DIR}/paddle_distributed_logs \
7392
--report_to "visualdl" \
7493
--tensor_parallel_degree=${tensor_parallel_degree} \
7594
--sharding_parallel_degree=${sharding_parallel_degree} \
95+
--sharding_parallel_config "split_param" \
7696
--pipeline_parallel_degree=1 \
7797
--sep_parallel_degree=1 \
7898
--sharding="stage1" \

paddlemix/models/qwen2_vl/configuration_qwen2_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(
3131
depth=32,
3232
embed_dim=1280,
3333
hidden_size=3584,
34-
hidden_act="quick_gelu",
34+
hidden_act="gelu",
3535
mlp_ratio=4,
3636
num_heads=16,
3737
in_channels=3,

0 commit comments

Comments
 (0)