154 changes: 68 additions & 86 deletions ernie/utils/download_utils.py
@@ -12,67 +12,78 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import os
import json
from paddleformers.utils.log import logger
from paddleformers.utils.download import (
DownloadSource,
register_model_group,
check_repo,
)

MODEL_DOWNLOAD_MAP = {
"ERNIE-4.5-300B-A47B-Base": {
"hf_hub": "baidu/ERNIE-4.5-300B-A47B-Base-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-300B-A47B-Base-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-300B-A47B-Base-Paddle",
},
"ERNIE-4.5-300B-A47B": {
"hf_hub": "baidu/ERNIE-4.5-300B-A47B-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle",
},
"ERNIE-4.5-21B-A3B-Base": {
"hf_hub": "baidu/ERNIE-4.5-21B-A3B-Base-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-21B-A3B-Base-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-21B-A3B-Base-Paddle",
},
"ERNIE-4.5-21B-A3B": {
"hf_hub": "baidu/ERNIE-4.5-21B-A3B-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle",
},
"ERNIE-4.5-0.3B-Base": {
"hf_hub": "baidu/ERNIE-4.5-0.3B-Base-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-0.3B-Base-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-0.3B-Base-Paddle",
},
"ERNIE-4.5-0.3B": {
"hf_hub": "baidu/ERNIE-4.5-0.3B-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-0.3B-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-0.3B-Paddle",
},
"ERNIE-4.5-VL-424B-A47B-Base": {
"hf_hub": "baidu/ERNIE-4.5-VL-424B-A47B-Base-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-VL-424B-A47B-Base-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-VL-424B-A47B-Base-Paddle",
},
"ERNIE-4.5-VL-424B": {
"hf_hub": "baidu/ERNIE-4.5-VL-424B-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-VL-424B-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-VL-424B-Paddle",
},
"ERNIE-4.5-VL-28B-A3B-Base": {
"hf_hub": "baidu/ERNIE-4.5-VL-28B-A3B-Base-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Base-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Base-Paddle",
},
"ERNIE-4.5-VL-28B-A3B": {
"hf_hub": "baidu/ERNIE-4.5-VL-28B-A3B-Paddle",
"aistudio": "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Paddle",
"modelscope": "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Paddle",
},
}
register_model_group(
models={
"ERNIE-4.5-300B-A47B-Base": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-300B-A47B-Base-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-300B-A47B-Base-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-300B-A47B-Base-Paddle",
},
"ERNIE-4.5-300B-A47B": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-300B-A47B-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle",
},
"ERNIE-4.5-21B-A3B-Base": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-21B-A3B-Base-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-21B-A3B-Base-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-21B-A3B-Base-Paddle",
},
"ERNIE-4.5-21B-A3B": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-21B-A3B-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle",
},
"ERNIE-4.5-0.3B-Base": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-0.3B-Base-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-0.3B-Base-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-0.3B-Base-Paddle",
},
"ERNIE-4.5-0.3B": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-0.3B-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-0.3B-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-0.3B-Paddle",
},
}
)


register_model_group(
models={
"ERNIE-4.5-VL-424B-A47B-Base": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-VL-424B-A47B-Base-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-VL-424B-A47B-Base-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-VL-424B-A47B-Base-Paddle",
},
"ERNIE-4.5-VL-424B": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-VL-424B-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-VL-424B-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-VL-424B-Paddle",
},
"ERNIE-4.5-VL-28B-A3B-Base": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-VL-28B-A3B-Base-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Base-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Base-Paddle",
},
"ERNIE-4.5-VL-28B-A3B": {
DownloadSource.HUGGINGFACE: "baidu/ERNIE-4.5-VL-28B-A3B-Paddle",
DownloadSource.AISTUDIO: "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Paddle",
DownloadSource.MODELSCOPE: "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Paddle",
},
}
)


def check_download_repo(
model_name_or_path, from_hf_hub=False, from_aistudio=False, from_modelscope=False
model_name_or_path, download_hub: DownloadSource = DownloadSource.DEFAULT
):
# Detect torch model.
is_local = os.path.isfile(model_name_or_path) or os.path.isdir(model_name_or_path)
@@ -84,37 +95,8 @@ def check_download_repo(
raise ValueError(
"Unsupported weight format: Torch weights are not compatible with Paddle model currently."
)

return model_name_or_path
else:
# check remote repo
model_name = model_name_or_path.split("/")[-1].rstrip("-Paddle")
if model_name in MODEL_DOWNLOAD_MAP.keys():
if re.match(
r"^(baidu|PaddlePaddle)/ERNIE-4\.5-.+-Paddle$", model_name_or_path
): # model download from baidu
download_repo = MODEL_DOWNLOAD_MAP[model_name]
if from_hf_hub:
if model_name_or_path != download_repo["hf_hub"]:
logger.warning(
f"The repo id of baidu's model in the hf_hub should be 'baidu', model_name_or_path has changed to {download_repo['hf_hub']}"
)
return download_repo["hf_hub"]
elif from_aistudio:
if model_name_or_path != download_repo["aistudio"]:
logger.warning(
f"The repo id of baidu's model in the aistudio should be 'PaddlePaddle', model_name_or_path has changed to {download_repo['aistudio']}"
)
return download_repo["aistudio"]
elif from_modelscope:
if model_name_or_path != download_repo["modelscope"]:
logger.warning(
f"The repo id of baidu's model in the modelscope should be 'PaddlePaddle', model_name_or_path has changed to {download_repo['modelscope']}"
)
return download_repo["modelscope"]
else:
raise ValueError(
"please select a model downloading source: --from_hf_hub, --from_aistudio, --from_modelscope"
)
model_name_or_path = check_repo(model_name_or_path, download_hub)

return model_name_or_path
return model_name_or_path
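
Taken together, this file's change drops the hand-written MODEL_DOWNLOAD_MAP lookup and regex branch and delegates repo resolution to paddleformers' check_repo, keyed by a DownloadSource. A minimal usage sketch under that reading — the repo id comes from the registrations above, the local path is a placeholder, and DownloadSource.MODELSCOPE is used exactly as it appears in the registered groups:

from paddleformers.utils.download import DownloadSource

from ernie.utils.download_utils import check_download_repo

# Remote repo id: resolved through the registered model groups for the chosen hub.
repo_id = check_download_repo(
    "baidu/ERNIE-4.5-0.3B-Paddle",
    download_hub=DownloadSource.MODELSCOPE,
)

# Local checkpoint directory (placeholder path): returned unchanged once the
# Torch-weight guard passes.
local_path = check_download_repo("./output/ernie-0.3b-sft")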
19 changes: 5 additions & 14 deletions erniekit/eval/eval.py
@@ -219,20 +219,14 @@ def run_eval(args: Optional[dict[str, Any]] = None) -> None:

model_args.model_name_or_path = check_download_repo(
model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
from_modelscope=model_args.from_modelscope,
download_hub=model_args.download_hub,
)

if getattr(model_args, "from_modelscope", False):
os.environ["from_modelscope"] = "True"

model_config = Ernie4_5_MoeConfig.from_pretrained(
model_args.model_name_or_path,
dtype=dtype,
quantization_config=quantization_config,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)
model_config.tensor_parallel_degree = finetuning_args.tensor_parallel_degree
@@ -288,16 +282,14 @@ def run_eval(args: Optional[dict[str, Any]] = None) -> None:
model = model_class.from_pretrained(
model_args.model_name_or_path,
config=model_config,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)
else:
model = model_class.from_config(
model_config,
dtype=dtype,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)

@@ -320,8 +312,7 @@ def run_eval(args: Optional[dict[str, Any]] = None) -> None:

tokenizer = Ernie4_5_Tokenizer.from_pretrained(
model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)

9 changes: 2 additions & 7 deletions erniekit/export/export.py
@@ -96,18 +96,13 @@ def run_export(args: Optional[dict[str, Any]] = None) -> None:

model_args.model_name_or_path = check_download_repo(
model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
from_modelscope=model_args.from_modelscope,
download_hub=model_args.download_hub,
)
if getattr(model_args, "from_modelscope", False):
os.environ["from_modelscope"] = "True"

resolve_result = resolve_file_path(
model_args.model_name_or_path,
[SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME],
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
)
if resolve_result is not None:
resolve_path = os.path.dirname(resolve_result)
16 changes: 5 additions & 11 deletions erniekit/hparams/model_args.py
@@ -89,17 +89,11 @@ class ModelArguments:
"help": "Whether to use the fused gate-detach matmul implementation."
},
)
from_hf_hub: bool = field(
default=False,
metadata={"help": "Whether to download model from huggingface hub"},
)
from_aistudio: bool = field(
default=False,
metadata={"help": "Whether to download model from aistudio"},
)
from_modelscope: bool = field(
default=False,
metadata={"help": "Whether to download model from modelscope"},
download_hub: str = field(
default="aistudio",
metadata={
"help": "The source for model downloading, options include `huggingface`, `aistudio`, `modelscope`, default `aistudio`."
},
)

# performance
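
On the hyperparameter side, the three mutually exclusive booleans collapse into a single string-valued download_hub field (default "aistudio"). A hedged before/after illustration of how a caller's model arguments change — the dicts below are purely illustrative, with the repo id taken from the download map in this PR:

# Before this PR: one boolean flag per download source.
old_model_args = {
    "model_name_or_path": "baidu/ERNIE-4.5-21B-A3B-Paddle",
    "from_modelscope": True,
}

# After this PR: one selector, one of "huggingface" / "aistudio" / "modelscope".
new_model_args = {
    "model_name_or_path": "baidu/ERNIE-4.5-21B-A3B-Paddle",
    "download_hub": "modelscope",
}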
16 changes: 4 additions & 12 deletions erniekit/train/dpo/workflow.py
@@ -201,14 +201,9 @@ def run_dpo(

model_args.model_name_or_path = check_download_repo(
model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
from_modelscope=model_args.from_modelscope,
download_hub=model_args.download_hub,
)

if getattr(model_args, "from_modelscope", False):
os.environ["from_modelscope"] = "True"

# fuse_softmax_mask only support for rocm.
if not paddle.is_compiled_with_rocm():
if model_args.fuse_softmax_mask:
@@ -287,8 +282,7 @@ def run_dpo(
num_acc_steps=finetuning_args.gradient_accumulation_steps,
add_tail_layers=model_args.add_tail_layers,
num_nextn_predict_layers=0,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
)
if model_args.moe_use_aux_free is False:
model_kwargs.update({"moe_use_aux_free": model_args.moe_use_aux_free})
@@ -318,8 +312,7 @@ def run_dpo(
model = model_class.from_pretrained(
model_args.model_name_or_path,
config=config,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)
else:
@@ -357,8 +350,7 @@ def run_dpo(

tokenizer = Ernie4_5_Tokenizer.from_pretrained(
model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)
logger.info("Loading model & tokenizer successfully !")
16 changes: 4 additions & 12 deletions erniekit/train/sft/workflow.py
@@ -204,14 +204,9 @@ def run_sft(

model_args.model_name_or_path = check_download_repo(
model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
from_modelscope=model_args.from_modelscope,
download_hub=model_args.download_hub,
)

if getattr(model_args, "from_modelscope", False):
os.environ["from_modelscope"] = "True"

model_class = Ernie4_5_MoeForCausalLM
if finetuning_args.pipeline_parallel_degree > 1:
model_class = Ernie4_5_MoeForCausalLMPipe
@@ -277,8 +272,7 @@ def run_sft(
model_args.model_name_or_path,
dtype=dtype,
quantization_config=quantization_config,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)
model_config.tensor_parallel_degree = finetuning_args.tensor_parallel_degree
@@ -344,8 +338,7 @@ def run_sft(
model = model_class.from_pretrained(
model_args.model_name_or_path,
config=model_config,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)
else:
@@ -360,8 +353,7 @@ def run_sft(
logger.info(f"{runtime_timer.log()}")
tokenizer = Ernie4_5_Tokenizer.from_pretrained(
model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False,
)

13 changes: 3 additions & 10 deletions examples/post-training/dpo/dpo_estimate_training.py
@@ -215,19 +215,12 @@ def dpo_estimate_training(tokenizer, data_args, training_args, config, train_dat
raise ValueError(f"num_of_gpus must be positive, but got num_of_gpus={training_args.num_of_gpus}")

model_args.model_name_or_path = check_download_repo(model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
from_modelscope=model_args.from_modelscope)

if getattr(model_args, "from_modelscope", False):
os.environ["from_modelscope"] = "True"
download_hub=model_args.download_hub)

tokenizer = Ernie4_5_Tokenizer.from_pretrained(model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False)
config = Ernie4_5_MoeConfig.from_pretrained(model_args.model_name_or_path,
from_hf_hub=model_args.from_hf_hub,
from_aistudio=model_args.from_aistudio,
download_hub=model_args.download_hub,
convert_from_torch=False)
dpo_estimate_training(tokenizer, data_args, training_args, config)