Skip to content

使用mixlora等moe结构进行多任务学习时,模型不收敛 #32

@zhy665

Description

@zhy665

import subprocess

def gen(datasets, multi_task=False, adapter_name="lora"):
    """Invoke ``launch.py gen`` in a child process.

    Args:
        datasets: Value passed to ``--tasks`` (formatted with ``str``).
        multi_task: Value passed to ``--multi_task`` (formatted with ``str``).
        adapter_name: Value passed to both ``--template`` and
            ``--adapter_name``.

    The child's exit status is not inspected.
    """
    command = [
        "python", "launch.py", "gen",
        "--template", f"{adapter_name}",
        "--tasks", f"{datasets}",
        "--multi_task", f"{multi_task}",
        "--adapter_name", f"{adapter_name}",
    ]
    subprocess.run(command)

def run(adapter_name="lora"):
    """Invoke ``launch.py run`` in a child process.

    Args:
        adapter_name: Value passed to ``--config_name``.

    The base model path and CUDA device are fixed in the argument list.
    The child's exit status is not inspected.
    """
    # Build the command line.
    command = [
        "python", "./launch.py", "run",
        "--base_model", "models/Qwen2-7b/",
        "--cuda_device", "0",
        "--config_name", f"{adapter_name}",
    ]
    # Execute it and wait for completion.
    subprocess.run(command)

def help():
    """Invoke ``launch.py help`` in a child process.

    The child's exit status is not inspected.
    """
    # Build the command line.
    command = [
        "python", "./launch.py", "help",
    ]
    # Execute it and wait for completion.
    subprocess.run(command)

if __name__ == "__main__":
    # Single-task variant, kept for reference:
    # datasets = ["arc-c", "arc-e", "boolq", "obqa", "piqa" , "siqa", "hellaswag", "winogrande"]
    # for dataset in datasets:
    #     print(f"{dataset} is training")
    #     gen(dataset)
    #     run(adapter_name=f"{dataset}")

    # Multi-task: all tasks are passed to gen as one semicolon-separated string.
    datasets = "arc-c;arc-e;boolq;obqa;piqa;siqa;hellaswag;winogrande"
    gen(datasets, multi_task=True, adapter_name="mixlora")
    # NOTE(review): gen receives adapter_name "mixlora" while run receives
    # "mixlora-Q" as its config name -- confirm this mismatch is intended.
    run(adapter_name="mixlora-Q")

我在执行以上命令时,出现了以下的输出结果,但是我的损失值在训练过程中是下降的:
[
{
"adapter_name": "mixlora_0",
"task_name": "arc-c",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.22440273037542663
},
"training_steps": 29108
},
{
"adapter_name": "mixlora_0",
"task_name": "arc-e",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.2398989898989899
},
"training_steps": 29108
},
{
"adapter_name": "mixlora_0",
"task_name": "boolq",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.6217125382262997
},
"training_steps": 29108
},
{
"adapter_name": "mixlora_0",
"task_name": "obqa",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.276
},
"training_steps": 29108
},
{
"adapter_name": "mixlora_0",
"task_name": "piqa",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.49510337323177367
},
"training_steps": 29108
},
{
"adapter_name": "mixlora_0",
"task_name": "siqa",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.32906857727737976
},
"training_steps": 29108
},
{
"adapter_name": "mixlora_0",
"task_name": "hellaswag",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.2504481179047998
},
"training_steps": 29108
},
{
"adapter_name": "mixlora_0",
"task_name": "winogrande",
"date_time": "2025-06-21 01:24:33",
"metrics": {
"accuracy": 0.4956590370955012
},
"training_steps": 29108
}
]
请问作者是否遇到过类似的问题?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions