我用一张 A800 80G 的显卡训练,已经加载了一天,但一直没有开始训练,想问一下这是什么原因?训练的 LoRA 是 EliGen,使用的命令如下:
# Launch the Qwen-Image EliGen LoRA training script with a single process,
# exposing only CUDA device 1 to it.
#
# Every line of the command except the last ends with a backslash so the
# shell hands all options to train.py in one invocation. Without the
# continuations the first line runs train.py with no options and each
# following "--option" line is executed as a separate command.
#
# Required environment variables (the command aborts with the message shown
# if either is unset or empty):
#   DATASET_BASE_PATH      value passed to --dataset_base_path
#   DATASET_METADATA_PATH  value passed to --dataset_metadata_path
CUDA_VISIBLE_DEVICES=1 accelerate launch --num_processes=1 examples/qwen_image/model_training/train.py \
  --dataset_base_path "${DATASET_BASE_PATH:?must be set to the value for --dataset_base_path}" \
  --dataset_metadata_path "${DATASET_METADATA_PATH:?must be set to the value for --dataset_metadata_path}" \
  --data_file_keys "image,eligen_entity_masks" \
  --max_pixels 1048576 \
  --dataset_repeat 20 \
  --model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
  --learning_rate 1e-4 \
  --num_epochs 5 \
  --remove_prefix_in_ckpt "pipe.dit." \
  --output_path "./models/train/Qwen-Image-EliGen_lora" \
  --lora_base_model "dit" \
  --lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
  --lora_rank 32 \
  --extra_inputs "eligen_entity_masks,eligen_entity_prompts" \
  --use_gradient_checkpointing \
  --find_unused_parameters
我用一张 A800 80G 的显卡训练,已经加载了一天,但一直没有开始训练,想问一下这是什么原因?训练的 LoRA 是 EliGen,使用的命令如下:
# Launch the Qwen-Image EliGen LoRA training script with a single process,
# exposing only CUDA device 1 to it.
#
# Every line of the command except the last ends with a backslash so the
# shell hands all options to train.py in one invocation. Without the
# continuations the first line runs train.py with no options and each
# following "--option" line is executed as a separate command.
#
# Required environment variables (the command aborts with the message shown
# if either is unset or empty):
#   DATASET_BASE_PATH      value passed to --dataset_base_path
#   DATASET_METADATA_PATH  value passed to --dataset_metadata_path
CUDA_VISIBLE_DEVICES=1 accelerate launch --num_processes=1 examples/qwen_image/model_training/train.py \
  --dataset_base_path "${DATASET_BASE_PATH:?must be set to the value for --dataset_base_path}" \
  --dataset_metadata_path "${DATASET_METADATA_PATH:?must be set to the value for --dataset_metadata_path}" \
  --data_file_keys "image,eligen_entity_masks" \
  --max_pixels 1048576 \
  --dataset_repeat 20 \
  --model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
  --learning_rate 1e-4 \
  --num_epochs 5 \
  --remove_prefix_in_ckpt "pipe.dit." \
  --output_path "./models/train/Qwen-Image-EliGen_lora" \
  --lora_base_model "dit" \
  --lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
  --lora_rank 32 \
  --extra_inputs "eligen_entity_masks,eligen_entity_prompts" \
  --use_gradient_checkpointing \
  --find_unused_parameters