Skip to content

Commit b8bf0fc

Browse files
authored
Fix enable_model_cpu_offload problems (#320)
* update
* update
* update
1 parent 226c98d commit b8bf0fc

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

finetrainers/args.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -853,8 +853,9 @@ def _validate_dataset_args(args: BaseArgs):
853 853
854 854
855 855   def _validate_validation_args(args: BaseArgs):
856     -     if args.dp_shards > 1 and args.enable_model_cpu_offload:
857     -         raise ValueError("Model CPU offload is not supported with FSDP at the moment.")
    856 +     if args.enable_model_cpu_offload:
    857 +         if any(x > 1 for x in [args.pp_degree, args.dp_degree, args.dp_shards, args.cp_degree, args.tp_degree]):
    858 +             raise ValueError("Model CPU offload is not supported on multi-GPU at the moment.")
858 859
859 860
860 861   def _display_helper_messages(args: argparse.Namespace):

finetrainers/trainer/sft_trainer/trainer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,7 @@ def _validate(self, step: int, final_validation: bool = False) -> None:
730 730
731 731           parallel_backend.wait_for_everyone()
732 732           if not final_validation:
    733 +             self._move_components_to_device()
733 734               self.transformer.train()
734 735
735 736       def _evaluate(self) -> None:

0 commit comments

Comments
 (0)