@@ -464,8 +464,9 @@ def apply_group_offloading(
464464 offload_type (`str`, defaults to "block_level"):
465465 The type of offloading to be applied. Can be one of "block_level" or "leaf_level". Default is
466466 "block_level".
467- offload_to_disk_path (`str`, *optional*):
468- The path to the directory where offloaded parameters will be stored.
467+ offload_to_disk_path (`str`, *optional*, defaults to `None`):
468+ The path to the directory where parameters will be offloaded. Setting this option can be useful in
469+ environments with limited RAM, where a reasonable speed-memory trade-off is desired.
469470 num_blocks_per_group (`int`, *optional*):
470471 The number of blocks per group when using offload_type="block_level". This is required when using
471472 offload_type="block_level".
@@ -566,7 +567,9 @@ def _apply_group_offloading_block_level(
566567 The module to which group offloading is applied.
567568 offload_device (`torch.device`):
568569 The device to which the group of modules is offloaded. This should typically be the CPU.
569- offload_to_disk_path: TODO
570+ offload_to_disk_path (`str`, *optional*, defaults to `None`):
571+ The path to the directory where parameters will be offloaded. Setting this option can be useful in
572+ environments with limited RAM, where a reasonable speed-memory trade-off is desired.
570573 onload_device (`torch.device`):
571574 The device to which the group of modules is onloaded.
572575 non_blocking (`bool`):
@@ -678,7 +681,9 @@ def _apply_group_offloading_leaf_level(
678681 The device to which the group of modules is offloaded. This should typically be the CPU.
679682 onload_device (`torch.device`):
680683 The device to which the group of modules is onloaded.
681- offload_to_disk_path: TODO
684+ offload_to_disk_path (`str`, *optional*, defaults to `None`):
685+ The path to the directory where parameters will be offloaded. Setting this option can be useful in
686+ environments with limited RAM, where a reasonable speed-memory trade-off is desired.
682687 non_blocking (`bool`):
683688 If True, offloading and onloading are done asynchronously. This can be useful for overlapping computation
684689 and data transfer.
0 commit comments