@@ -489,8 +489,14 @@ def _apply_group_offloading_block_level(
489489        stream (`torch.cuda.Stream`, *optional*): 
490490            If provided, offloading and onloading is done asynchronously using the provided stream. This can be useful 
491491            for overlapping computation and data transfer. 
492-         record_stream: TODO 
493-         low_cpu_mem_usage: TODO 
492+         record_stream (`bool`, defaults to `False`): When enabled with `use_stream`, it marks the current tensor 
493+             as having been used by this stream. It is faster at the expense of slightly more memory usage. Refer to the 
494+             [PyTorch official docs](https://pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html) for more 
495+             details. 
496+         low_cpu_mem_usage (`bool`, defaults to `False`): 
497+             If True, the CPU memory usage is minimized by pinning tensors on-the-fly instead of pre-pinning them. This 
498+             option only matters when using streamed CPU offloading (i.e. `use_stream=True`). This can be useful when 
499+             the CPU memory is a bottleneck but may counteract the benefits of using streams. 
494500    """ 
495501
496502    # Create module groups for ModuleList and Sequential blocks 
@@ -586,8 +592,14 @@ def _apply_group_offloading_leaf_level(
586592        stream (`torch.cuda.Stream`, *optional*): 
587593            If provided, offloading and onloading is done asynchronously using the provided stream. This can be useful 
588594            for overlapping computation and data transfer. 
589-         record_stream: TODO 
590-         low_cpu_mem_usage: TODO 
595+         record_stream (`bool`, defaults to `False`): When enabled with `use_stream`, it marks the current tensor 
596+             as having been used by this stream. It is faster at the expense of slightly more memory usage. Refer to the 
597+             [PyTorch official docs](https://pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html) for more 
598+             details. 
599+         low_cpu_mem_usage (`bool`, defaults to `False`): 
600+             If True, the CPU memory usage is minimized by pinning tensors on-the-fly instead of pre-pinning them. This 
601+             option only matters when using streamed CPU offloading (i.e. `use_stream=True`). This can be useful when 
602+             the CPU memory is a bottleneck but may counteract the benefits of using streams. 
591603    """ 
592604
593605    # Create module groups for leaf modules and apply group offloading hooks 
0 commit comments